diff options
775 files changed, 25781 insertions, 14515 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator index e091fa87379..bc578bc6062 100644 --- a/Documentation/ABI/testing/sysfs-class-regulator +++ b/Documentation/ABI/testing/sysfs-class-regulator @@ -349,3 +349,24 @@ Description: This will be one of the same strings reported by the "state" attribute. + +What: /sys/class/regulator/.../bypass +Date: September 2012 +KernelVersion: 3.7 +Contact: Mark Brown <broonie@opensource.wolfsonmicro.com> +Description: + Some regulator directories will contain a field called + bypass. This indicates if the device is in bypass mode. + + This will be one of the following strings: + + 'enabled' + 'disabled' + 'unknown' + + 'enabled' means the regulator is in bypass mode. + + 'disabled' means that the regulator is regulating. + + 'unknown' means software cannot determine the state, or + the reported state is invalid. diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom index 8d55a83d692..7fc781048b7 100644 --- a/Documentation/ABI/testing/sysfs-driver-wacom +++ b/Documentation/ABI/testing/sysfs-driver-wacom @@ -1,3 +1,16 @@ +WWhat: /sys/class/hidraw/hidraw*/device/oled*_img +Date: June 2012 +Contact: linux-bluetooth@vger.kernel.org +Description: + The /sys/class/hidraw/hidraw*/device/oled*_img files control + OLED mocro displays on Intuos4 Wireless tablet. Accepted image + has to contain 256 bytes (64x32 px 1 bit colour). The format + is the same as PBM image 62x32px without header (64 bits per + horizontal line, 32 lines). An example of setting OLED No. 0: + dd bs=256 count=1 if=img_file of=[path to oled0_img]/oled0_img + The attribute is read only and no local copy of the image is + stored. + What: /sys/class/hidraw/hidraw*/device/speed Date: April 2010 Kernel Version: 2.6.35 diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index fc103d7a047..cdb20d41a44 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -310,6 +310,12 @@ over a rather long period of time, but improvements are always welcome! code under the influence of preempt_disable(), you instead need to use synchronize_irq() or synchronize_sched(). + This same limitation also applies to synchronize_rcu_bh() + and synchronize_srcu(), as well as to the asynchronous and + expedited forms of the three primitives, namely call_rcu(), + call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(), + synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited(). + 12. Any lock acquired by an RCU callback must be acquired elsewhere with softirq disabled, e.g., via spin_lock_irqsave(), spin_lock_bh(), etc. Failing to disable irq on a given diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 523364e4e1f..1927151b386 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -99,7 +99,7 @@ In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is printed: INFO: rcu_preempt detected stall on CPU - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer=-1 + 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 drain=0 . timer not pending (t=65000 jiffies) The "(64628 ticks this GP)" indicates that this CPU has taken more @@ -116,13 +116,13 @@ number between the two "/"s is the value of the nesting, which will be a small positive number if in the idle loop and a very large positive number (as shown above) otherwise. -For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the -CPU is not in the process of trying to force itself into dyntick-idle -state, the "." indicates that the CPU has not given up forcing RCU -into dyntick-idle mode (it would be "H" otherwise), and the "timer=-1" -indicates that the CPU has not recented forced RCU into dyntick-idle -mode (it would otherwise indicate the number of microseconds remaining -in this forced state). +For CONFIG_RCU_FAST_NO_HZ kernels, the "drain=0" indicates that the CPU is +not in the process of trying to force itself into dyntick-idle state, the +"." indicates that the CPU has not given up forcing RCU into dyntick-idle +mode (it would be "H" otherwise), and the "timer not pending" indicates +that the CPU has not recently forced RCU into dyntick-idle mode (it +would otherwise indicate the number of microseconds remaining in this +forced state). Multiple Warnings From One Stall diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index f6f15ce3990..672d1908325 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -333,23 +333,23 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The output of "cat rcu/rcu_pending" looks as follows: rcu_sched: - 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 - 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 - 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 - 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 - 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 - 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 - 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 - 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 + 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nn=146741 + 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nn=155792 + 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nn=136629 + 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nn=137723 + 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nn=123110 + 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nn=137456 + 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nn=120834 + 7 np=249893 qsp=49390 rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nn=144888 rcu_bh: - 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 - 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 - 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 - 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 - 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 - 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 - 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 - 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nn=145314 + 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nn=143180 + 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nn=117936 + 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nn=134863 + 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nn=110671 + 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nn=133235 + 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nn=110921 + 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nn=118542 As always, this is once again split into "rcu_sched" and "rcu_bh" portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional @@ -377,17 +377,6 @@ o "gpc" is the number of times that an old grace period had o "gps" is the number of times that a new grace period had started, but this CPU was not yet aware of it. -o "nf" is the number of times that this CPU suspected that the - current grace period had run for too long, and thus needed to - be forced. - - Please note that "forcing" consists of sending resched IPIs - to holdout CPUs. If that CPU really still is in an old RCU - read-side critical section, then we really do have to wait for it. - The assumption behing "forcing" is that the CPU is not still in - an old RCU read-side critical section, but has not yet responded - for some other reason. - o "nn" is the number of times that this CPU needed nothing. Alert readers will note that the rcu "nn" number for a given CPU very closely matches the rcu_bh "np" number for that same CPU. This diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 69ee188515e..bf0f6de2aa0 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -873,7 +873,7 @@ d. Do you need to treat NMI handlers, hardirq handlers, and code segments with preemption disabled (whether via preempt_disable(), local_irq_save(), local_bh_disable(), or some other mechanism) as if they were explicit RCU readers? - If so, you need RCU-sched. + If so, RCU-sched is the only choice that will work for you. e. Do you need RCU grace periods to complete even in the face of softirq monopolization of one or more of the CPUs? For @@ -884,7 +884,12 @@ f. Is your workload too update-intensive for normal use of RCU, but inappropriate for other synchronization mechanisms? If so, consider SLAB_DESTROY_BY_RCU. But please be careful! -g. Otherwise, use RCU. +g. Do you need read-side critical sections that are respected + even though they are in the middle of the idle loop, during + user-mode execution, or on an offlined CPU? If so, SRCU is the + only choice that will work for you. + +h. Otherwise, use RCU. Of course, this all assumes that you have determined that RCU is in fact the right tool for your job. diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index f6318f6d7ba..6f706aca204 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -98,10 +98,9 @@ static int create_nl_socket(int protocol) if (rcvbufsz) if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, sizeof(rcvbufsz)) < 0) { - fprintf(stderr, "Unable to set socket rcv buf size " - "to %d\n", + fprintf(stderr, "Unable to set socket rcv buf size to %d\n", rcvbufsz); - return -1; + goto error; } memset(&local, 0, sizeof(local)); diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt index 66ece3f87bb..ecfc6ccd67e 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.txt +++ b/Documentation/devicetree/bindings/regulator/regulator.txt @@ -11,10 +11,13 @@ Optional properties: - regulator-boot-on: bootloader/firmware enabled regulator - <name>-supply: phandle to the parent supply/regulator node - regulator-ramp-delay: ramp delay for regulator(in uV/uS) + +Deprecated properties: - regulator-compatible: If a regulator chip contains multiple regulators, and if the chip's binding contains a child node that describes each regulator, then this property indicates which regulator - this child node is intended to configure. + this child node is intended to configure. If this property is missing, + the node's name will be used instead. Example: diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt index 0487e9675ba..d316fb895da 100644 --- a/Documentation/devicetree/bindings/regulator/tps65217.txt +++ b/Documentation/devicetree/bindings/regulator/tps65217.txt @@ -22,66 +22,49 @@ Example: compatible = "ti,tps65217"; regulators { - #address-cells = <1>; - #size-cells = <0>; - - dcdc1_reg: regulator@0 { - reg = <0>; - regulator-compatible = "dcdc1"; + dcdc1_reg: dcdc1 { regulator-min-microvolt = <900000>; regulator-max-microvolt = <1800000>; regulator-boot-on; regulator-always-on; }; - dcdc2_reg: regulator@1 { - reg = <1>; - regulator-compatible = "dcdc2"; + dcdc2_reg: dcdc2 { regulator-min-microvolt = <900000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; }; - dcdc3_reg: regulator@2 { - reg = <2>; - regulator-compatible = "dcdc3"; + dcdc3_reg: dcc3 { regulator-min-microvolt = <900000>; regulator-max-microvolt = <1500000>; regulator-boot-on; regulator-always-on; }; - ldo1_reg: regulator@3 { - reg = <3>; - regulator-compatible = "ldo1"; + ldo1_reg: ldo1 { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; }; - ldo2_reg: regulator@4 { - reg = <4>; - regulator-compatible = "ldo2"; + ldo2_reg: ldo2 { regulator-min-microvolt = <900000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; }; - ldo3_reg: regulator@5 { - reg = <5>; - regulator-compatible = "ldo3"; + ldo3_reg: ldo3 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; }; - ldo4_reg: regulator@6 { - reg = <6>; - regulator-compatible = "ldo4"; + ldo4_reg: ldo4 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; diff --git a/Documentation/devicetree/bindings/regulator/tps6586x.txt b/Documentation/devicetree/bindings/regulator/tps6586x.txt index da80c2ae091..07b9ef6e49d 100644 --- a/Documentation/devicetree/bindings/regulator/tps6586x.txt +++ b/Documentation/devicetree/bindings/regulator/tps6586x.txt @@ -6,9 +6,13 @@ Required properties: - interrupts: the interrupt outputs of the controller - #gpio-cells: number of cells to describe a GPIO - gpio-controller: mark the device as a GPIO controller -- regulators: list of regulators provided by this controller, must have - property "regulator-compatible" to match their hardware counterparts: - sm[0-2], ldo[0-9] and ldo_rtc +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name (or the deprecated + regulator-compatible property if present), with valid values listed below. + The content of each sub-node is defined by the standard binding for + regulators; see regulator.txt. + sys, sm[0-2], ldo[0-9] and ldo_rtc +- sys-supply: The input supply for SYS. - vin-sm0-supply: The input supply for the SM0. - vin-sm1-supply: The input supply for the SM1. - vin-sm2-supply: The input supply for the SM2. @@ -20,6 +24,9 @@ Required properties: Each regulator is defined using the standard binding for regulators. +Note: LDO5 and LDO_RTC is supplied by SYS regulator internally and driver + take care of making proper parent child relationship. + Example: pmu: tps6586x@34 { @@ -30,6 +37,7 @@ Example: #gpio-cells = <2>; gpio-controller; + sys-supply = <&some_reg>; vin-sm0-supply = <&some_reg>; vin-sm1-supply = <&some_reg>; vin-sm2-supply = <&some_reg>; @@ -40,103 +48,80 @@ Example: vinldo9-supply = <...>; regulators { - #address-cells = <1>; - #size-cells = <0>; + sys_reg: sys { + regulator-name = "vdd_sys"; + regulator-boot-on; + regulator-always-on; + }; - sm0_reg: regulator@0 { - reg = <0>; - regulator-compatible = "sm0"; + sm0_reg: sm0 { regulator-min-microvolt = < 725000>; regulator-max-microvolt = <1500000>; regulator-boot-on; regulator-always-on; }; - sm1_reg: regulator@1 { - reg = <1>; - regulator-compatible = "sm1"; + sm1_reg: sm1 { regulator-min-microvolt = < 725000>; regulator-max-microvolt = <1500000>; regulator-boot-on; regulator-always-on; }; - sm2_reg: regulator@2 { - reg = <2>; - regulator-compatible = "sm2"; + sm2_reg: sm2 { regulator-min-microvolt = <3000000>; regulator-max-microvolt = <4550000>; regulator-boot-on; regulator-always-on; }; - ldo0_reg: regulator@3 { - reg = <3>; - regulator-compatible = "ldo0"; + ldo0_reg: ldo0 { regulator-name = "PCIE CLK"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; }; - ldo1_reg: regulator@4 { - reg = <4>; - regulator-compatible = "ldo1"; + ldo1_reg: ldo1 { regulator-min-microvolt = < 725000>; regulator-max-microvolt = <1500000>; }; - ldo2_reg: regulator@5 { - reg = <5>; - regulator-compatible = "ldo2"; + ldo2_reg: ldo2 { regulator-min-microvolt = < 725000>; regulator-max-microvolt = <1500000>; }; - ldo3_reg: regulator@6 { - reg = <6>; - regulator-compatible = "ldo3"; + ldo3_reg: ldo3 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; - ldo4_reg: regulator@7 { - reg = <7>; - regulator-compatible = "ldo4"; + ldo4_reg: ldo4 { regulator-min-microvolt = <1700000>; regulator-max-microvolt = <2475000>; }; - ldo5_reg: regulator@8 { - reg = <8>; - regulator-compatible = "ldo5"; + ldo5_reg: ldo5 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; - ldo6_reg: regulator@9 { - reg = <9>; - regulator-compatible = "ldo6"; + ldo6_reg: ldo6 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; - ldo7_reg: regulator@10 { - reg = <10>; - regulator-compatible = "ldo7"; + ldo7_reg: ldo7 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; - ldo8_reg: regulator@11 { - reg = <11>; - regulator-compatible = "ldo8"; + ldo8_reg: ldo8 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; - ldo9_reg: regulator@12 { - reg = <12>; - regulator-compatible = "ldo9"; + ldo9_reg: ldo9 { regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3300000>; }; diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 39462cf35cd..74c25c8d888 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -162,7 +162,6 @@ mach-types.h machtypes.h map map_hugetlb -maui_boot.h media mconf miboot* diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c index 5caa2af3320..62a190d45f3 100644 --- a/Documentation/ia64/aliasing-test.c +++ b/Documentation/ia64/aliasing-test.c @@ -132,6 +132,7 @@ static int read_rom(char *path) rc = write(fd, "1", 2); if (rc <= 0) { + close(fd); perror("write"); return -1; } diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ad7e2e5088c..d1cda8d892a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2385,6 +2385,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. + Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. @@ -2638,9 +2649,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. smart2= [HW] Format: <io1>[,<io2>[,...,<io8>]] - smp-alt-once [X86-32,SMP] On a hotplug CPU system, only - attempt to substitute SMP alternatives once at boot. - smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port smsc-ircc2.ircc_sir= [HW] SIR base I/O port diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index 92341b84250..0b4b63e7e9b 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -53,7 +53,7 @@ before suspend (it is limited to 500 MB by default). Article about goals and implementation of Software Suspend for Linux ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Author: G‚ábor Kuti +Author: Gábor Kuti Last revised: 2003-10-20 by Pavel Machek Idea and goals to achieve diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt index 28aa1075e29..b1b8587b86f 100644 --- a/Documentation/scheduler/sched-arch.txt +++ b/Documentation/scheduler/sched-arch.txt @@ -17,16 +17,6 @@ you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file Unlocked context switches introduce only a very minor performance penalty to the core scheduler implementation in the CONFIG_SMP case. -2. Interrupt status -By default, the switch_to arch function is called with interrupts -disabled. Interrupts may be enabled over the call if it is likely to -introduce a significant interrupt latency by adding the line -`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for -unlocked context switches. This define also implies -`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an -example. - - CPU idle ======== Your cpu_idle routines need to obey the following rules: diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index d0d0bb9e3e2..d68ea5fc812 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes functions). Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. -To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. +To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y. Similar to the events tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 0cb6685c802..8eda3635a17 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -133,7 +133,7 @@ character devices for this group: $ lspci -n -s 0000:06:0d.0 06:0d.0 0401: 1102:0002 (rev 08) # echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind -# echo 1102 0002 > /sys/bus/pci/drivers/vfio/new_id +# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id Now we need to look at what other devices are in the group to free it for use by VFIO: diff --git a/MAINTAINERS b/MAINTAINERS index b17587d9412..9362f54bccb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3552,11 +3552,12 @@ K: \b(ABS|SYN)_MT_ INTEL C600 SERIES SAS CONTROLLER DRIVER M: Intel SCU Linux support <intel-linux-scu@intel.com> +M: Lukasz Dorau <lukasz.dorau@intel.com> +M: Maciej Patelczyk <maciej.patelczyk@intel.com> M: Dave Jiang <dave.jiang@intel.com> -M: Ed Nadolski <edmund.nadolski@intel.com> L: linux-scsi@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git -S: Maintained +T: git git://git.code.sf.net/p/intel-sas/isci +S: Supported F: drivers/scsi/isci/ F: firmware/isci/ @@ -5321,6 +5322,12 @@ L: linux-mtd@lists.infradead.org S: Maintained F: drivers/mtd/devices/phram.c +PICOLCD HID DRIVER +M: Bruno Prémont <bonbons@linux-vserver.org> +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/hid-picolcd* + PICOXCELL SUPPORT M: Jamie Iles <jamie@jamieiles.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -5544,6 +5551,8 @@ F: Documentation/devicetree/bindings/pwm/ F: include/linux/pwm.h F: include/linux/of_pwm.h F: drivers/pwm/ +F: drivers/video/backlight/pwm_bl.c +F: include/linux/pwm_backlight.h PXA2xx/PXA3xx SUPPORT M: Eric Miao <eric.y.miao@gmail.com> @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc6 -NAME = Saber-toothed Squirrel +EXTRAVERSION = +NAME = Terrified Chipmunk # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -609,7 +609,11 @@ KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) endif ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -pg +ifdef CONFIG_HAVE_FENTRY +CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) +endif +KBUILD_CFLAGS += -pg $(CC_USING_FENTRY) +KBUILD_AFLAGS += $(CC_USING_FENTRY) ifdef CONFIG_DYNAMIC_FTRACE ifdef CONFIG_HAVE_C_RECORDMCOUNT BUILD_C_RECORDMCOUNT := y diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc..a62965d057f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,19 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool @@ -281,4 +294,23 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_RCU_USER_QS + bool + help + Provide kernel entry/exit hooks necessary for userspace + RCU extended quiescent state. Syscalls need to be wrapped inside + rcu_user_exit()-rcu_user_enter() through the slow path using + TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs + are already protected inside rcu_irq_enter/rcu_irq_exit() but + preemption or signal handling on irq exit still need to be protected. + +config HAVE_VIRT_CPU_ACCOUNTING + bool + +config HAVE_IRQ_TIME_ACCOUNTING + bool + help + Archs need to ensure they use a high enough resolution clock to + support irq time accounting and then call enable_sched_clock_irqtime(). + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index d6fde98b74b..83638aa096d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tty.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/reg.h> #include <asm/uaccess.h> @@ -54,9 +55,12 @@ cpu_idle(void) /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ + rcu_idle_enter(); while (!need_resched()) cpu_relax(); - schedule(); + + rcu_idle_exit(); + schedule_preempt_disabled(); } } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 35ddc02bfa4..a41ad90a97a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,6 +166,7 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); + preempt_disable(); /* Do nothing. */ cpu_idle(); } diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 81769c1341f..bc67cbff394 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -653,6 +653,7 @@ __armv7_mmu_cache_on: mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg + bic r0, r0, #1 << 28 @ clear SCTLR.TRE orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x003c @ write buffer #ifdef CONFIG_MMU diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 66389c1c6f6..7c95f76398d 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -104,6 +104,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -113,6 +114,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -122,6 +124,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index b460d6ce9eb..195019b7ca0 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -95,6 +95,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -104,6 +105,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -113,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -122,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -131,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index bafa8806fc1..63751b1e744 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -113,6 +113,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -122,6 +123,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -131,6 +133,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -140,6 +143,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -149,6 +153,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index bfac0dfc332..ef9336ae961 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -107,6 +107,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -116,6 +117,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -125,6 +127,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -134,6 +137,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 4a18c393b13..8a387a8d61b 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -115,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -124,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -133,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -142,6 +145,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 0cab47d4a83..2fde5fd1acc 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -404,6 +404,7 @@ #define __NR_setns (__NR_SYSCALL_BASE+375) #define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) + /* 378 for kcmp */ /* * The following SWIs are ARM private. @@ -483,6 +484,7 @@ */ #define __IGNORE_fadvise64_64 #define __IGNORE_migrate_pages +#define __IGNORE_kcmp #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 463ff4a0ec8..e337879595e 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -387,6 +387,7 @@ /* 375 */ CALL(sys_setns) CALL(sys_process_vm_readv) CALL(sys_process_vm_writev) + CALL(sys_ni_syscall) /* reserved for sys_kcmp */ #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index fef42b21cec..e1f906989bb 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/clk.h> -#include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> @@ -96,7 +95,52 @@ static void twd_timer_stop(struct clock_event_device *clk) disable_percpu_irq(clk->irq); } -#ifdef CONFIG_CPU_FREQ +#ifdef CONFIG_COMMON_CLK + +/* + * Updates clockevent frequency when the cpu frequency changes. + * Called on the cpu that is changing frequency with interrupts disabled. + */ +static void twd_update_frequency(void *new_rate) +{ + twd_timer_rate = *((unsigned long *) new_rate); + + clockevents_update_freq(*__this_cpu_ptr(twd_evt), twd_timer_rate); +} + +static int twd_rate_change(struct notifier_block *nb, + unsigned long flags, void *data) +{ + struct clk_notifier_data *cnd = data; + + /* + * The twd clock events must be reprogrammed to account for the new + * frequency. The timer is local to a cpu, so cross-call to the + * changing cpu. + */ + if (flags == POST_RATE_CHANGE) + smp_call_function(twd_update_frequency, + (void *)&cnd->new_rate, 1); + + return NOTIFY_OK; +} + +static struct notifier_block twd_clk_nb = { + .notifier_call = twd_rate_change, +}; + +static int twd_clk_init(void) +{ + if (twd_evt && *__this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + return clk_notifier_register(twd_clk, &twd_clk_nb); + + return 0; +} +core_initcall(twd_clk_init); + +#elif defined (CONFIG_CPU_FREQ) + +#include <linux/cpufreq.h> /* * Updates clockevent frequency when the cpu frequency changes. diff --git a/arch/arm/mach-imx/clk-imx25.c b/arch/arm/mach-imx/clk-imx25.c index 4431a62fff5..d20d4795f4e 100644 --- a/arch/arm/mach-imx/clk-imx25.c +++ b/arch/arm/mach-imx/clk-imx25.c @@ -241,6 +241,6 @@ int __init mx25_clocks_init(void) clk_register_clkdev(clk[sdma_ahb], "ahb", "imx35-sdma"); clk_register_clkdev(clk[iim_ipg], "iim", NULL); - mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54); + mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), MX25_INT_GPT1); return 0; } diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c index 2c6ab3273f9..5985ed1b8c9 100644 --- a/arch/arm/mach-imx/mach-armadillo5x0.c +++ b/arch/arm/mach-imx/mach-armadillo5x0.c @@ -526,7 +526,8 @@ static void __init armadillo5x0_init(void) imx31_add_mxc_nand(&armadillo5x0_nand_board_info); /* set NAND page size to 2k if not configured via boot mode pins */ - __raw_writel(__raw_readl(MXC_CCM_RCSR) | (1 << 30), MXC_CCM_RCSR); + __raw_writel(__raw_readl(mx3_ccm_base + MXC_CCM_RCSR) | + (1 << 30), mx3_ccm_base + MXC_CCM_RCSR); /* RTC */ /* Get RTC IRQ and register the chip */ diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 8dabfe81d07..ff886e01a0b 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -261,7 +261,7 @@ static void __init apx4devkit_init(void) enable_clk_enet_out(); if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KS8051, MICREL_PHY_ID_MASK, + phy_register_fixup_for_uid(PHY_ID_KSZ8051, MICREL_PHY_ID_MASK, apx4devkit_phy_fixup); mxsfb_pdata.mode_list = apx4devkit_video_modes; diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 410291c6766..a6cd14ab1e4 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -204,6 +204,13 @@ void __init orion5x_wdt_init(void) void __init orion5x_init_early(void) { orion_time_set_base(TIMER_VIRT_BASE); + + /* + * Some Orion5x devices allocate their coherent buffers from atomic + * context. Increase size of atomic coherent pool to make sure such + * the allocations won't fail. + */ + init_dma_coherent_pool_size(SZ_1M); } int orion5x_tclk; diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 53b7ea92c32..3b8a0171c3c 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -346,11 +346,11 @@ static struct resource sh_mmcif_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = gic_spi(141), + .start = gic_spi(140), .flags = IORESOURCE_IRQ, }, [2] = { - .start = gic_spi(140), + .start = gic_spi(141), .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c index b7344beec10..94486e7e9df 100644 --- a/arch/arm/mach-tegra/board-harmony-power.c +++ b/arch/arm/mach-tegra/board-harmony-power.c @@ -67,6 +67,13 @@ static struct regulator_init_data ldo0_data = { }, \ } +static struct regulator_init_data sys_data = { + .supply_regulator = "vdd_5v0", + .constraints = { + .name = "vdd_sys", + }, +}; + HARMONY_REGULATOR_INIT(sm0, "vdd_sm0", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm1, "vdd_sm1", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm2, "vdd_sm2", "vdd_sys", 3000, 4550, 1); @@ -74,7 +81,7 @@ HARMONY_REGULATOR_INIT(ldo1, "vdd_ldo1", "vdd_sm2", 725, 1500, 1); HARMONY_REGULATOR_INIT(ldo2, "vdd_ldo2", "vdd_sm2", 725, 1500, 0); HARMONY_REGULATOR_INIT(ldo3, "vdd_ldo3", "vdd_sm2", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo4, "vdd_ldo4", "vdd_sm2", 1700, 2475, 1); -HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", NULL, 1250, 3300, 1); +HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", "vdd_sys", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo6, "vdd_ldo6", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo7, "vdd_ldo7", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo8, "vdd_ldo8", "vdd_sm2", 1250, 3300, 0); @@ -88,6 +95,7 @@ HARMONY_REGULATOR_INIT(ldo9, "vdd_ldo9", "vdd_sm2", 1250, 3300, 1); } static struct tps6586x_subdev_info tps_devs[] = { + TPS_REG(SYS, &sys_data), TPS_REG(SM_0, &sm0_data), TPS_REG(SM_1, &sm1_data), TPS_REG(SM_2, &sm2_data), @@ -120,7 +128,7 @@ static struct i2c_board_info __initdata harmony_regulators[] = { int __init harmony_regulator_init(void) { - regulator_register_always_on(0, "vdd_sys", + regulator_register_always_on(0, "vdd_5v0", NULL, 0, 5000000); if (machine_is_harmony()) { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e59c4ab71bc..13f555d6249 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -346,6 +346,8 @@ static int __init atomic_pool_init(void) (unsigned)pool->size / 1024); return 0; } + + kfree(pages); no_pages: kfree(bitmap); no_bitmap: diff --git a/arch/arm/plat-mxc/include/mach/mx25.h b/arch/arm/plat-mxc/include/mach/mx25.h index 627d94f1b01..ec466400a20 100644 --- a/arch/arm/plat-mxc/include/mach/mx25.h +++ b/arch/arm/plat-mxc/include/mach/mx25.h @@ -98,6 +98,7 @@ #define MX25_INT_UART1 (NR_IRQS_LEGACY + 45) #define MX25_INT_GPIO2 (NR_IRQS_LEGACY + 51) #define MX25_INT_GPIO1 (NR_IRQS_LEGACY + 52) +#define MX25_INT_GPT1 (NR_IRQS_LEGACY + 54) #define MX25_INT_FEC (NR_IRQS_LEGACY + 57) #define MX25_DMA_REQ_SSI2_RX1 22 diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 65c5eca475e..d1116e2dfbe 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -144,6 +144,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate) int clk_set_rate(struct clk *clk, unsigned long rate) { + unsigned long flags; int ret; if (IS_ERR(clk)) @@ -159,9 +160,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) if (clk->ops == NULL || clk->ops->set_rate == NULL) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); ret = (clk->ops->set_rate)(clk, rate); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } @@ -173,17 +174,18 @@ struct clk *clk_get_parent(struct clk *clk) int clk_set_parent(struct clk *clk, struct clk *parent) { + unsigned long flags; int ret = 0; if (IS_ERR(clk)) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); if (clk->ops && clk->ops->set_parent) ret = (clk->ops->set_parent)(clk, parent); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 3af601e31e6..f08e89183cd 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm generic-y += atomic.h generic-y += auxvec.h +generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += cputime.h diff --git a/arch/c6x/include/asm/barrier.h b/arch/c6x/include/asm/barrier.h deleted file mode 100644 index 538240e8590..00000000000 --- a/arch/c6x/include/asm/barrier.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_BARRIER_H -#define _ASM_C6X_BARRIER_H - -#define nop() asm("NOP\n"); - -#define mb() barrier() -#define rmb() barrier() -#define wmb() barrier() -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) - -#endif /* _ASM_C6X_BARRIER_H */ diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 66fd0172879..7f65be6f7f1 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/elfcore.h> #include <linux/mqueue.h> #include <linux/reboot.h> +#include <linux/rcupdate.h> //#define DEBUG @@ -74,6 +75,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); /* @@ -86,6 +88,7 @@ void cpu_idle (void) idle = default_idle; idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index ff95f50efea..2eb7fa5bf9d 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/pagemap.h> +#include <linux/rcupdate.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> @@ -69,12 +70,14 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); if (!frv_dma_inprogress && idle) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0e9c315be10..f153ed1a4c0 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/reboot.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -78,8 +79,10 @@ void (*idle)(void) = default_idle; void cpu_idle(void) { while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 310cf5781fa..3c720ef6c32 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,6 +25,7 @@ config IA64 select HAVE_GENERIC_HARDIRQS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select HAVE_VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP @@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER default "17" if HUGETLB_PAGE default "11" -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - default n - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. - If in doubt, say N here. - config SMP bool "Symmetric multi-processing support" select USE_GENERIC_SMP_HELPERS diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h index cb2412fcd17..d38c7ea5eea 100644 --- a/arch/ia64/include/asm/switch_to.h +++ b/arch/ia64/include/asm/switch_to.h @@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task); extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next); -# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n) -#else -# define IA64_ACCOUNT_ON_SWITCH(p,n) -#endif - #ifdef CONFIG_PERFMON DECLARE_PER_CPU(unsigned long, pfm_syst_info); # define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) @@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct || PERFMON_IS_SYSWIDE()) #define __switch_to(prev,next,last) do { \ - IA64_ACCOUNT_ON_SWITCH(prev, next); \ if (IA64_HAS_EXTRA_STATE(prev)) \ ia64_save_extra(prev); \ if (IA64_HAS_EXTRA_STATE(next)) \ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index dd6fc144974..3e316ec0b83 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <linux/utsname.h> #include <linux/tracehook.h> +#include <linux/rcupdate.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -279,6 +280,7 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); if (can_do_pal_halt) { current_thread_info()->status &= ~TS_POLLING; /* @@ -309,6 +311,7 @@ cpu_idle (void) normal_xtp(); #endif } + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); if (cpu_is_offline(cpu)) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index ecc904b33c5..80ff9acc5ed 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -83,32 +83,36 @@ static struct clocksource *itc_clocksource; extern cputime_t cycle_to_cputime(u64 cyc); +static void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_utime; + struct thread_info *ti = task_thread_info(tsk); + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(tsk, delta_utime, delta_utime); + ti->ac_utime = 0; + } +} + /* * Called from the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on * the next process. */ -void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *pi = task_thread_info(prev); - struct thread_info *ni = task_thread_info(next); - cputime_t delta_stime, delta_utime; - __u64 now; + struct thread_info *ni = task_thread_info(current); - now = ia64_get_itc(); - - delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); if (idle_task(smp_processor_id()) != prev) - account_system_time(prev, 0, delta_stime, delta_stime); + vtime_account_system(prev); else - account_idle_time(delta_stime); + vtime_account_idle(prev); - if (pi->ac_utime) { - delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime, delta_utime); - } + vtime_account_user(prev); - pi->ac_stamp = ni->ac_stamp = now; + pi->ac_stamp = ni->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } @@ -116,29 +120,32 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) * Account time for a transition between system, hard irq or soft irq state. * Note that this function is called with interrupts enabled. */ -void account_system_vtime(struct task_struct *tsk) +static cputime_t vtime_delta(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - unsigned long flags; cputime_t delta_stime; __u64 now; - local_irq_save(flags); - now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - if (irq_count() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta_stime, delta_stime); - else - account_idle_time(delta_stime); ti->ac_stime = 0; - ti->ac_stamp = now; - local_irq_restore(flags); + return delta_stime; +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta = vtime_delta(tsk); + + account_system_time(tsk, 0, delta, delta); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + account_idle_time(vtime_delta(tsk)); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Called from the timer interrupt handler to charge accumulated user time @@ -146,14 +153,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime); */ void account_process_tick(struct task_struct *p, int user_tick) { - struct thread_info *ti = task_thread_info(p); - cputime_t delta_utime; - - if (ti->ac_utime) { - delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime, delta_utime); - ti->ac_utime = 0; - } + vtime_account_user(p); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 3a4a32b2720..384e63f3a4c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/hardirq.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -82,6 +83,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void) = pm_idle; @@ -90,6 +92,7 @@ void cpu_idle (void) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index c488e3cfab5..ac2892e49c7 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/init_task.h> #include <linux/mqueue.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -75,8 +76,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c index 75f9ee967ea..9cd13b4ce42 100644 --- a/arch/m68k/platform/coldfire/clk.c +++ b/arch/m68k/platform/coldfire/clk.c @@ -146,9 +146,3 @@ struct clk_ops clk_ops1 = { }; #endif /* MCFPM_PPMCR1 */ #endif /* MCFPM_PPMCR0 */ - -struct clk *devm_clk_get(struct device *dev, const char *id) -{ - return NULL; -} -EXPORT_SYMBOL(devm_clk_get); diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index e7e03ecf549..afc379ca375 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -102,7 +102,7 @@ static void cmp_init_secondary(void) c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE; #endif #ifdef CONFIG_MIPS_MT_SMTC - c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC; + c->tc_id = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT; #endif } diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 33aadbcf170..dcfd573871c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -152,6 +152,8 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 7b13a4caeea..fea823f1847 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -273,16 +273,19 @@ asmlinkage void plat_irq_dispatch(void) unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; + if (unlikely(!pending)) { + spurious_interrupt(); + return; + } + irq = irq_ffs(pending); if (irq == MIPSCPU_INT_I8259A) malta_hw0_irqdispatch(); else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()])) malta_ipi_irqdispatch(); - else if (irq >= 0) - do_IRQ(MIPS_CPU_IRQ_BASE + irq); else - spurious_interrupt(); + do_IRQ(MIPS_CPU_IRQ_BASE + irq); } #ifdef CONFIG_MIPS_MT_SMP diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index 4c35301720e..80562b81f0f 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -138,11 +138,6 @@ static int __init malta_add_devices(void) if (err) return err; - /* - * Set RTC to BCD mode to support current alarm code. - */ - CMOS_WRITE(CMOS_READ(RTC_CONTROL) & ~RTC_DM_BINARY, RTC_CONTROL); - return 0; } diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 7dab0cd3646..e9cceba193b 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -107,6 +108,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ for (;;) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); @@ -121,6 +123,7 @@ void cpu_idle(void) } idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 2c05a9292a8..8c6b6b6561f 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -48,6 +48,7 @@ #include <linux/unistd.h> #include <linux/kallsyms.h> #include <linux/uaccess.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/asm-offsets.h> @@ -69,8 +70,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); } diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1c1aadc8c48..c32ae5ce9ff 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -1,10 +1,6 @@ addnote empty.c hack-coff -infblock.c -infblock.h -infcodes.c -infcodes.h inffast.c inffast.h inffixed.h diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 3b4b4a8da92..c1f267694ac 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -197,12 +197,6 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) -#define account_process_vtime(tsk) account_process_tick(tsk, 0) -#else -#define account_process_vtime(tsk) do { } while (0) -#endif - extern void secondary_cpu_time_init(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1a1f2ddfb58..e9cb51f5f80 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev, local_irq_save(flags); - account_system_vtime(current); - account_process_vtime(current); - /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e49e93191b6..eaa9d0e6abc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -291,13 +291,12 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. */ -void account_system_vtime(struct task_struct *tsk) +static u64 vtime_delta(struct task_struct *tsk, + u64 *sys_scaled, u64 *stolen) { - u64 now, nowscaled, delta, deltascaled; - unsigned long flags; - u64 stolen, udelta, sys_scaled, user_scaled; + u64 now, nowscaled, deltascaled; + u64 udelta, delta, user_scaled; - local_irq_save(flags); now = mftb(); nowscaled = read_spurr(now); get_paca()->system_time += now - get_paca()->starttime; @@ -305,7 +304,7 @@ void account_system_vtime(struct task_struct *tsk) deltascaled = nowscaled - get_paca()->startspurr; get_paca()->startspurr = nowscaled; - stolen = calculate_stolen_time(now); + *stolen = calculate_stolen_time(now); delta = get_paca()->system_time; get_paca()->system_time = 0; @@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk) * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - sys_scaled = delta; + *sys_scaled = delta; user_scaled = udelta; if (deltascaled != delta + udelta) { if (udelta) { - sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - sys_scaled; + *sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - *sys_scaled; } else { - sys_scaled = deltascaled; + *sys_scaled = deltascaled; } } get_paca()->user_time_scaled += user_scaled; - if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { - account_system_time(tsk, 0, delta, sys_scaled); - if (stolen) - account_steal_time(stolen); - } else { - account_idle_time(delta + stolen); - } - local_irq_restore(flags); + return delta; +} + +void vtime_account_system(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_idle_time(delta + stolen); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Transfer the user and system times accumulated in the paca * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. - * Assumes that account_system_vtime() has been called recently + * Assumes that vtime_account() has been called recently * (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ @@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick) account_user_time(tsk, utime, utimescaled); } +void vtime_task_switch(struct task_struct *prev) +{ + vtime_account(prev); + account_process_tick(prev, 0); +} + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 30fd01de6be..72afd2888ca 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,6 +1,7 @@ config PPC64 bool "64-bit kernel" default n + select HAVE_VIRT_CPU_ACCOUNTING help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -337,21 +338,6 @@ config PPC_MM_SLICES default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES) default n -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - depends on PPC64 - default y - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. This also enables accounting of - stolen time on logically-partitioned systems running on - IBM POWER5-based machines. - - If in doubt, say Y here. - config PPC_HAVE_PMU_SUPPORT bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 107610e01a2..f5ab543396d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -49,9 +49,6 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config VIRT_CPU_ACCOUNTING - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y @@ -89,6 +86,8 @@ config S390 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_CMPXCHG_LOCAL + select HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select BUILDTIME_EXTABLE_SORT select ARCH_INLINE_SPIN_TRYLOCK diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8709bdef233..023d5ae2448 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -12,6 +12,9 @@ #include <linux/spinlock.h> #include <asm/div64.h> + +#define __ARCH_HAS_VTIME_ACCOUNT + /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 799ed0f1643..2d6e6e38056 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -66,16 +66,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return pte; } -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - mm->context.flush_mm = 1; - pmd_clear((pmd_t *) ptep); - return pte; -} - static inline void __pmd_csp(pmd_t *pmdp) { register unsigned long reg2 asm("2") = pmd_val(*pmdp); @@ -117,6 +107,15 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, __pmd_csp(pmdp); } +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(ptep); + + huge_ptep_invalidate(mm, addr, ptep); + return pte; +} + #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ @@ -131,10 +130,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, ({ \ pte_t __pte = huge_ptep_get(__ptep); \ if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ + huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ } \ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f223068b782..314cc9426fc 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -89,12 +89,8 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *, struct task_struct *); -extern void account_tick_vtime(struct task_struct *); - #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev, current); \ } while (0) #endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9fde315f3a7..1d8fe2b17ef 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -90,12 +90,10 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - spin_lock(&mm->page_table_lock); if (mm->context.flush_mm) { __tlb_flush_mm(mm); mm->context.flush_mm = 0; } - spin_unlock(&mm->page_table_lock); } /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f86c81e13c3..40b57693de3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -974,11 +974,13 @@ static void __init setup_hwcaps(void) if (MACHINE_HAS_HPAGE) elf_hwcap |= HWCAP_S390_HPAGE; +#if defined(CONFIG_64BIT) /* * 64-bit register support for 31-bit processes * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; +#endif get_cpu_id(&cpu_id); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4fc97b40a6e..cb5093c26d1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -99,7 +99,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) return virt_timer_forward(user + system); } -void account_vtime(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *ti; @@ -107,7 +107,7 @@ void account_vtime(struct task_struct *prev, struct task_struct *next) ti = task_thread_info(prev); ti->user_timer = S390_lowcore.user_timer; ti->system_timer = S390_lowcore.system_timer; - ti = task_thread_info(next); + ti = task_thread_info(current); S390_lowcore.user_timer = ti->user_timer; S390_lowcore.system_timer = ti->system_timer; } @@ -122,7 +122,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +void vtime_account(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(account_system_vtime); +EXPORT_SYMBOL_GPL(vtime_account); void __kprobes vtime_stop_cpu(void) { diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 60ee2b88379..2d37bb861fa 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -2,69 +2,82 @@ * User access functions based on page table walks for enhanced * system layout without hardware support. * - * Copyright IBM Corp. 2006 + * Copyright IBM Corp. 2006, 2012 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) */ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include <asm/futex.h> #include "uaccess.h" -static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) + +/* + * Returns kernel address for user virtual address. If the returned address is + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address + * contains the (negative) exception code. + */ +static __always_inline unsigned long follow_table(struct mm_struct *mm, + unsigned long addr, int write) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; + pte_t *ptep; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return (pte_t *) 0x3a; + return -0x3aUL; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return (pte_t *) 0x3b; + return -0x3bUL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return (pte_t *) 0x10; + if (pmd_none(*pmd)) + return -0x10UL; + if (pmd_huge(*pmd)) { + if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); + } + if (unlikely(pmd_bad(*pmd))) + return -0x10UL; + + ptep = pte_offset_map(pmd, addr); + if (!pte_present(*ptep)) + return -0x11UL; + if (write && !pte_write(*ptep)) + return -0x04UL; - return pte_offset_map(pmd, addr); + return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); } static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, size; - pte_t *pte; + unsigned long offset, done, size, kaddr; void *from, *to; done = 0; retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, write_user); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } else if (write_user && !pte_write(*pte)) { - pte = (pte_t *) 0x04; - goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); + offset = uaddr & ~PAGE_MASK; size = min(n - done, PAGE_SIZE - offset); if (write_user) { - to = (void *)((pfn << PAGE_SHIFT) + offset); + to = (void *) kaddr; from = kptr + done; } else { - from = (void *)((pfn << PAGE_SHIFT) + offset); + from = (void *) kaddr; to = kptr + done; } memcpy(to, from, size); @@ -75,7 +88,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) + if (__handle_fault(uaddr, -kaddr, write_user)) return n - done; goto retry; } @@ -84,27 +97,22 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, + int write) { struct mm_struct *mm = current->mm; - unsigned long pfn; - pte_t *pte; + unsigned long kaddr; int rc; retry: - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, write); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); + return kaddr; fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(uaddr, (unsigned long) pte, 0); + rc = __handle_fault(uaddr, -kaddr, write); spin_lock(&mm->page_table_lock); if (!rc) goto retry; @@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to) static size_t strnlen_user_pt(size_t count, const char __user *src) { - char *addr; unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, len; - pte_t *pte; + unsigned long offset, done, len, kaddr; size_t len_str; if (segment_eq(get_fs(), KERNEL_DS)) @@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, 0); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE-1); - addr = (char *)(pfn << PAGE_SHIFT) + offset; + offset = uaddr & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); - len_str = strnlen(addr, len); + len_str = strnlen((char *) kaddr, len); done += len_str; uaddr += len_str; } while ((len_str == len) && (done < count)); @@ -192,7 +192,7 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, 0)) + if (__handle_fault(uaddr, -kaddr, 0)) return 0; goto retry; } @@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to, const void __user *from) { struct mm_struct *mm = current->mm; - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size, error_code; + unsigned long offset_max, uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; - pte_t *pte_from, *pte_to; + unsigned long kaddr_to, kaddr_from; int write_user; if (segment_eq(get_fs(), KERNEL_DS)) { @@ -242,38 +241,23 @@ retry: do { write_user = 0; uaddr = uaddr_from; - pte_from = follow_table(mm, uaddr_from); - error_code = (unsigned long) pte_from; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_from)) { - error_code = 0x11; + kaddr_from = follow_table(mm, uaddr_from, 0); + error_code = kaddr_from; + if (IS_ERR_VALUE(error_code)) goto fault; - } write_user = 1; uaddr = uaddr_to; - pte_to = follow_table(mm, uaddr_to); - error_code = (unsigned long) pte_to; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_to)) { - error_code = 0x11; + kaddr_to = follow_table(mm, uaddr_to, 1); + error_code = (unsigned long) kaddr_to; + if (IS_ERR_VALUE(error_code)) goto fault; - } else if (!pte_write(*pte_to)) { - error_code = 0x04; - goto fault; - } - pfn_from = pte_pfn(*pte_from); - pfn_to = pte_pfn(*pte_to); - offset_from = uaddr_from & (PAGE_SIZE-1); - offset_to = uaddr_from & (PAGE_SIZE-1); - offset_max = max(offset_from, offset_to); + offset_max = max(uaddr_from & ~PAGE_MASK, + uaddr_to & ~PAGE_MASK); size = min(n - done, PAGE_SIZE - offset_max); - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + memcpy((void *) kaddr_to, (void *) kaddr_from, size); done += size; uaddr_from += size; uaddr_to += size; @@ -282,7 +266,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, error_code, write_user)) + if (__handle_fault(uaddr, -error_code, write_user)) return n - done; goto retry; } @@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 2707023c756..637970cfd3f 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -27,6 +27,7 @@ #include <linux/reboot.h> #include <linux/elfcore.h> #include <linux/pm.h> +#include <linux/rcupdate.h> void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -50,9 +51,10 @@ void __noreturn cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); - + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b7cf6a547f1..7e605b95592 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -933,7 +933,7 @@ ret_with_reschedule: pta restore_all, tr1 - movi _TIF_SIGPENDING, r8 + movi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), r8 and r8, r7, r8 pta work_notifysig, tr0 bne r8, ZERO, tr0 diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index f67601cb3f1..b96489d8b27 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -139,7 +139,7 @@ work_pending: ! r8: current_thread_info ! t: result of "tst #_TIF_NEED_RESCHED, r0" bf/s work_resched - tst #_TIF_SIGPENDING, r0 + tst #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME), r0 work_notifysig: bt/s __restore_all mov r15, r4 diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 15e0a169397..f1ddc0d2367 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -48,9 +48,7 @@ void *module_alloc(unsigned long size) return NULL; ret = module_map(size); - if (!ret) - ret = ERR_PTR(-ENOMEM); - else + if (ret) memset(ret, 0, size); return ret; @@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, v = sym->st_value + rel[i].r_addend; switch (ELF_R_TYPE(rel[i].r_info) & 0xff) { + case R_SPARC_DISP32: + v -= (Elf_Addr) location; + *loc32 = v; + break; #ifdef CONFIG_SPARC64 case R_SPARC_64: location[0] = v >> 56; @@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, location[7] = v >> 0; break; - case R_SPARC_DISP32: - v -= (Elf_Addr) location; - *loc32 = v; - break; - case R_SPARC_WDISP19: v -= (Elf_Addr) location; *loc32 = (*loc32 & ~0x7ffff) | diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 7a7ce390534..d5e86c9f74f 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -69,7 +69,6 @@ static inline const struct cpumask *cpumask_of_node(int node) | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 0*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index 15fb7799208..58105c31228 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -25,21 +25,23 @@ #include <linux/module.h> #include <asm/pgtable.h> -#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) -#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1402) +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) -#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) -#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140f) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) -#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1417) -#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1418) -#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1419) -#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x141a) +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) -#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141c) -#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141d) -#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 664a60e8dfb..c17de0db673 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,6 +705,7 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; + rcu_switch(from, to); switch_to(from, to, from); } diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0..33a6a2423bd 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 40db8f71dea..2df313b6a58 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); -DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); -DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); -DEFINE_STR(UM_KERN_ERR, KERN_ERR); -DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); -DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); -DEFINE_STR(UM_KERN_INFO, KERN_INFO); -DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE_STR(UM_KERN_CONT, KERN_CONT); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 4fa82c055aa..cef06856333 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -26,6 +26,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + #ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade936636..8c82786da82 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -39,34 +39,21 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { + get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; -} -EXPORT_SYMBOL(start_thread); - -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) -{ - long error; - - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; + current->ptrace &= ~PT_DTRACE; #ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); + SUBARCH_EXECVE1(regs->regs); #endif - task_unlock(current); - } - return error; } +EXPORT_SYMBOL(start_thread); long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; - err = execve1(file, argv, env); + err = do_execve(file, argv, env, ¤t->thread.regs); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -81,7 +68,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + error = do_execve(filename, argv, env, ¤t->thread.regs); putname(filename); out: return error; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714..c5f5afa5074 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc2..cc9c2350e41 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee..a4c6d8eee74 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index d50270d26b4..15889df9b46 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS:.o=.%): \ - c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o) + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of # using it directly. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4ab..57fecc1db94 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -7,11 +7,13 @@ config 64BIT Say no to build a 32-bit kernel - formerly known as i386 config X86_32 - def_bool !64BIT + def_bool y + depends on !64BIT select CLKSRC_I8253 config X86_64 - def_bool 64BIT + def_bool y + depends on 64BIT select X86_DEV_DMA_OPS ### Arch settings @@ -36,6 +38,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER @@ -60,6 +63,8 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 @@ -97,9 +102,12 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_RCU_USER_QS if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS || UPROBES) + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES config OUTPUT_FORMAT string @@ -127,13 +135,15 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) + def_bool y + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG config NEED_SG_DMA_LENGTH def_bool y config GENERIC_ISA_DMA - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config GENERIC_BUG def_bool y @@ -150,13 +160,16 @@ config GENERIC_GPIO bool config ARCH_MAY_HAVE_PC_FDC - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + def_bool y + depends on !X86_XADD config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + def_bool y + depends on X86_XADD config GENERIC_CALIBRATE_DELAY def_bool y @@ -746,13 +759,14 @@ config SWIOTLB def_bool y if X86_64 ---help--- Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. + which don't have a hardware IOMMU. Using this PCI devices + which can only access 32-bits of memory can be used on systems + with more than 3 GB of memory. + If unsure, say Y. config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + def_bool y + depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" @@ -796,17 +810,6 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. -config IRQ_TIME_ACCOUNTING - bool "Fine granularity task level IRQ time accounting" - default n - ---help--- - Select this option to enable fine granularity task irq time - accounting. This is done by reading a timestamp on each - transitions between softirq and hardirq state, so there can be a - small performance impact. - - If in doubt, say N here. - source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -1159,10 +1162,12 @@ config X86_PAE consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool y + depends on X86_64 || X86_PAE config ARCH_DMA_ADDR_T_64BIT - def_bool X86_64 || HIGHMEM64G + def_bool y + depends on X86_64 || HIGHMEM64G config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT @@ -1285,8 +1290,8 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG + def_bool y + depends on X86_64 && MEMORY_HOTPLUG config ARCH_PROC_KCORE_TEXT def_bool y @@ -1975,7 +1980,6 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - default n depends on PCI && EXPERIMENTAL help Read the PCI windows out of the CNB20LE host bridge. This allows @@ -2186,18 +2190,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e9984..f3b86d0df44 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 682e9c210ba..474ca35b1bc 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -archscripts: +archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs ### diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd6786..2a017441b8b 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67dcb0..5598547281a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb2903809..671524d0f6c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b63d6..452d4dd0a95 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -162,7 +162,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), + (stack_t __force __user *) &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -361,7 +362,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -382,7 +383,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, if (used_math()) { sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; + *fpstate = (struct _fpstate_ia32 __user *) sp; if (save_i387_xstate_ia32(*fpstate) < 0) return (void __user *) -1L; } @@ -448,7 +449,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -529,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bece094..c5b938d92ea 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599..444704c8e18 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009deb5..6dfd0195bb5 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a740f69..7f8422a28a4 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with #include "dwarf2.h" /* - * 64-bit system call stack frame layout defines and helpers, for - * assembly code (note that the seemingly unnecessary parentheses - * are to prevent cpp from inserting spaces in expressions that get - * passed to macros): + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ -#define R15 (0) -#define R14 (8) -#define R13 (16) -#define R12 (24) -#define RBP (32) -#define RBX (40) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 (48) -#define R10 (56) -#define R9 (64) -#define R8 (72) -#define RAX (80) -#define RCX (88) -#define RDX (96) -#define RSI (104) -#define RDI (112) -#define ORIG_RAX (120) /* + error_code */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP (128) -#define CS (136) -#define EFLAGS (144) -#define RSP (152) -#define SS (160) +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5ff682..633b6176cf6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -209,6 +209,7 @@ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc0874..9a25b522d37 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 2c392d663dc..434e2106cc8 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -35,8 +35,6 @@ #define HPET_ID_NUMBER_SHIFT 8 #define HPET_ID_VENDOR_SHIFT 16 -#define HPET_ID_VENDOR_8086 0x8086 - #define HPET_CFG_ENABLE 0x001 #define HPET_CFG_LEGACY 0x002 #define HPET_LEGACY_8254 2 diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f229b13a5f3..f42a04735a0 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -48,7 +48,7 @@ struct iommu_table_entry { #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry const \ + static const struct iommu_table_entry \ __iommu_entry_##_detect __used \ __attribute__ ((unused, __section__(".iommu_table"), \ aligned((sizeof(void *))))) \ @@ -63,10 +63,10 @@ struct iommu_table_entry { * to stop detecting the other IOMMUs after yours has been detected. */ #define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) #define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) /* * A more sophisticated version of IOMMU_INIT. This variant requires: diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 54788253915..d3ddd17405d 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67..41e08cb6a09 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7..1eaa6b05667 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98..4fabcdf1cfa 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 00000000000..3f2207bfd17 --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bca..b98c0d958eb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR @@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 00000000000..d1ac07a2397 --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d78..cdf5674dd23 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db4881..4ca1c611b55 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007e..c535d847e3b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,6 +89,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +128,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +144,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1d..8ff8be7835a 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,11 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb0522850b7..fddb53d6391 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb963595..36ec21c36d6 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include <linux/types.h> /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e841dd..472b9b78301 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d9..8d7a619718b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6e..e651f7a589a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* - * If mp_register_lapic successfully generates a new logical cpu + * If acpi_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ cpumask_andnot(new_map, cpu_present_mask, tmp_map); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ced4534baed..ef5ccca79a6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb308232..b17416e72fb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fcc..134505e07b0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1023,14 +1023,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + printk(KERN_CONT "%s", strim(c->x86_model_id)); else printk(KERN_CONT "%d86", c->x86); + printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); else - printk(KERN_CONT "\n"); + printk(KERN_CONT ")\n"); print_cpu_msr(c); } @@ -1116,8 +1118,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1299,8 +1299,6 @@ void __cpuinit cpu_init(void) fpu_init(); xsave_init(); - raw_local_save_flags(kernel_eflags); - if (is_uv_system()) uv_cpu_init(); } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba33..8b6defe7eef 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec863..eebd5ffe1bb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +static const struct perf_event_attr ibs_notsupp = { + .exclude_user = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + .exclude_idle = 1, + .exclude_host = 1, + .exclude_guest = 1, +}; + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; + if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) + return -EINVAL; + if (config & ~perf_ibs->config_mask) return -EINVAL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0d3d63afa76..6bca492b854 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2048,7 +2048,6 @@ __init int intel_pmu_init(void) case 42: /* SandyBridge */ case 45: /* SandyBridge, "Romely-EP" */ x86_add_quirk(intel_sandybridge_quirk); - case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2073,6 +2072,29 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; + case 58: /* IvyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + pr_cont("IvyBridge events, "); + break; + default: switch (x86_pmu.version) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf425..826054a4f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e7..99d96a4978b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + static struct attribute *snb_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { .constraints = snb_uncore_cbox_constraints, .ops = &snb_uncore_msr_ops, .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, }; static struct intel_uncore_type *snb_msr_uncores[] = { @@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - static struct intel_uncore_box *box; + struct intel_uncore_box *box; box = *per_cpu_ptr(pmu->box, cpu); if (box) @@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aa..e68a4550e95 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c668148..fbd89556229 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323..60c78917190 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a87..b1581527a23 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f2883747..f438a44bf8f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,17 +1109,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1135,71 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1240,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834..066334be7b7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,7 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/asm.h> +#include <asm/rcu.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -68,25 +69,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +124,78 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +216,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +229,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -342,15 +445,15 @@ ENDPROC(native_usergs_sysret64) .macro SAVE_ARGS_IRQ cld /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) /* Save rbp so that we can unwind from get_irq_regs() */ movq_cfi rbp, 0 @@ -384,7 +487,7 @@ ENDPROC(native_usergs_sysret64) .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 + PARTIAL_FRAME 1 (REST_SKIP+8) movq 5*8+16(%rsp), %r11 /* save return address */ movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 @@ -440,7 +543,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -565,7 +668,7 @@ sysret_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi jmp sysret_check @@ -678,7 +781,7 @@ int_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -974,7 +1077,7 @@ retint_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) @@ -1449,7 +1552,7 @@ paranoid_userspace: paranoid_schedule: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - call schedule + SCHEDULE_USER DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF jmp paranoid_userspace diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e..1d414029f1d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. + */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91a..9a5c460404d 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of. */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b..57916c0d3cf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f04..a7c5661f849 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 00000000000..e309cc5c276 --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2069e..d5f15c3f7b2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) return oprom; } -void *pci_map_biosrom(struct pci_dev *pdev) +void __iomem *pci_map_biosrom(struct pci_dev *pdev) { struct resource *oprom = find_oprom(pdev); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0..9f94f8ec26e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + rcu_user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + rcu_user_enter(); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376..bca0ab903e5 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + rcu_user_exit(); + #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) @@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ + + rcu_user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c0..c80a33bc528 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d116148..cd3b2438a98 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) return 1; } +void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); +} + /* * Enable single or block step. */ @@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c936..cfbe3fc4158 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,6 +55,7 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> +#include <asm/rcu.h> #include <asm/mach_traps.h> @@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) +static int __kprobes +do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, + struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; + if (trapnr < X86_TRAP_UD) { + if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + return 0; + } + return -1; } #endif + if (!user_mode(regs)) { + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = trapnr; + die(str, regs, error_code); + } + return 0; + } - if (!user_mode(regs)) - goto kernel_trap; + return -1; +} -#ifdef CONFIG_X86_32 -trap_signal: -#endif +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -158,33 +174,20 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif } #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ + exception_exit(regs); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ + exception_exit(regs); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { + exception_enter(regs); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { + preempt_conditional_sti(regs); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); + } + exception_exit(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; + exception_enter(regs); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + exception_enter(regs); conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; + if (regs->flags & X86_VM_MASK) { + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + goto exit; + } #endif tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; + if (!user_mode(regs)) { + if (fixup_exception(regs)) + goto exit; + + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) + die("general protection fault", regs, error_code); + goto exit; + } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); +exit: + exception_exit(regs); } /* May run on IST stack. */ @@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + exception_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); +exit: + exception_exit(regs); } #ifdef CONFIG_X86_64 @@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; + exception_enter(regs); + get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; + goto exit; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); @@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; + goto exit; } /* @@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; +exit: + exception_exit(regs); } /* @@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; #endif - + exception_enter(regs); math_error(regs, error_code, X86_TRAP_MF); + exception_exit(regs); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + exception_enter(regs); math_error(regs, error_code, X86_TRAP_XF); + exception_exit(regs); } dotraplinkage void @@ -629,6 +644,7 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + exception_enter(regs); #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -637,6 +653,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + exception_exit(regs); return; } #endif @@ -644,12 +661,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif + exception_exit(regs); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + + exception_enter(regs); local_irq_enable(); info.si_signo = SIGILL; @@ -657,10 +677,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_code = ILL_BADSTK; info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); + } + exception_exit(regs); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa..9538f00827a 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } +} diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927..1330dd10295 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e2070911..9e1a8a7ba6e 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -132,9 +132,9 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, fx_sw_user->xstate_size > fx_sw_user->extended_size) return -EINVAL; - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); + err = __get_user(magic2, (__u32 __user *) (fpstate + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); if (err) return err; /* diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff..bca63f04dcc 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1eb202ee76..b06737d122f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4543,7 +4543,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d8e0b..7dde46d68a2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ +#include <asm/rcu.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address) * and the problem, and then passes it off to one of the appropriate * routines. */ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) +static void __kprobes +__do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1209,3 +1210,11 @@ good_area: up_read(&mm->mmap_sem); } + +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + exception_enter(regs); + __do_page_fault(regs, error_code); + exception_exit(regs); +} diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990723e..ab1f6a93b52 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr <= 256) + if (pagenr < 256) return 1; if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 937bcece700..704b9ec043d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772..aeaff8bef2f 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5ee..46a9df99f3c 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e4..ca255a805ed 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea1350..ba7363ecf89 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37..b5408cecac6 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9..db444c7218f 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/syscalls.h> +#include <sysdep/syscalls.h> /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15..adb08eb5c22 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include <linux/sched.h> +#include <asm/prctl.h> /* XXX This should get the constants from libc */ +#include <os.h> long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a3860..1fbe75a95f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void) pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 76ba0e97e53..72213da605f 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. */ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14f..e2d62d697b5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -544,4 +545,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7a6e5..353c50f1870 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 2c8d6a3d250..bc44311aa18 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -31,6 +31,7 @@ #include <linux/mqueue.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -110,8 +111,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) platform_idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/block/blk-core.c b/block/blk-core.c index 4b4dbdfbca8..ee3cb3a5e27 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2254,9 +2254,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) error_type = "I/O"; break; } - printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", - error_type, req->rq_disk ? req->rq_disk->disk_name : "?", - (unsigned long long)blk_rq_pos(req)); + printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", + error_type, req->rq_disk ? + req->rq_disk->disk_name : "?", + (unsigned long long)blk_rq_pos(req)); + } blk_account_io_completion(req, nr_bytes); diff --git a/block/ioctl.c b/block/ioctl.c index 4476e0e85d1..4a85096f541 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -41,7 +41,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user sizeof(long long) > sizeof(long)) { long pstart = start, plength = length; if (pstart != start || plength != length - || pstart < 0 || plength < 0) + || pstart < 0 || plength < 0 || partno > 65535) return -EINVAL; } diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index a89734621e5..5b6b1d8e6cc 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -16,12 +16,14 @@ #include <linux/irq.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> #include "internal.h" struct regmap_irq_chip_data { struct mutex lock; + struct irq_chip irq_chip; struct regmap *map; const struct regmap_irq_chip *chip; @@ -59,6 +61,14 @@ static void regmap_irq_sync_unlock(struct irq_data *data) struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data); struct regmap *map = d->map; int i, ret; + u32 reg; + + if (d->chip->runtime_pm) { + ret = pm_runtime_get_sync(map->dev); + if (ret < 0) + dev_err(map->dev, "IRQ sync failed to resume: %d\n", + ret); + } /* * If there's been a change in the mask write it back to the @@ -66,15 +76,22 @@ static void regmap_irq_sync_unlock(struct irq_data *data) * suppress pointless writes. */ for (i = 0; i < d->chip->num_regs; i++) { - ret = regmap_update_bits(d->map, d->chip->mask_base + - (i * map->reg_stride * - d->irq_reg_stride), + reg = d->chip->mask_base + + (i * map->reg_stride * d->irq_reg_stride); + if (d->chip->mask_invert) + ret = regmap_update_bits(d->map, reg, + d->mask_buf_def[i], ~d->mask_buf[i]); + else + ret = regmap_update_bits(d->map, reg, d->mask_buf_def[i], d->mask_buf[i]); if (ret != 0) dev_err(d->map->dev, "Failed to sync masks in %x\n", - d->chip->mask_base + (i * map->reg_stride)); + reg); } + if (d->chip->runtime_pm) + pm_runtime_put(map->dev); + /* If we've changed our wakeup count propagate it to the parent */ if (d->wake_count < 0) for (i = d->wake_count; i < 0; i++) @@ -128,8 +145,7 @@ static int regmap_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } -static struct irq_chip regmap_irq_chip = { - .name = "regmap", +static const struct irq_chip regmap_irq_chip = { .irq_bus_lock = regmap_irq_lock, .irq_bus_sync_unlock = regmap_irq_sync_unlock, .irq_disable = regmap_irq_disable, @@ -144,6 +160,16 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) struct regmap *map = data->map; int ret, i; bool handled = false; + u32 reg; + + if (chip->runtime_pm) { + ret = pm_runtime_get_sync(map->dev); + if (ret < 0) { + dev_err(map->dev, "IRQ thread failed to resume: %d\n", + ret); + return IRQ_NONE; + } + } /* * Ignore masked IRQs and ack if we need to; we ack early so @@ -160,20 +186,20 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) if (ret != 0) { dev_err(map->dev, "Failed to read IRQ status: %d\n", ret); + if (chip->runtime_pm) + pm_runtime_put(map->dev); return IRQ_NONE; } data->status_buf[i] &= ~data->mask_buf[i]; if (data->status_buf[i] && chip->ack_base) { - ret = regmap_write(map, chip->ack_base + - (i * map->reg_stride * - data->irq_reg_stride), - data->status_buf[i]); + reg = chip->ack_base + + (i * map->reg_stride * data->irq_reg_stride); + ret = regmap_write(map, reg, data->status_buf[i]); if (ret != 0) dev_err(map->dev, "Failed to ack 0x%x: %d\n", - chip->ack_base + (i * map->reg_stride), - ret); + reg, ret); } } @@ -185,6 +211,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) } } + if (chip->runtime_pm) + pm_runtime_put(map->dev); + if (handled) return IRQ_HANDLED; else @@ -197,7 +226,7 @@ static int regmap_irq_map(struct irq_domain *h, unsigned int virq, struct regmap_irq_chip_data *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_chip_and_handler(virq, ®map_irq_chip, handle_edge_irq); + irq_set_chip(virq, &data->irq_chip); irq_set_nested_thread(virq, 1); /* ARM needs us to explicitly flag the IRQ as valid @@ -238,6 +267,7 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, struct regmap_irq_chip_data *d; int i; int ret = -ENOMEM; + u32 reg; for (i = 0; i < chip->num_irqs; i++) { if (chip->irqs[i].reg_offset % map->reg_stride) @@ -284,6 +314,13 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, goto err_alloc; } + d->irq_chip = regmap_irq_chip; + d->irq_chip.name = chip->name; + if (!chip->wake_base) { + d->irq_chip.irq_set_wake = NULL; + d->irq_chip.flags |= IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE; + } d->irq = irq; d->map = map; d->chip = chip; @@ -303,16 +340,37 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, /* Mask all the interrupts by default */ for (i = 0; i < chip->num_regs; i++) { d->mask_buf[i] = d->mask_buf_def[i]; - ret = regmap_write(map, chip->mask_base + (i * map->reg_stride - * d->irq_reg_stride), - d->mask_buf[i]); + reg = chip->mask_base + + (i * map->reg_stride * d->irq_reg_stride); + if (chip->mask_invert) + ret = regmap_update_bits(map, reg, + d->mask_buf[i], ~d->mask_buf[i]); + else + ret = regmap_update_bits(map, reg, + d->mask_buf[i], d->mask_buf[i]); if (ret != 0) { dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", - chip->mask_base + (i * map->reg_stride), ret); + reg, ret); goto err_alloc; } } + /* Wake is disabled by default */ + if (d->wake_buf) { + for (i = 0; i < chip->num_regs; i++) { + d->wake_buf[i] = d->mask_buf_def[i]; + reg = chip->wake_base + + (i * map->reg_stride * d->irq_reg_stride); + ret = regmap_update_bits(map, reg, d->wake_buf[i], + d->wake_buf[i]); + if (ret != 0) { + dev_err(map->dev, "Failed to set masks in 0x%x: %d\n", + reg, ret); + goto err_alloc; + } + } + } + if (irq_base) d->domain = irq_domain_add_legacy(map->dev->of_node, chip->num_irqs, irq_base, 0, diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index c241ae2f2f1..52069d29ff1 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -659,13 +659,12 @@ EXPORT_SYMBOL_GPL(devm_regmap_init); * new cache. This can be used to restore the cache to defaults or to * update the cache configuration to reflect runtime discovery of the * hardware. + * + * No explicit locking is done here, the user needs to ensure that + * this function will not race with other calls to regmap. */ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) { - int ret; - - map->lock(map); - regcache_exit(map); regmap_debugfs_exit(map); @@ -681,11 +680,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) map->cache_bypass = false; map->cache_only = false; - ret = regcache_init(map, config); - - map->unlock(map); - - return ret; + return regcache_init(map, config); } EXPORT_SYMBOL_GPL(regmap_reinit_cache); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index de0435e63b0..887f68f6d79 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -35,6 +35,7 @@ new_skb(ulong len) skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); + skb_checksum_none_assert(skb); } return skb; } diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 38aa6dda6b8..da3311129a0 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -795,6 +795,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout, } break; case CMD_PROTOCOL_ERR: + cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "%p has protocol error\n", c); break; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index a8fddeb3d63..f946d31d691 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -1148,11 +1148,15 @@ static bool mtip_pause_ncq(struct mtip_port *port, reply = port->rxfis + RX_FIS_D2H_REG; task_file_data = readl(port->mmio+PORT_TFDATA); - if ((task_file_data & 1) || (fis->command == ATA_CMD_SEC_ERASE_UNIT)) + if (fis->command == ATA_CMD_SEC_ERASE_UNIT) + clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); + + if ((task_file_data & 1)) return false; if (fis->command == ATA_CMD_SEC_ERASE_PREP) { set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); + set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag); port->ic_pause_timer = jiffies; return true; } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) && @@ -1900,7 +1904,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, int rv = 0, xfer_sz = command[3]; if (xfer_sz) { - if (user_buffer) + if (!user_buffer) return -EFAULT; buf = dmam_alloc_coherent(&port->dd->pdev->dev, @@ -2043,7 +2047,7 @@ static void mtip_set_timeout(struct host_to_dev_fis *fis, unsigned int *timeout) *timeout = 240000; /* 4 minutes */ break; case ATA_CMD_STANDBYNOW1: - *timeout = 10000; /* 10 seconds */ + *timeout = 120000; /* 2 minutes */ break; case 0xF7: case 0xFA: @@ -2588,9 +2592,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, if (!len || size) return 0; - if (size < 0) - return -EINVAL; - size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) @@ -2660,9 +2661,6 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, if (!len || size) return 0; - if (size < 0) - return -EINVAL; - size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", dd->port->flags); size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", @@ -3214,8 +3212,8 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to check write protect progress\n"); else dev_info(&dd->pdev->dev, - "Write protect progress: %d%% (%d blocks)\n", - attr242.cur, attr242.data); + "Write protect progress: %u%% (%u blocks)\n", + attr242.cur, le32_to_cpu(attr242.data)); return rv; out3: @@ -3619,6 +3617,10 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) bio_endio(bio, -ENODATA); return; } + if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) { + bio_endio(bio, -ENODATA); + return; + } } if (unlikely(!bio_has_data(bio))) { @@ -4168,7 +4170,13 @@ static void mtip_pci_shutdown(struct pci_dev *pdev) /* Table of device ids supported by this driver. */ static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = { - { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) }, { 0 } }; @@ -4199,12 +4207,12 @@ static int __init mtip_init(void) { int error; - printk(KERN_INFO MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); + pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n"); /* Allocate a major block device number to use with this driver. */ error = register_blkdev(0, MTIP_DRV_NAME); if (error <= 0) { - printk(KERN_ERR "Unable to register block device (%d)\n", + pr_err("Unable to register block device (%d)\n", error); return -EBUSY; } @@ -4213,7 +4221,7 @@ static int __init mtip_init(void) if (!dfs_parent) { dfs_parent = debugfs_create_dir("rssd", NULL); if (IS_ERR_OR_NULL(dfs_parent)) { - printk(KERN_WARNING "Error creating debugfs parent\n"); + pr_warn("Error creating debugfs parent\n"); dfs_parent = NULL; } } diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index f51fc23d17b..18627a1d04c 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -76,7 +76,13 @@ /* Micron Vendor ID & P320x SSD Device ID */ #define PCI_VENDOR_ID_MICRON 0x1344 -#define P320_DEVICE_ID 0x5150 +#define P320H_DEVICE_ID 0x5150 +#define P320M_DEVICE_ID 0x5151 +#define P320S_DEVICE_ID 0x5152 +#define P325M_DEVICE_ID 0x5153 +#define P420H_DEVICE_ID 0x5160 +#define P420M_DEVICE_ID 0x5161 +#define P425M_DEVICE_ID 0x5163 /* Driver name and version strings */ #define MTIP_DRV_NAME "mtip32xx" @@ -131,10 +137,12 @@ enum { MTIP_PF_SVC_THD_STOP_BIT = 8, /* below are bit numbers in 'dd_flag' defined in driver_data */ + MTIP_DDF_SEC_LOCK_BIT = 0, MTIP_DDF_REMOVE_PENDING_BIT = 1, MTIP_DDF_OVER_TEMP_BIT = 2, MTIP_DDF_WRITE_PROTECT_BIT = 3, MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \ + (1 << MTIP_DDF_SEC_LOCK_BIT) | \ (1 << MTIP_DDF_OVER_TEMP_BIT) | \ (1 << MTIP_DDF_WRITE_PROTECT_BIT)), diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 38a2d063188..ad16c68c864 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -79,6 +79,7 @@ struct nvme_dev { char serial[20]; char model[40]; char firmware_rev[8]; + u32 max_hw_sectors; }; /* @@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, } static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned dword11, dma_addr_t dma_addr) + unsigned nsid, dma_addr_t dma_addr) { struct nvme_command c; memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; + c.features.nsid = cpu_to_le32(nsid); c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - c.features.dword11 = cpu_to_le32(dword11); return nvme_submit_admin_cmd(dev, &c, NULL); } @@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid, return nvme_submit_admin_cmd(dev, &c, result); } +/** + * nvme_cancel_ios - Cancel outstanding I/Os + * @queue: The queue to cancel I/Os on + * @timeout: True to only cancel I/Os which have timed out + */ +static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) +{ + int depth = nvmeq->q_depth - 1; + struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + unsigned long now = jiffies; + int cmdid; + + for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { + void *ctx; + nvme_completion_fn fn; + static struct nvme_completion cqe = { + .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, + }; + + if (timeout && !time_after(now, info[cmdid].timeout)) + continue; + dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); + ctx = cancel_cmdid(nvmeq, cmdid, &fn); + fn(nvmeq->dev, ctx, &cqe); + } +} + +static void nvme_free_queue_mem(struct nvme_queue *nvmeq) +{ + dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), + (void *)nvmeq->cqes, nvmeq->cq_dma_addr); + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + nvmeq->sq_cmds, nvmeq->sq_dma_addr); + kfree(nvmeq); +} + static void nvme_free_queue(struct nvme_dev *dev, int qid) { struct nvme_queue *nvmeq = dev->queues[qid]; int vector = dev->entry[nvmeq->cq_vector].vector; + spin_lock_irq(&nvmeq->q_lock); + nvme_cancel_ios(nvmeq, false); + spin_unlock_irq(&nvmeq->q_lock); + irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); @@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) adapter_delete_cq(dev, qid); } - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), - (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), - nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); + nvme_free_queue_mem(nvmeq); } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth, int vector) { struct device *dmadev = &dev->pci_dev->dev; - unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info)); + unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * + sizeof(struct nvme_cmd_info)); struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); if (!nvmeq) return NULL; @@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) { - int result; + int result = 0; u32 aqa; u64 cap; unsigned long timeout; @@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev) timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; dev->db_stride = NVME_CAP_STRIDE(cap); - while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { + while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) { msleep(100); if (fatal_signal_pending(current)) - return -EINTR; + result = -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, "Device not ready; aborting initialisation\n"); - return -ENODEV; + result = -ENODEV; } } + if (result) { + nvme_free_queue_mem(nvmeq); + return result; + } + result = queue_request_irq(dev, nvmeq, "nvme admin"); dev->queues[0] = nvmeq; return result; @@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, offset = offset_in_page(addr); count = DIV_ROUND_UP(offset + length, PAGE_SIZE); pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); err = get_user_pages_fast(addr, count, 1, pages); if (err < count) { @@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return status; } -static int nvme_user_admin_cmd(struct nvme_ns *ns, +static int nvme_user_admin_cmd(struct nvme_dev *dev, struct nvme_admin_cmd __user *ucmd) { - struct nvme_dev *dev = ns->dev; struct nvme_admin_cmd cmd; struct nvme_command c; int status, length; - struct nvme_iod *iod; + struct nvme_iod *uninitialized_var(iod); if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case NVME_IOCTL_ID: return ns->ns_id; case NVME_IOCTL_ADMIN_CMD: - return nvme_user_admin_cmd(ns, (void __user *)arg); + return nvme_user_admin_cmd(ns->dev, (void __user *)arg); case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, (void __user *)arg); default: @@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = { .compat_ioctl = nvme_ioctl, }; -static void nvme_timeout_ios(struct nvme_queue *nvmeq) -{ - int depth = nvmeq->q_depth - 1; - struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - unsigned long now = jiffies; - int cmdid; - - for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) { - void *ctx; - nvme_completion_fn fn; - static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, }; - - if (!time_after(now, info[cmdid].timeout)) - continue; - dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid); - ctx = cancel_cmdid(nvmeq, cmdid, &fn); - fn(nvmeq->dev, ctx, &cqe); - } -} - static void nvme_resubmit_bios(struct nvme_queue *nvmeq) { while (bio_list_peek(&nvmeq->sq_cong)) { @@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data) spin_lock_irq(&nvmeq->q_lock); if (nvme_process_cq(nvmeq)) printk("process_cq did something\n"); - nvme_timeout_ios(nvmeq); + nvme_cancel_ios(nvmeq, true); nvme_resubmit_bios(nvmeq); spin_unlock_irq(&nvmeq->q_lock); } @@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, ns->disk = disk; lbaf = id->flbas & 0xf; ns->lba_shift = id->lbaf[lbaf].ds; + blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); + if (dev->max_hw_sectors) + blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); disk->major = nvme_major; disk->minors = NVME_MINORS; @@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count) static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) { - int result, cpu, i, nr_io_queues, db_bar_size; + int result, cpu, i, nr_io_queues, db_bar_size, q_depth; nr_io_queues = num_online_cpus(); result = set_queue_count(dev, nr_io_queues); @@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev) cpu = cpumask_next(cpu, cpu_online_mask); } + q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, + NVME_Q_DEPTH); for (i = 0; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_create_queue(dev, i + 1, - NVME_Q_DEPTH, i); + dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); if (IS_ERR(dev->queues[i + 1])) return PTR_ERR(dev->queues[i + 1]); dev->queue_count++; @@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); + if (ctrl->mdts) { + int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; + dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); + } id_ns = mem; for (i = 1; i <= nn; i++) { @@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev) list_del(&dev->node); spin_unlock(&dev_list_lock); - /* TODO: wait all I/O finished or cancel them */ - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { list_del(&ns->list); del_gendisk(ns->disk); @@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } -/* XXX: Use an ida or something to let remove / add work correctly */ -static void nvme_set_instance(struct nvme_dev *dev) +static DEFINE_IDA(nvme_instance_ida); + +static int nvme_set_instance(struct nvme_dev *dev) { - static int instance; - dev->instance = instance++; + int instance, error; + + do { + if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) + return -ENODEV; + + spin_lock(&dev_list_lock); + error = ida_get_new(&nvme_instance_ida, &instance); + spin_unlock(&dev_list_lock); + } while (error == -EAGAIN); + + if (error) + return -ENODEV; + + dev->instance = instance; + return 0; } static void nvme_release_instance(struct nvme_dev *dev) { + spin_lock(&dev_list_lock); + ida_remove(&nvme_instance_ida, dev->instance); + spin_unlock(&dev_list_lock); } static int __devinit nvme_probe(struct pci_dev *pdev, @@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - nvme_set_instance(dev); + result = nvme_set_instance(dev); + if (result) + goto disable; + dev->entry[0].vector = pdev->irq; result = nvme_setup_prp_pools(dev); @@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { - int result = -EBUSY; + int result; nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); if (IS_ERR(nvme_thread)) return PTR_ERR(nvme_thread); - nvme_major = register_blkdev(nvme_major, "nvme"); - if (nvme_major <= 0) + result = register_blkdev(nvme_major, "nvme"); + if (result < 0) goto kill_kthread; + else if (result > 0) + nvme_major = result; result = pci_register_driver(&nvme_driver); if (result) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 9917943a357..54a55f03115 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -246,13 +246,12 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - rbd_get_dev(rbd_dev); - - set_device_ro(bdev, rbd_dev->read_only); - if ((mode & FMODE_WRITE) && rbd_dev->read_only) return -EROFS; + rbd_get_dev(rbd_dev); + set_device_ro(bdev, rbd_dev->read_only); + return 0; } diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 73f196ca713..c6decb901e5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req) invcount++; } - ret = gnttab_unmap_refs(unmap, pages, invcount, false); + ret = gnttab_unmap_refs(unmap, NULL, pages, invcount); BUG_ON(ret); } diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 5869ea38705..72ce247a0e8 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -1,4 +1,5 @@ # common clock types +obj-$(CONFIG_HAVE_CLK) += clk-devres.o obj-$(CONFIG_CLKDEV_LOOKUP) += clkdev.o obj-$(CONFIG_COMMON_CLK) += clk.o clk-fixed-rate.o clk-gate.o \ clk-mux.o clk-divider.o clk-fixed-factor.o diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c new file mode 100644 index 00000000000..8f571548870 --- /dev/null +++ b/drivers/clk/clk-devres.c @@ -0,0 +1,55 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/gfp.h> + +static void devm_clk_release(struct device *dev, void *res) +{ + clk_put(*(struct clk **)res); +} + +struct clk *devm_clk_get(struct device *dev, const char *id) +{ + struct clk **ptr, *clk; + + ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + clk = clk_get(dev, id); + if (!IS_ERR(clk)) { + *ptr = clk; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return clk; +} +EXPORT_SYMBOL(devm_clk_get); + +static int devm_clk_match(struct device *dev, void *res, void *data) +{ + struct clk **c = res; + if (!c || !*c) { + WARN_ON(!c || !*c); + return 0; + } + return *c == data; +} + +void devm_clk_put(struct device *dev, struct clk *clk) +{ + int ret; + + ret = devres_release(dev, devm_clk_release, devm_clk_match, clk); + + WARN_ON(ret); +} +EXPORT_SYMBOL(devm_clk_put); diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index d423c9bdd71..442a3136387 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -171,51 +171,6 @@ void clk_put(struct clk *clk) } EXPORT_SYMBOL(clk_put); -static void devm_clk_release(struct device *dev, void *res) -{ - clk_put(*(struct clk **)res); -} - -struct clk *devm_clk_get(struct device *dev, const char *id) -{ - struct clk **ptr, *clk; - - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - clk = clk_get(dev, id); - if (!IS_ERR(clk)) { - *ptr = clk; - devres_add(dev, ptr); - } else { - devres_free(ptr); - } - - return clk; -} -EXPORT_SYMBOL(devm_clk_get); - -static int devm_clk_match(struct device *dev, void *res, void *data) -{ - struct clk **c = res; - if (!c || !*c) { - WARN_ON(!c || !*c); - return 0; - } - return *c == data; -} - -void devm_clk_put(struct device *dev, struct clk *clk) -{ - int ret; - - ret = devres_destroy(dev, devm_clk_release, devm_clk_match, clk); - - WARN_ON(ret); -} -EXPORT_SYMBOL(devm_clk_put); - void clkdev_add(struct clk_lookup *cl) { mutex_lock(&clocks_mutex); diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index c0e816468e3..1a40935c85f 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -35,7 +35,6 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cpumask.h> -#include <linux/sched.h> /* for current / set_cpus_allowed() */ #include <linux/io.h> #include <linux/delay.h> @@ -1139,16 +1138,23 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, return res; } -/* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, - unsigned targfreq, unsigned relation) +struct powernowk8_target_arg { + struct cpufreq_policy *pol; + unsigned targfreq; + unsigned relation; +}; + +static long powernowk8_target_fn(void *arg) { - cpumask_var_t oldmask; + struct powernowk8_target_arg *pta = arg; + struct cpufreq_policy *pol = pta->pol; + unsigned targfreq = pta->targfreq; + unsigned relation = pta->relation; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; unsigned int newstate; - int ret = -EIO; + int ret; if (!data) return -EINVAL; @@ -1156,29 +1162,16 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on. */ - /* This is poor form: use a workqueue or smp_call_function_single */ - if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(oldmask, tsk_cpus_allowed(current)); - set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - if (pending_bit_stuck()) { printk(KERN_ERR PFX "failing targ, change pending bit set\n"); - goto err_out; + return -EIO; } pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", pol->cpu, targfreq, pol->min, pol->max, relation); if (query_current_values_with_pending_wait(data)) - goto err_out; + return -EIO; if (cpu_family != CPU_HW_PSTATE) { pr_debug("targ: curr fid 0x%x, vid 0x%x\n", @@ -1196,7 +1189,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) - goto err_out; + return -EIO; mutex_lock(&fidvid_mutex); @@ -1209,9 +1202,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = transition_frequency_fidvid(data, newstate); if (ret) { printk(KERN_ERR PFX "transition frequency failed\n"); - ret = 1; mutex_unlock(&fidvid_mutex); - goto err_out; + return 1; } mutex_unlock(&fidvid_mutex); @@ -1220,12 +1212,25 @@ static int powernowk8_target(struct cpufreq_policy *pol, data->powernow_table[newstate].index); else pol->cur = find_khz_freq_from_fid(data->currfid); - ret = 0; -err_out: - set_cpus_allowed_ptr(current, oldmask); - free_cpumask_var(oldmask); - return ret; + return 0; +} + +/* Driver entry point to switch to the target frequency */ +static int powernowk8_target(struct cpufreq_policy *pol, + unsigned targfreq, unsigned relation) +{ + struct powernowk8_target_arg pta = { .pol = pol, .targfreq = targfreq, + .relation = relation }; + + /* + * Must run on @pol->cpu. cpufreq core is responsible for ensuring + * that we're bound to the current CPU and pol->cpu stays online. + */ + if (smp_processor_id() == pol->cpu) + return powernowk8_target_fn(&pta); + else + return work_on_cpu(pol->cpu, powernowk8_target_fn, &pta); } /* Driver entry point to verify the policy and range of frequencies */ diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 3934fcc4e00..17d6958342e 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -168,9 +168,9 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) } /** - * atc_desc_chain - build chain adding a descripor - * @first: address of first descripor of the chain - * @prev: address of previous descripor of the chain + * atc_desc_chain - build chain adding a descriptor + * @first: address of first descriptor of the chain + * @prev: address of previous descriptor of the chain * @desc: descriptor to queue * * Called from prep_* functions @@ -661,7 +661,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, flags); if (unlikely(!atslave || !sg_len)) { - dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); + dev_dbg(chan2dev(chan), "prep_slave_sg: sg length is zero!\n"); return NULL; } @@ -689,6 +689,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), + "prep_slave_sg: sg(%d) data length is zero\n", i); + goto err; + } mem_width = 2; if (unlikely(mem & 3 || len & 3)) mem_width = 0; @@ -724,6 +729,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, mem = sg_dma_address(sg); len = sg_dma_len(sg); + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), + "prep_slave_sg: sg(%d) data length is zero\n", i); + goto err; + } mem_width = 2; if (unlikely(mem & 3 || len & 3)) mem_width = 0; @@ -757,6 +767,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, err_desc_get: dev_err(chan2dev(chan), "not enough descriptors available\n"); +err: atc_desc_put(atchan, first); return NULL; } @@ -785,7 +796,7 @@ err_out: } /** - * atc_dma_cyclic_fill_desc - Fill one period decriptor + * atc_dma_cyclic_fill_desc - Fill one period descriptor */ static int atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index c64917ec313..bb02fd981af 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1118,7 +1118,7 @@ fail: * @chan: channel * @dma_addr: DMA mapped address of the buffer * @buf_len: length of the buffer (in bytes) - * @period_len: lenght of a single period + * @period_len: length of a single period * @dir: direction of the operation * @context: operation context (ignored) * diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 8f84761f98b..094437b9d82 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -1015,7 +1015,7 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) /* * Programming Error * The DMA_INTERRUPT async_tx is a NULL transfer, which will - * triger a PE interrupt. + * trigger a PE interrupt. */ if (stat & FSL_DMA_SR_PE) { chan_dbg(chan, "irq: Programming Error INT\n"); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 5084975d793..54f580bb993 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -572,8 +572,8 @@ static void imxdma_tasklet(unsigned long data) if (desc->desc.callback) desc->desc.callback(desc->desc.callback_param); - /* If we are dealing with a cyclic descriptor keep it on ld_active - * and dont mark the descripor as complete. + /* If we are dealing with a cyclic descriptor, keep it on ld_active + * and dont mark the descriptor as complete. * Only in non-cyclic cases it would be marked as complete */ if (imxdma_chan_is_doing_cyclic(imxdmac)) diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 222e907bfaa..02b21d7d38e 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -427,7 +427,7 @@ DMA engine callback Functions*/ * intel_mid_dma_tx_submit - callback to submit DMA transaction * @tx: dma engine descriptor * - * Submit the DMA trasaction for this descriptor, start if ch idle + * Submit the DMA transaction for this descriptor, start if ch idle */ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx) { diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h index 1bfa9268fea..17b42192ea5 100644 --- a/drivers/dma/intel_mid_dma_regs.h +++ b/drivers/dma/intel_mid_dma_regs.h @@ -168,9 +168,9 @@ union intel_mid_dma_cfg_hi { * @active_list: current active descriptors * @queue: current queued up descriptors * @free_list: current free descriptors - * @slave: dma slave struture - * @descs_allocated: total number of decsiptors allocated - * @dma: dma device struture pointer + * @slave: dma slave structure + * @descs_allocated: total number of descriptors allocated + * @dma: dma device structure pointer * @busy: bool representing if ch is busy (active txn) or not * @in_use: bool representing if ch is in use or not * @raw_tfr: raw trf interrupt received diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index 60e675455b6..d2ff3fda0b1 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -22,7 +22,6 @@ #define _IOAT_HW_H_ /* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 #define IOAT_MMIO_BAR 0 /* CB device ID's */ @@ -31,9 +30,6 @@ #define IOAT_PCI_DID_SCNB 0x65FF #define IOAT_PCI_DID_SNB 0x402F -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index e4feba6b03c..5d3bbcd279b 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -522,7 +522,7 @@ enum desc_status { /* In the DMAC pool */ FREE, /* - * Allocted to some channel during prep_xxx + * Allocated to some channel during prep_xxx * Also may be sitting on the work_list. */ PREP, @@ -1567,17 +1567,19 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r) goto xfer_exit; } - /* Prefer Secure Channel */ - if (!_manager_ns(thrd)) - r->cfg->nonsecure = 0; - else - r->cfg->nonsecure = 1; /* Use last settings, if not provided */ - if (r->cfg) + if (r->cfg) { + /* Prefer Secure Channel */ + if (!_manager_ns(thrd)) + r->cfg->nonsecure = 0; + else + r->cfg->nonsecure = 1; + ccr = _prepare_ccr(r->cfg); - else + } else { ccr = readl(regs + CC(thrd->id)); + } /* If this req doesn't have valid xfer settings */ if (!_is_valid(ccr)) { @@ -2928,6 +2930,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) num_chan = max_t(int, pi->pcfg.num_peri, pi->pcfg.num_chan); pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); + if (!pdmac->peripherals) { + ret = -ENOMEM; + dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n"); + goto probe_err5; + } for (i = 0; i < num_chan; i++) { pch = &pdmac->peripherals[i]; diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index ced98826684..f72348d0bc4 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -4446,7 +4446,7 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev) ret = -ENOMEM; goto err_dma_alloc; } - dev_dbg(&ofdev->dev, "allocted descriptor pool virt 0x%p phys 0x%llx\n", + dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n", adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool); regs = ioremap(res.start, resource_size(&res)); diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 51e8e5396e9..6d47373f3f5 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -202,7 +202,7 @@ /* LLI related structures */ /** - * struct d40_phy_lli - The basic configration register for each physical + * struct d40_phy_lli - The basic configuration register for each physical * channel. * * @reg_cfg: The configuration register. diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 616d90bcb3a..d5dc9da7f99 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -199,6 +199,36 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems) return (void *)(((unsigned long)ptr) + align - r); } +static void _edac_mc_free(struct mem_ctl_info *mci) +{ + int i, chn, row; + struct csrow_info *csr; + const unsigned int tot_dimms = mci->tot_dimms; + const unsigned int tot_channels = mci->num_cschannel; + const unsigned int tot_csrows = mci->nr_csrows; + + if (mci->dimms) { + for (i = 0; i < tot_dimms; i++) + kfree(mci->dimms[i]); + kfree(mci->dimms); + } + if (mci->csrows) { + for (row = 0; row < tot_csrows; row++) { + csr = mci->csrows[row]; + if (csr) { + if (csr->channels) { + for (chn = 0; chn < tot_channels; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); + } + kfree(csr); + } + } + kfree(mci->csrows); + } + kfree(mci); +} + /** * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure * @mc_num: Memory controller number @@ -413,24 +443,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, return mci; error: - if (mci->dimms) { - for (i = 0; i < tot_dimms; i++) - kfree(mci->dimms[i]); - kfree(mci->dimms); - } - if (mci->csrows) { - for (chn = 0; chn < tot_channels; chn++) { - csr = mci->csrows[chn]; - if (csr) { - for (chn = 0; chn < tot_channels; chn++) - kfree(csr->channels[chn]); - kfree(csr); - } - kfree(mci->csrows[i]); - } - kfree(mci->csrows); - } - kfree(mci); + _edac_mc_free(mci); return NULL; } @@ -445,6 +458,14 @@ void edac_mc_free(struct mem_ctl_info *mci) { edac_dbg(1, "\n"); + /* If we're not yet registered with sysfs free only what was allocated + * in edac_mc_alloc(). + */ + if (!device_is_registered(&mci->dev)) { + _edac_mc_free(mci); + return; + } + /* the mci instance is freed here, when the sysfs object is dropped */ edac_unregister_sysfs(mci); } diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 47180a08eda..b6653a6fc5d 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -391,7 +391,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) for (j = 0; j < nr_channels; j++) { struct dimm_info *dimm = csrow->channels[j]->dimm; - dimm->nr_pages = nr_pages / nr_channels; + dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; dimm->mtype = MEM_DDR2; dimm->dtype = DEV_UNKNOWN; diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 39c63757c2a..6a49dd00b81 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1012,6 +1012,10 @@ static void handle_channel(struct i5000_pvt *pvt, int slot, int channel, /* add the number of COLUMN bits */ addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr); + /* Dual-rank memories have twice the size */ + if (dinfo->dual_rank) + addrBits++; + addrBits += 6; /* add 64 bits per DIMM */ addrBits -= 20; /* divide by 2^^20 */ addrBits -= 3; /* 8 bits per bytes */ diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f3b1f9fafa4..5715b7c2c51 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -513,7 +513,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; struct dimm_info *dimm; - int i, j, banks, ranks, rows, cols, size, npages; + unsigned i, j, banks, ranks, rows, cols, npages; + u64 size; u32 reg; enum edac_type mode; enum mem_type mtype; @@ -585,10 +586,10 @@ static int get_dimm_config(struct mem_ctl_info *mci) cols = numcol(mtr); /* DDR3 has 8 I/O banks */ - size = (rows * cols * banks * ranks) >> (20 - 3); + size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", pvt->sbridge_dev->mc, i, j, size, npages, banks, ranks, rows, cols); diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index 427a289f32a..6c19833ed2d 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -434,6 +434,11 @@ static int __devinit arizona_extcon_probe(struct platform_device *pdev) regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_ANALOGUE, ARIZONA_JD1_ENA, ARIZONA_JD1_ENA); + ret = regulator_allow_bypass(info->micvdd, true); + if (ret != 0) + dev_warn(arizona->dev, "Failed to set MICVDD to bypass: %d\n", + ret); + pm_runtime_put(&pdev->dev); return 0; diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 8a420f13905..ed94b4ea72e 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -308,6 +308,7 @@ static int lpc32xx_gpio_dir_output_p012(struct gpio_chip *chip, unsigned pin, { struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + __set_gpio_level_p012(group, pin, value); __set_gpio_dir_p012(group, pin, 0); return 0; @@ -318,6 +319,7 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, { struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + __set_gpio_level_p3(group, pin, value); __set_gpio_dir_p3(group, pin, 0); return 0; @@ -326,6 +328,9 @@ static int lpc32xx_gpio_dir_output_p3(struct gpio_chip *chip, unsigned pin, static int lpc32xx_gpio_dir_out_always(struct gpio_chip *chip, unsigned pin, int value) { + struct lpc32xx_gpio_chip *group = to_lpc32xx_gpio(chip); + + __set_gpo_level_p3(group, pin, value); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 489e2b162b2..274d25de521 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3242,7 +3242,8 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, { int ret; - BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); + if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) + return -EBUSY; if (obj->gtt_space != NULL) { if ((alignment && obj->gtt_offset & (alignment - 1)) || diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bc2ad348e5d..c040aee1341 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4191,12 +4191,6 @@ static void i8xx_update_pll(struct drm_crtc *crtc, POSTING_READ(DPLL(pipe)); udelay(150); - I915_WRITE(DPLL(pipe), dpll); - - /* Wait for the clocks to stabilize. */ - POSTING_READ(DPLL(pipe)); - udelay(150); - /* The LVDS pin pair needs to be on before the DPLLs are enabled. * This is an exception to the general rule that mode_set doesn't turn * things on. @@ -4204,6 +4198,12 @@ static void i8xx_update_pll(struct drm_crtc *crtc, if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) intel_update_lvds(crtc, clock, adjusted_mode); + I915_WRITE(DPLL(pipe), dpll); + + /* Wait for the clocks to stabilize. */ + POSTING_READ(DPLL(pipe)); + udelay(150); + /* The pixel multiplier can only be updated once the * DPLL is enabled and the clocks are stable. * diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 98f602427eb..12dc3308ab8 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -609,7 +609,7 @@ static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) u32 temp; u32 enable_bits = SDVO_ENABLE; - if (intel_hdmi->has_audio) + if (intel_hdmi->has_audio || mode != DRM_MODE_DPMS_ON) enable_bits |= SDVO_AUDIO_ENABLE; temp = I915_READ(intel_hdmi->sdvox_reg); diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ff23d88880e..3ca240b4413 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -179,7 +179,7 @@ nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS) return 0; } else if (init->class == 0x906e) { - NV_ERROR(dev, "906e not supported yet\n"); + NV_DEBUG(dev, "906e not supported yet\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nv50_gpio.c b/drivers/gpu/drm/nouveau/nv50_gpio.c index f0349053489..c399d510b27 100644 --- a/drivers/gpu/drm/nouveau/nv50_gpio.c +++ b/drivers/gpu/drm/nouveau/nv50_gpio.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ +#include <linux/dmi.h> #include "drmP.h" #include "nouveau_drv.h" #include "nouveau_hw.h" @@ -110,13 +111,25 @@ nv50_gpio_isr(struct drm_device *dev) nv_wr32(dev, 0xe074, intr1); } +static struct dmi_system_id gpio_reset_ids[] = { + { + .ident = "Apple Macbook 10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro10,1"), + } + }, + { } +}; + int nv50_gpio_init(struct drm_device *dev) { struct drm_nouveau_private *dev_priv = dev->dev_private; /* initialise gpios and routing to vbios defaults */ - nouveau_gpio_reset(dev); + if (dmi_check_system(gpio_reset_ids)) + nouveau_gpio_reset(dev); /* disable, and ack any pending gpio interrupts */ nv_wr32(dev, 0xe050, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c index f704e942372..f376c39310d 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fb.c +++ b/drivers/gpu/drm/nouveau/nvc0_fb.c @@ -124,6 +124,7 @@ nvc0_fb_init(struct drm_device *dev) priv = dev_priv->engine.fb.priv; nv_wr32(dev, 0x100c10, priv->r100c10 >> 8); + nv_mask(dev, 0x17e820, 0x00100000, 0x00000000); /* NV_PLTCG_INTR_EN */ return 0; } diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c index 7d85553d518..cd39eb99f5b 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c @@ -373,7 +373,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit) static void nvc0_fifo_isr(struct drm_device *dev) { - u32 stat = nv_rd32(dev, 0x002100); + u32 mask = nv_rd32(dev, 0x002140); + u32 stat = nv_rd32(dev, 0x002100) & mask; if (stat & 0x00000100) { NV_INFO(dev, "PFIFO: unknown status 0x00000100\n"); diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c b/drivers/gpu/drm/nouveau/nve0_fifo.c index e98d144e6eb..281bece751b 100644 --- a/drivers/gpu/drm/nouveau/nve0_fifo.c +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c @@ -345,7 +345,8 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int unit) static void nve0_fifo_isr(struct drm_device *dev) { - u32 stat = nv_rd32(dev, 0x002100); + u32 mask = nv_rd32(dev, 0x002140); + u32 stat = nv_rd32(dev, 0x002100) & mask; if (stat & 0x00000100) { NV_INFO(dev, "PFIFO: unknown status 0x00000100\n"); diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index e721e3087b9..2817101fb16 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1479,98 +1479,14 @@ static void radeon_legacy_atom_fixup(struct drm_crtc *crtc) } } -/** - * radeon_get_pll_use_mask - look up a mask of which pplls are in use - * - * @crtc: drm crtc - * - * Returns the mask of which PPLLs (Pixel PLLs) are in use. - */ -static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_crtc *test_crtc; - struct radeon_crtc *radeon_test_crtc; - u32 pll_in_use = 0; - - list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { - if (crtc == test_crtc) - continue; - - radeon_test_crtc = to_radeon_crtc(test_crtc); - if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) - pll_in_use |= (1 << radeon_test_crtc->pll_id); - } - return pll_in_use; -} - -/** - * radeon_get_shared_dp_ppll - return the PPLL used by another crtc for DP - * - * @crtc: drm crtc - * - * Returns the PPLL (Pixel PLL) used by another crtc/encoder which is - * also in DP mode. For DP, a single PPLL can be used for all DP - * crtcs/encoders. - */ -static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_encoder *test_encoder; - struct radeon_crtc *radeon_test_crtc; - - list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { - if (test_encoder->crtc && (test_encoder->crtc != crtc)) { - if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { - /* for DP use the same PLL for all */ - radeon_test_crtc = to_radeon_crtc(test_encoder->crtc); - if (radeon_test_crtc->pll_id != ATOM_PPLL_INVALID) - return radeon_test_crtc->pll_id; - } - } - } - return ATOM_PPLL_INVALID; -} - -/** - * radeon_atom_pick_pll - Allocate a PPLL for use by the crtc. - * - * @crtc: drm crtc - * - * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors - * a single PPLL can be used for all DP crtcs/encoders. For non-DP - * monitors a dedicated PPLL must be used. If a particular board has - * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming - * as there is no need to program the PLL itself. If we are not able to - * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to - * avoid messing up an existing monitor. - * - * Asic specific PLL information - * - * DCE 6.1 - * - PPLL2 is only available to UNIPHYA (both DP and non-DP) - * - PPLL0, PPLL1 are available for UNIPHYB/C/D/E/F (both DP and non-DP) - * - * DCE 6.0 - * - PPLL0 is available to all UNIPHY (DP only) - * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC - * - * DCE 5.0 - * - DCPLL is available to all UNIPHY (DP only) - * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC - * - * DCE 3.0/4.0/4.1 - * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC - * - */ static int radeon_atom_pick_pll(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct drm_encoder *test_encoder; - u32 pll_in_use; - int pll; + struct drm_crtc *test_crtc; + uint32_t pll_in_use = 0; if (ASIC_IS_DCE61(rdev)) { list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { @@ -1582,40 +1498,32 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) if ((test_radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY) && - (dig->linkb == false)) - /* UNIPHY A uses PPLL2 */ + (dig->linkb == false)) /* UNIPHY A uses PPLL2 */ return ATOM_PPLL2; - else if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { - /* UNIPHY B/C/D/E/F */ - if (rdev->clock.dp_extclk) - /* skip PPLL programming if using ext clock */ - return ATOM_PPLL_INVALID; - else { - /* use the same PPLL for all DP monitors */ - pll = radeon_get_shared_dp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - } - break; } } /* UNIPHY B/C/D/E/F */ - pll_in_use = radeon_get_pll_use_mask(crtc); - if (!(pll_in_use & (1 << ATOM_PPLL0))) + list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_test_crtc; + + if (crtc == test_crtc) + continue; + + radeon_test_crtc = to_radeon_crtc(test_crtc); + if ((radeon_test_crtc->pll_id == ATOM_PPLL0) || + (radeon_test_crtc->pll_id == ATOM_PPLL1)) + pll_in_use |= (1 << radeon_test_crtc->pll_id); + } + if (!(pll_in_use & 4)) return ATOM_PPLL0; - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; + return ATOM_PPLL1; } else if (ASIC_IS_DCE4(rdev)) { list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { if (test_encoder->crtc && (test_encoder->crtc == crtc)) { /* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock, * depending on the asic: * DCE4: PPLL or ext clock - * DCE5: PPLL, DCPLL, or ext clock - * DCE6: PPLL, PPLL0, or ext clock + * DCE5: DCPLL or ext clock * * Setting ATOM_PPLL_INVALID will cause SetPixelClock to skip * PPLL/DCPLL programming and only program the DP DTO for the @@ -1623,34 +1531,31 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) */ if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { if (rdev->clock.dp_extclk) - /* skip PPLL programming if using ext clock */ return ATOM_PPLL_INVALID; else if (ASIC_IS_DCE6(rdev)) - /* use PPLL0 for all DP */ return ATOM_PPLL0; else if (ASIC_IS_DCE5(rdev)) - /* use DCPLL for all DP */ return ATOM_DCPLL; - else { - /* use the same PPLL for all DP monitors */ - pll = radeon_get_shared_dp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } } - break; } } - /* all other cases */ - pll_in_use = radeon_get_pll_use_mask(crtc); - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; - if (!(pll_in_use & (1 << ATOM_PPLL1))) + + /* otherwise, pick one of the plls */ + list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_test_crtc; + + if (crtc == test_crtc) + continue; + + radeon_test_crtc = to_radeon_crtc(test_crtc); + if ((radeon_test_crtc->pll_id >= ATOM_PPLL1) && + (radeon_test_crtc->pll_id <= ATOM_PPLL2)) + pll_in_use |= (1 << radeon_test_crtc->pll_id); + } + if (!(pll_in_use & 1)) return ATOM_PPLL1; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; + return ATOM_PPLL2; } else - /* use PPLL1 or PPLL2 */ return radeon_crtc->crtc_id; } @@ -1792,7 +1697,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) break; } done: - radeon_crtc->pll_id = ATOM_PPLL_INVALID; + radeon_crtc->pll_id = -1; } static const struct drm_crtc_helper_funcs atombios_helper_funcs = { @@ -1841,6 +1746,6 @@ void radeon_atombios_init_crtc(struct drm_device *dev, else radeon_crtc->crtc_offset = 0; } - radeon_crtc->pll_id = ATOM_PPLL_INVALID; + radeon_crtc->pll_id = -1; drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs); } diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 8acb34fd3fd..8d7e33a0b24 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1182,7 +1182,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) ring->ready = true; radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); - if (radeon_ring_supports_scratch_reg(rdev, ring)) { + if (!ring->rptr_save_reg /* not resuming from suspend */ + && radeon_ring_supports_scratch_reg(rdev, ring)) { r = radeon_scratch_get(rdev, &ring->rptr_save_reg); if (r) { DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index ba055e9ca00..8d9dc44f1f9 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -69,6 +69,13 @@ static int udl_get_modes(struct drm_connector *connector) static int udl_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct udl_device *udl = connector->dev->dev_private; + if (!udl->sku_pixel_limit) + return 0; + + if (mode->vdisplay * mode->hdisplay > udl->sku_pixel_limit) + return MODE_VIRTUAL_Y; + return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index f2fb8f15e2f..7e0743358df 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1018,7 +1018,7 @@ int vmw_event_fence_action_create(struct drm_file *file_priv, } - event = kzalloc(sizeof(event->event), GFP_KERNEL); + event = kzalloc(sizeof(*event), GFP_KERNEL); if (unlikely(event == NULL)) { DRM_ERROR("Failed to allocate an event.\n"); ret = -ENOMEM; diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index fbf49503508..2af774ad106 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -307,7 +307,6 @@ config HID_LOGITECH config HID_LOGITECH_DJ tristate "Logitech Unifying receivers full support" depends on HID_LOGITECH - default m ---help--- Say Y if you want support for Logitech Unifying receivers and devices. Unifying receivers are capable of pairing up to 6 Logitech compliant @@ -527,6 +526,14 @@ config HID_PICOLCD_LEDS ---help--- Provide access to PicoLCD's GPO pins via leds class. +config HID_PICOLCD_CIR + bool "CIR via RC class" if EXPERT + default !EXPERT + depends on HID_PICOLCD + depends on HID_PICOLCD=RC_CORE || RC_CORE=y + ---help--- + Provide access to PicoLCD's CIR interface via remote control (LIRC). + config HID_PRIMAX tristate "Primax non-fully HID-compliant devices" depends on USB_HID @@ -534,6 +541,15 @@ config HID_PRIMAX Support for Primax devices that are not fully compliant with the HID standard. +config HID_PS3REMOTE + tristate "Sony PS3 BD Remote Control" + depends on BT_HIDP + ---help--- + Support for the Sony PS3 Blue-ray Disk Remote Control and Logitech + Harmony Adapter for PS3, which connect over Bluetooth. + + Support for the 6-axis controllers is provided by HID_SONY. + config HID_ROCCAT tristate "Roccat device support" depends on USB_HID @@ -561,7 +577,9 @@ config HID_SONY tristate "Sony PS3 controller" depends on USB_HID ---help--- - Support for Sony PS3 controller. + Support for Sony PS3 6-axis controllers. + + Support for the Sony PS3 BD Remote is provided by HID_PS3REMOTE. config HID_SPEEDLINK tristate "Speedlink VAD Cezanne mouse support" diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index f975485f88b..5a3690ff9bf 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -69,7 +69,28 @@ obj-$(CONFIG_HID_PRODIKEYS) += hid-prodikeys.o obj-$(CONFIG_HID_PANTHERLORD) += hid-pl.o obj-$(CONFIG_HID_PETALYNX) += hid-petalynx.o obj-$(CONFIG_HID_PICOLCD) += hid-picolcd.o +hid-picolcd-y += hid-picolcd_core.o +ifdef CONFIG_HID_PICOLCD_FB +hid-picolcd-y += hid-picolcd_fb.o +endif +ifdef CONFIG_HID_PICOLCD_BACKLIGHT +hid-picolcd-y += hid-picolcd_backlight.o +endif +ifdef CONFIG_HID_PICOLCD_LCD +hid-picolcd-y += hid-picolcd_lcd.o +endif +ifdef CONFIG_HID_PICOLCD_LEDS +hid-picolcd-y += hid-picolcd_leds.o +endif +ifdef CONFIG_HID_PICOLCD_CIR +hid-picolcd-y += hid-picolcd_cir.o +endif +ifdef CONFIG_DEBUG_FS +hid-picolcd-y += hid-picolcd_debugfs.o +endif + obj-$(CONFIG_HID_PRIMAX) += hid-primax.o +obj-$(CONFIG_HID_PS3REMOTE) += hid-ps3remote.o obj-$(CONFIG_HID_ROCCAT) += hid-roccat.o hid-roccat-common.o \ hid-roccat-arvo.o hid-roccat-isku.o hid-roccat-kone.o \ hid-roccat-koneplus.o hid-roccat-kovaplus.o hid-roccat-pyra.o \ diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c index 902d1dfeb1b..0a239885e67 100644 --- a/drivers/hid/hid-a4tech.c +++ b/drivers/hid/hid-a4tech.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 585344b6d33..06ebdbb6ea0 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com> */ diff --git a/drivers/hid/hid-aureal.c b/drivers/hid/hid-aureal.c index ba64b041b8b..7968187ddf7 100644 --- a/drivers/hid/hid-aureal.c +++ b/drivers/hid/hid-aureal.c @@ -9,7 +9,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ #include <linux/device.h> diff --git a/drivers/hid/hid-belkin.c b/drivers/hid/hid-belkin.c index a1a765a5b08..a1a5a12c3a6 100644 --- a/drivers/hid/hid-belkin.c +++ b/drivers/hid/hid-belkin.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c index 888ece68a47..af034d3d925 100644 --- a/drivers/hid/hid-cherry.c +++ b/drivers/hid/hid-cherry.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8bcd168fffa..2cd6880b6b1 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -126,7 +126,7 @@ static int open_collection(struct hid_parser *parser, unsigned type) if (parser->collection_stack_ptr == HID_COLLECTION_STACK_SIZE) { hid_err(parser->device, "collection stack overflow\n"); - return -1; + return -EINVAL; } if (parser->device->maxcollection == parser->device->collection_size) { @@ -134,7 +134,7 @@ static int open_collection(struct hid_parser *parser, unsigned type) parser->device->collection_size * 2, GFP_KERNEL); if (collection == NULL) { hid_err(parser->device, "failed to reallocate collection array\n"); - return -1; + return -ENOMEM; } memcpy(collection, parser->device->collection, sizeof(struct hid_collection) * @@ -170,7 +170,7 @@ static int close_collection(struct hid_parser *parser) { if (!parser->collection_stack_ptr) { hid_err(parser->device, "collection stack underflow\n"); - return -1; + return -EINVAL; } parser->collection_stack_ptr--; return 0; @@ -374,7 +374,7 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) case HID_GLOBAL_ITEM_TAG_REPORT_SIZE: parser->global.report_size = item_udata(item); - if (parser->global.report_size > 96) { + if (parser->global.report_size > 128) { hid_err(parser->device, "invalid report_size %d\n", parser->global.report_size); return -1; @@ -757,6 +757,7 @@ int hid_open_report(struct hid_device *device) struct hid_item item; unsigned int size; __u8 *start; + __u8 *buf; __u8 *end; int ret; static int (*dispatch_type[])(struct hid_parser *parser, @@ -775,12 +776,21 @@ int hid_open_report(struct hid_device *device) return -ENODEV; size = device->dev_rsize; + buf = kmemdup(start, size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + if (device->driver->report_fixup) - start = device->driver->report_fixup(device, start, &size); + start = device->driver->report_fixup(device, buf, &size); + else + start = buf; - device->rdesc = kmemdup(start, size, GFP_KERNEL); - if (device->rdesc == NULL) + start = kmemdup(start, size, GFP_KERNEL); + kfree(buf); + if (start == NULL) return -ENOMEM; + + device->rdesc = start; device->rsize = size; parser = vzalloc(sizeof(struct hid_parser)); @@ -1448,7 +1458,14 @@ void hid_disconnect(struct hid_device *hdev) } EXPORT_SYMBOL_GPL(hid_disconnect); -/* a list of devices for which there is a specialized driver on HID bus */ +/* + * A list of devices for which there is a specialized driver on HID bus. + * + * Please note that for multitouch devices (driven by hid-multitouch driver), + * there is a proper autodetection and autoloading in place (based on presence + * of HID_DG_CONTACTID), so those devices don't need to be added to this list, + * as we are doing the right thing in hid_scan_usage(). + */ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) }, { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) }, @@ -1566,6 +1583,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) }, @@ -1627,6 +1645,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) }, { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) }, +#if IS_ENABLED(CONFIG_HID_ROCCAT) { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) }, @@ -1635,10 +1654,12 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_PYRA_WIRELESS) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_SAVU) }, +#endif { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_PS1000) }, { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) }, { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_BDREMOTE) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) }, @@ -1663,6 +1684,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) }, diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 9e43aaca977..3e159a50dac 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 01dd9a7daf7..933fff0fff1 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -911,15 +911,21 @@ static void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) } - static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; + const __u8 *rdesc = hdev->rdesc; + unsigned rsize = hdev->rsize; int i; + if (!rdesc) { + rdesc = hdev->dev_rdesc; + rsize = hdev->dev_rsize; + } + /* dump HID report descriptor */ - for (i = 0; i < hdev->rsize; i++) - seq_printf(f, "%02x ", hdev->rdesc[i]); + for (i = 0; i < rsize; i++) + seq_printf(f, "%02x ", rdesc[i]); seq_printf(f, "\n\n"); /* dump parsed data and input mappings */ diff --git a/drivers/hid/hid-ezkey.c b/drivers/hid/hid-ezkey.c index ca1163e9d42..6540af2871a 100644 --- a/drivers/hid/hid-ezkey.c +++ b/drivers/hid/hid-ezkey.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-gyration.c b/drivers/hid/hid-gyration.c index e88b951cd10..4442c30ef53 100644 --- a/drivers/hid/hid-gyration.c +++ b/drivers/hid/hid-gyration.c @@ -4,7 +4,6 @@ * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2006-2008 Jiri Kosina */ diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 4e7542151e2..ff295e60059 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -100,8 +100,7 @@ static void holtekff_send(struct holtekff_device *holtekff, holtekff->field->value[i] = data[i]; } - dbg_hid("sending %02x %02x %02x %02x %02x %02x %02x\n", data[0], - data[1], data[2], data[3], data[4], data[5], data[6]); + dbg_hid("sending %*ph\n", 7, data); usbhid_submit_report(hid, holtekff->field->report, USB_DIR_OUT); } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1dcb76ff51e..ca4d83e6e38 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley */ /* @@ -269,7 +268,11 @@ #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72FA 0x72fa #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7302 0x7302 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7349 0x7349 +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_73F7 0x73f7 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001 0xa001 +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7224 0x7224 +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72D0 0x72d0 +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72C4 0x72c4 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 @@ -283,6 +286,9 @@ #define USB_VENDOR_ID_EMS 0x2006 #define USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II 0x0118 +#define USB_VENDOR_ID_FLATFROG 0x25b5 +#define USB_DEVICE_ID_MULTITOUCH_3200 0x0002 + #define USB_VENDOR_ID_ESSENTIAL_REALITY 0x0d7f #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100 @@ -296,6 +302,9 @@ #define USB_VENDOR_ID_EZKEY 0x0518 #define USB_DEVICE_ID_BTC_8193 0x0002 +#define USB_VENDOR_ID_FREESCALE 0x15A2 +#define USB_DEVICE_ID_FREESCALE_MX28 0x004F + #define USB_VENDOR_ID_FRUCTEL 0x25B6 #define USB_DEVICE_ID_GAMETEL_MT_MODE 0x0002 @@ -305,6 +314,7 @@ #define USB_VENDOR_ID_GENERAL_TOUCH 0x0dfc #define USB_DEVICE_ID_GENERAL_TOUCH_WIN7_TWOFINGERS 0x0003 +#define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PWT_TENFINGERS 0x0100 #define USB_VENDOR_ID_GLAB 0x06c2 #define USB_DEVICE_ID_4_PHIDGETSERVO_30 0x0038 @@ -496,6 +506,7 @@ #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101 #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f +#define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306 #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD_CORD 0xc20a #define USB_DEVICE_ID_LOGITECH_RUMBLEPAD 0xc211 #define USB_DEVICE_ID_LOGITECH_EXTREME_3D 0xc215 @@ -652,7 +663,6 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH 0x3000 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 -#define USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN 0x3001 #define USB_VENDOR_ID_ROCCAT 0x1e7d #define USB_DEVICE_ID_ROCCAT_ARVO 0x30d4 @@ -683,6 +693,7 @@ #define USB_VENDOR_ID_SONY 0x054c #define USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE 0x024b +#define USB_DEVICE_ID_SONY_PS3_BDREMOTE 0x0306 #define USB_DEVICE_ID_SONY_PS3_CONTROLLER 0x0268 #define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f @@ -758,6 +769,7 @@ #define USB_DEVICE_ID_UCLOGIC_TABLET_WP8060U 0x0005 #define USB_DEVICE_ID_UCLOGIC_TABLET_WP1062 0x0064 #define USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850 0x0522 +#define USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60 0x0781 #define USB_VENDOR_ID_UNITEC 0x227d #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0709 0x0709 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 811bfad6460..d917c0d5368 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1154,6 +1154,7 @@ static void report_features(struct hid_device *hid) int hidinput_connect(struct hid_device *hid, unsigned int force) { + struct hid_driver *drv = hid->driver; struct hid_report *report; struct hid_input *hidinput = NULL; struct input_dev *input_dev; @@ -1228,6 +1229,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) * UGCI) cram a lot of unrelated inputs into the * same interface. */ hidinput->report = report; + if (drv->input_configured) + drv->input_configured(hid, hidinput); if (input_register_device(hidinput->input)) goto out_cleanup; hidinput = NULL; @@ -1235,8 +1238,12 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } } - if (hidinput && input_register_device(hidinput->input)) - goto out_cleanup; + if (hidinput) { + if (drv->input_configured) + drv->input_configured(hid, hidinput); + if (input_register_device(hidinput->input)) + goto out_cleanup; + } return 0; diff --git a/drivers/hid/hid-lcpower.c b/drivers/hid/hid-lcpower.c index c4fe9bd095b..22bc14abdfa 100644 --- a/drivers/hid/hid-lcpower.c +++ b/drivers/hid/hid-lcpower.c @@ -24,7 +24,7 @@ static int ts_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - if ((usage->hid & HID_USAGE_PAGE) != 0x0ffbc0000) + if ((usage->hid & HID_USAGE_PAGE) != HID_UP_LOGIVENDOR) return 0; switch (usage->hid & HID_USAGE) { diff --git a/drivers/hid/hid-lenovo-tpkbd.c b/drivers/hid/hid-lenovo-tpkbd.c index 77d2df04c97..cea016e94f4 100644 --- a/drivers/hid/hid-lenovo-tpkbd.c +++ b/drivers/hid/hid-lenovo-tpkbd.c @@ -56,9 +56,8 @@ static int tpkbd_input_mapping(struct hid_device *hdev, static int tpkbd_features_set(struct hid_device *hdev) { struct hid_report *report; - struct tpkbd_data_pointer *data_pointer; + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); report = hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[4]; report->field[0]->value[0] = data_pointer->press_to_select ? 0x01 : 0x02; @@ -77,14 +76,8 @@ static ssize_t pointer_press_to_select_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; - - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->press_to_select); } @@ -94,16 +87,10 @@ static ssize_t pointer_press_to_select_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) @@ -119,14 +106,8 @@ static ssize_t pointer_dragging_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; - - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->dragging); } @@ -136,16 +117,10 @@ static ssize_t pointer_dragging_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) @@ -161,14 +136,8 @@ static ssize_t pointer_release_to_select_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; - - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->release_to_select); } @@ -178,16 +147,10 @@ static ssize_t pointer_release_to_select_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) @@ -203,14 +166,8 @@ static ssize_t pointer_select_right_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; - - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->select_right); } @@ -220,16 +177,10 @@ static ssize_t pointer_select_right_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value)) return -EINVAL; if (value < 0 || value > 1) @@ -245,14 +196,8 @@ static ssize_t pointer_sensitivity_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; - - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->sensitivity); @@ -263,16 +208,10 @@ static ssize_t pointer_sensitivity_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value) || value < 1 || value > 255) return -EINVAL; @@ -286,14 +225,10 @@ static ssize_t pointer_press_speed_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); + data_pointer = hid_get_drvdata(hdev); return snprintf(buf, PAGE_SIZE, "%u\n", data_pointer->press_speed); @@ -304,16 +239,10 @@ static ssize_t pointer_press_speed_store(struct device *dev, const char *buf, size_t count) { - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int value; - hdev = container_of(dev, struct hid_device, dev); - if (hdev == NULL) - return -ENODEV; - - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (kstrtoint(buf, 10, &value) || value < 1 || value > 255) return -EINVAL; @@ -370,15 +299,11 @@ static const struct attribute_group tpkbd_attr_group_pointer = { static enum led_brightness tpkbd_led_brightness_get( struct led_classdev *led_cdev) { - struct device *dev; - struct hid_device *hdev; - struct tpkbd_data_pointer *data_pointer; + struct device *dev = led_cdev->dev->parent; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); int led_nr = 0; - dev = led_cdev->dev->parent; - hdev = container_of(dev, struct hid_device, dev); - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (led_cdev == &data_pointer->led_micmute) led_nr = 1; @@ -390,16 +315,12 @@ static enum led_brightness tpkbd_led_brightness_get( static void tpkbd_led_brightness_set(struct led_classdev *led_cdev, enum led_brightness value) { - struct device *dev; - struct hid_device *hdev; + struct device *dev = led_cdev->dev->parent; + struct hid_device *hdev = container_of(dev, struct hid_device, dev); + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); struct hid_report *report; - struct tpkbd_data_pointer *data_pointer; int led_nr = 0; - dev = led_cdev->dev->parent; - hdev = container_of(dev, struct hid_device, dev); - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - if (led_cdev == &data_pointer->led_micmute) led_nr = 1; @@ -508,17 +429,17 @@ err_free: static void tpkbd_remove_tp(struct hid_device *hdev) { - struct tpkbd_data_pointer *data_pointer; + struct tpkbd_data_pointer *data_pointer = hid_get_drvdata(hdev); sysfs_remove_group(&hdev->dev.kobj, &tpkbd_attr_group_pointer); - data_pointer = (struct tpkbd_data_pointer *) hid_get_drvdata(hdev); - led_classdev_unregister(&data_pointer->led_micmute); led_classdev_unregister(&data_pointer->led_mute); hid_set_drvdata(hdev, NULL); + kfree(data_pointer->led_micmute.name); + kfree(data_pointer->led_mute.name); kfree(data_pointer); } diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index fc37ed6b108..a2f8e88b9fa 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2010 Hendrik Iben */ @@ -109,7 +108,7 @@ static __u8 dfp_rdesc_fixed[] = { static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hdev); + struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { @@ -278,7 +277,7 @@ static int lg_input_mapping(struct hid_device *hdev, struct hid_input *hi, 0, 0, 0, 0, 0,183,184,185,186,187, 188,189,190,191,192,193,194, 0, 0, 0 }; - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hdev); + struct lg_drv_data *drv_data = hid_get_drvdata(hdev); unsigned int hid = usage->hid; if (hdev->product == USB_DEVICE_ID_LOGITECH_RECEIVER && @@ -319,7 +318,7 @@ static int lg_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hdev); + struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_BAD_RELATIVE_KEYS) && usage->type == EV_KEY && (field->flags & HID_MAIN_ITEM_RELATIVE)) @@ -335,13 +334,16 @@ static int lg_input_mapped(struct hid_device *hdev, struct hid_input *hi, static int lg_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hdev); + struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if ((drv_data->quirks & LG_INVERT_HWHEEL) && usage->code == REL_HWHEEL) { input_event(field->hidinput->input, usage->type, usage->code, -value); return 1; } + if (drv_data->quirks & LG_FF4) { + return lg4ff_adjust_input_event(hdev, field, usage, value, drv_data); + } return 0; } @@ -358,7 +360,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) return -ENOMEM; } drv_data->quirks = id->driver_data; - + hid_set_drvdata(hdev, (void *)drv_data); if (drv_data->quirks & LG_NOGET) @@ -380,7 +382,7 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) } /* Setup wireless link with Logitech Wii wheel */ - if(hdev->product == USB_DEVICE_ID_LOGITECH_WII_WHEEL) { + if (hdev->product == USB_DEVICE_ID_LOGITECH_WII_WHEEL) { unsigned char buf[] = { 0x00, 0xAF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; ret = hdev->hid_output_raw_report(hdev, buf, sizeof(buf), HID_FEATURE_REPORT); @@ -416,7 +418,7 @@ err_free: static void lg_remove(struct hid_device *hdev) { - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hdev); + struct lg_drv_data *drv_data = hid_get_drvdata(hdev); if (drv_data->quirks & LG_FF4) lg4ff_deinit(hdev); @@ -476,7 +478,7 @@ static const struct hid_device_id lg_devices[] = { .driver_data = LG_NOGET | LG_FF4 }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL), .driver_data = LG_FF4 }, - { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ), + { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG), .driver_data = LG_FF }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2), .driver_data = LG_FF2 }, diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h index d64cf8d2751..142ce3f5f05 100644 --- a/drivers/hid/hid-lg.h +++ b/drivers/hid/hid-lg.h @@ -25,9 +25,13 @@ static inline int lg3ff_init(struct hid_device *hdev) { return -1; } #endif #ifdef CONFIG_LOGIWHEELS_FF +int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, + struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data); int lg4ff_init(struct hid_device *hdev); int lg4ff_deinit(struct hid_device *hdev); #else +static inline int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, + struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) { return 0; } static inline int lg4ff_init(struct hid_device *hdev) { return -1; } static inline int lg4ff_deinit(struct hid_device *hdev) { return -1; } #endif diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index f3390ee6105..d7947c701f3 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -43,6 +43,11 @@ #define G27_REV_MAJ 0x12 #define G27_REV_MIN 0x38 +#define DFP_X_MIN 0 +#define DFP_X_MAX 16383 +#define DFP_PEDAL_MIN 0 +#define DFP_PEDAL_MAX 255 + #define to_hid_device(pdev) container_of(pdev, struct hid_device, dev) static void hid_lg4ff_set_range_dfp(struct hid_device *hid, u16 range); @@ -53,6 +58,7 @@ static ssize_t lg4ff_range_store(struct device *dev, struct device_attribute *at static DEVICE_ATTR(range, S_IRWXU | S_IRWXG | S_IRWXO, lg4ff_range_show, lg4ff_range_store); struct lg4ff_device_entry { + __u32 product_id; __u16 range; __u16 min_range; __u16 max_range; @@ -129,26 +135,77 @@ static const struct lg4ff_usb_revision lg4ff_revs[] = { {G27_REV_MAJ, G27_REV_MIN, &native_g27}, /* G27 */ }; +/* Recalculates X axis value accordingly to currently selected range */ +static __s32 lg4ff_adjust_dfp_x_axis(__s32 value, __u16 range) +{ + __u16 max_range; + __s32 new_value; + + if (range == 900) + return value; + else if (range == 200) + return value; + else if (range < 200) + max_range = 200; + else + max_range = 900; + + new_value = 8192 + mult_frac(value - 8192, max_range, range); + if (new_value < 0) + return 0; + else if (new_value > 16383) + return 16383; + else + return new_value; +} + +int lg4ff_adjust_input_event(struct hid_device *hid, struct hid_field *field, + struct hid_usage *usage, __s32 value, struct lg_drv_data *drv_data) +{ + struct lg4ff_device_entry *entry = drv_data->device_props; + __s32 new_value = 0; + + if (!entry) { + hid_err(hid, "Device properties not found"); + return 0; + } + + switch (entry->product_id) { + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + switch (usage->code) { + case ABS_X: + new_value = lg4ff_adjust_dfp_x_axis(value, entry->range); + input_event(field->hidinput->input, usage->type, usage->code, new_value); + return 1; + default: + return 0; + } + default: + return 0; + } +} + static int hid_lg4ff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); + __s32 *value = report->field[0]->value; int x; -#define CLAMP(x) if (x < 0) x = 0; if (x > 0xff) x = 0xff +#define CLAMP(x) do { if (x < 0) x = 0; else if (x > 0xff) x = 0xff; } while (0) switch (effect->type) { case FF_CONSTANT: x = effect->u.ramp.start_level + 0x80; /* 0x80 is no force */ CLAMP(x); - report->field[0]->value[0] = 0x11; /* Slot 1 */ - report->field[0]->value[1] = 0x08; - report->field[0]->value[2] = x; - report->field[0]->value[3] = 0x80; - report->field[0]->value[4] = 0x00; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0x11; /* Slot 1 */ + value[1] = 0x08; + value[2] = x; + value[3] = 0x80; + value[4] = 0x00; + value[5] = 0x00; + value[6] = 0x00; usbhid_submit_report(hid, report, USB_DIR_OUT); break; @@ -163,14 +220,15 @@ static void hid_lg4ff_set_autocenter_default(struct input_dev *dev, u16 magnitud struct hid_device *hid = input_get_drvdata(dev); struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); + __s32 *value = report->field[0]->value; - report->field[0]->value[0] = 0xfe; - report->field[0]->value[1] = 0x0d; - report->field[0]->value[2] = magnitude >> 13; - report->field[0]->value[3] = magnitude >> 13; - report->field[0]->value[4] = magnitude >> 8; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0xfe; + value[1] = 0x0d; + value[2] = magnitude >> 13; + value[3] = magnitude >> 13; + value[4] = magnitude >> 8; + value[5] = 0x00; + value[6] = 0x00; usbhid_submit_report(hid, report, USB_DIR_OUT); } @@ -181,16 +239,16 @@ static void hid_lg4ff_set_autocenter_ffex(struct input_dev *dev, u16 magnitude) struct hid_device *hid = input_get_drvdata(dev); struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); + __s32 *value = report->field[0]->value; magnitude = magnitude * 90 / 65535; - - report->field[0]->value[0] = 0xfe; - report->field[0]->value[1] = 0x03; - report->field[0]->value[2] = magnitude >> 14; - report->field[0]->value[3] = magnitude >> 14; - report->field[0]->value[4] = magnitude; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0xfe; + value[1] = 0x03; + value[2] = magnitude >> 14; + value[3] = magnitude >> 14; + value[4] = magnitude; + value[5] = 0x00; + value[6] = 0x00; usbhid_submit_report(hid, report, USB_DIR_OUT); } @@ -200,15 +258,17 @@ static void hid_lg4ff_set_range_g25(struct hid_device *hid, u16 range) { struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); + __s32 *value = report->field[0]->value; + dbg_hid("G25/G27/DFGT: setting range to %u\n", range); - report->field[0]->value[0] = 0xf8; - report->field[0]->value[1] = 0x81; - report->field[0]->value[2] = range & 0x00ff; - report->field[0]->value[3] = (range & 0xff00) >> 8; - report->field[0]->value[4] = 0x00; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0xf8; + value[1] = 0x81; + value[2] = range & 0x00ff; + value[3] = (range & 0xff00) >> 8; + value[4] = 0x00; + value[5] = 0x00; + value[6] = 0x00; usbhid_submit_report(hid, report, USB_DIR_OUT); } @@ -219,16 +279,18 @@ static void hid_lg4ff_set_range_dfp(struct hid_device *hid, __u16 range) struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); int start_left, start_right, full_range; + __s32 *value = report->field[0]->value; + dbg_hid("Driving Force Pro: setting range to %u\n", range); /* Prepare "coarse" limit command */ - report->field[0]->value[0] = 0xf8; - report->field[0]->value[1] = 0x00; /* Set later */ - report->field[0]->value[2] = 0x00; - report->field[0]->value[3] = 0x00; - report->field[0]->value[4] = 0x00; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0xf8; + value[1] = 0x00; /* Set later */ + value[2] = 0x00; + value[3] = 0x00; + value[4] = 0x00; + value[5] = 0x00; + value[6] = 0x00; if (range > 200) { report->field[0]->value[1] = 0x03; @@ -240,13 +302,13 @@ static void hid_lg4ff_set_range_dfp(struct hid_device *hid, __u16 range) usbhid_submit_report(hid, report, USB_DIR_OUT); /* Prepare "fine" limit command */ - report->field[0]->value[0] = 0x81; - report->field[0]->value[1] = 0x0b; - report->field[0]->value[2] = 0x00; - report->field[0]->value[3] = 0x00; - report->field[0]->value[4] = 0x00; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + value[0] = 0x81; + value[1] = 0x0b; + value[2] = 0x00; + value[3] = 0x00; + value[4] = 0x00; + value[5] = 0x00; + value[6] = 0x00; if (range == 200 || range == 900) { /* Do not apply any fine limit */ usbhid_submit_report(hid, report, USB_DIR_OUT); @@ -257,11 +319,11 @@ static void hid_lg4ff_set_range_dfp(struct hid_device *hid, __u16 range) start_left = (((full_range - range + 1) * 2047) / full_range); start_right = 0xfff - start_left; - report->field[0]->value[2] = start_left >> 4; - report->field[0]->value[3] = start_right >> 4; - report->field[0]->value[4] = 0xff; - report->field[0]->value[5] = (start_right & 0xe) << 4 | (start_left & 0xe); - report->field[0]->value[6] = 0xff; + value[2] = start_left >> 4; + value[3] = start_right >> 4; + value[4] = 0xff; + value[5] = (start_right & 0xe) << 4 | (start_left & 0xe); + value[6] = 0xff; usbhid_submit_report(hid, report, USB_DIR_OUT); } @@ -344,14 +406,15 @@ static void lg4ff_set_leds(struct hid_device *hid, __u8 leds) { struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); - - report->field[0]->value[0] = 0xf8; - report->field[0]->value[1] = 0x12; - report->field[0]->value[2] = leds; - report->field[0]->value[3] = 0x00; - report->field[0]->value[4] = 0x00; - report->field[0]->value[5] = 0x00; - report->field[0]->value[6] = 0x00; + __s32 *value = report->field[0]->value; + + value[0] = 0xf8; + value[1] = 0x12; + value[2] = leds; + value[3] = 0x00; + value[4] = 0x00; + value[5] = 0x00; + value[6] = 0x00; usbhid_submit_report(hid, report, USB_DIR_OUT); } @@ -360,7 +423,7 @@ static void lg4ff_led_set_brightness(struct led_classdev *led_cdev, { struct device *dev = led_cdev->dev->parent; struct hid_device *hid = container_of(dev, struct hid_device, dev); - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hid); + struct lg_drv_data *drv_data = hid_get_drvdata(hid); struct lg4ff_device_entry *entry; int i, state = 0; @@ -395,7 +458,7 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde { struct device *dev = led_cdev->dev->parent; struct hid_device *hid = container_of(dev, struct hid_device, dev); - struct lg_drv_data *drv_data = (struct lg_drv_data *)hid_get_drvdata(hid); + struct lg_drv_data *drv_data = hid_get_drvdata(hid); struct lg4ff_device_entry *entry; int i, value = 0; @@ -501,7 +564,7 @@ int lg4ff_init(struct hid_device *hid) /* Check if autocentering is available and * set the centering force to zero by default */ if (test_bit(FF_AUTOCENTER, dev->ffbit)) { - if(rev_maj == FFEX_REV_MAJ && rev_min == FFEX_REV_MIN) /* Formula Force EX expects different autocentering command */ + if (rev_maj == FFEX_REV_MAJ && rev_min == FFEX_REV_MIN) /* Formula Force EX expects different autocentering command */ dev->ff->set_autocenter = hid_lg4ff_set_autocenter_ffex; else dev->ff->set_autocenter = hid_lg4ff_set_autocenter_default; @@ -524,6 +587,7 @@ int lg4ff_init(struct hid_device *hid) } drv_data->device_props = entry; + entry->product_id = lg4ff_devices[i].product_id; entry->min_range = lg4ff_devices[i].min_range; entry->max_range = lg4ff_devices[i].max_range; entry->set_range = lg4ff_devices[i].set_range; @@ -534,6 +598,18 @@ int lg4ff_init(struct hid_device *hid) return error; dbg_hid("sysfs interface created\n"); + /* Set default axes parameters */ + switch (lg4ff_devices[i].product_id) { + case USB_DEVICE_ID_LOGITECH_DFP_WHEEL: + dbg_hid("Setting axes parameters for Driving Force Pro\n"); + input_set_abs_params(dev, ABS_X, DFP_X_MIN, DFP_X_MAX, 0, 0); + input_set_abs_params(dev, ABS_Y, DFP_PEDAL_MIN, DFP_PEDAL_MAX, 0, 0); + input_set_abs_params(dev, ABS_RZ, DFP_PEDAL_MIN, DFP_PEDAL_MAX, 0, 0); + break; + default: + break; + } + /* Set the maximum range to start with */ entry->range = entry->max_range; if (entry->set_range != NULL) @@ -594,6 +670,8 @@ out: return 0; } + + int lg4ff_deinit(struct hid_device *hid) { struct lg4ff_device_entry *entry; diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 4d524b5f52f..9500f2f3f8f 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -193,6 +193,7 @@ static struct hid_ll_driver logi_dj_ll_driver; static int logi_dj_output_hidraw_report(struct hid_device *hid, u8 * buf, size_t count, unsigned char report_type); +static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev); static void logi_dj_recv_destroy_djhid_device(struct dj_receiver_dev *djrcv_dev, struct dj_report *dj_report) @@ -233,6 +234,7 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, if (dj_report->report_params[DEVICE_PAIRED_PARAM_SPFUNCTION] & SPFUNCTION_DEVICE_LIST_EMPTY) { dbg_hid("%s: device list is empty\n", __func__); + djrcv_dev->querying_devices = false; return; } @@ -243,6 +245,12 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, return; } + if (djrcv_dev->paired_dj_devices[dj_report->device_index]) { + /* The device is already known. No need to reallocate it. */ + dbg_hid("%s: device is already known\n", __func__); + return; + } + dj_hiddev = hid_allocate_device(); if (IS_ERR(dj_hiddev)) { dev_err(&djrcv_hdev->dev, "%s: hid_allocate_device failed\n", @@ -306,6 +314,7 @@ static void delayedwork_callback(struct work_struct *work) struct dj_report dj_report; unsigned long flags; int count; + int retval; dbg_hid("%s\n", __func__); @@ -338,6 +347,25 @@ static void delayedwork_callback(struct work_struct *work) logi_dj_recv_destroy_djhid_device(djrcv_dev, &dj_report); break; default: + /* A normal report (i. e. not belonging to a pair/unpair notification) + * arriving here, means that the report arrived but we did not have a + * paired dj_device associated to the report's device_index, this + * means that the original "device paired" notification corresponding + * to this dj_device never arrived to this driver. The reason is that + * hid-core discards all packets coming from a device while probe() is + * executing. */ + if (!djrcv_dev->paired_dj_devices[dj_report.device_index]) { + /* ok, we don't know the device, just re-ask the + * receiver for the list of connected devices. */ + retval = logi_dj_recv_query_paired_devices(djrcv_dev); + if (!retval) { + /* everything went fine, so just leave */ + break; + } + dev_err(&djrcv_dev->hdev->dev, + "%s:logi_dj_recv_query_paired_devices " + "error:%d\n", __func__, retval); + } dbg_hid("%s: unexpected report type\n", __func__); } } @@ -368,6 +396,12 @@ static void logi_dj_recv_forward_null_report(struct dj_receiver_dev *djrcv_dev, if (!djdev) { dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]" " is NULL, index %d\n", dj_report->device_index); + kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report)); + + if (schedule_work(&djrcv_dev->work) == 0) { + dbg_hid("%s: did not schedule the work item, was already " + "queued\n", __func__); + } return; } @@ -398,6 +432,12 @@ static void logi_dj_recv_forward_report(struct dj_receiver_dev *djrcv_dev, if (dj_device == NULL) { dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]" " is NULL, index %d\n", dj_report->device_index); + kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report)); + + if (schedule_work(&djrcv_dev->work) == 0) { + dbg_hid("%s: did not schedule the work item, was already " + "queued\n", __func__); + } return; } @@ -439,6 +479,10 @@ static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) struct dj_report *dj_report; int retval; + /* no need to protect djrcv_dev->querying_devices */ + if (djrcv_dev->querying_devices) + return 0; + dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL); if (!dj_report) return -ENOMEM; @@ -450,6 +494,7 @@ static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev) return retval; } + static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, unsigned timeout) { diff --git a/drivers/hid/hid-logitech-dj.h b/drivers/hid/hid-logitech-dj.h index fd28a5e0ca3..4a4000340ce 100644 --- a/drivers/hid/hid-logitech-dj.h +++ b/drivers/hid/hid-logitech-dj.h @@ -101,6 +101,7 @@ struct dj_receiver_dev { struct work_struct work; struct kfifo notif_fifo; spinlock_t lock; + bool querying_devices; }; struct dj_device { diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 73647266daa..25ddf3e3aec 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -392,7 +392,7 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd __set_bit(EV_ABS, input->evbit); - error = input_mt_init_slots(input, 16); + error = input_mt_init_slots(input, 16, 0); if (error) return error; input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 255 << 2, diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c index e5c699b6c6f..3acdcfcc17d 100644 --- a/drivers/hid/hid-microsoft.c +++ b/drivers/hid/hid-microsoft.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c index dedf757781a..cd3643e06fa 100644 --- a/drivers/hid/hid-monterey.c +++ b/drivers/hid/hid-monterey.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 59c8b5c1d2d..3eb02b94fc8 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -51,12 +51,12 @@ MODULE_LICENSE("GPL"); #define MT_QUIRK_VALID_IS_INRANGE (1 << 5) #define MT_QUIRK_VALID_IS_CONFIDENCE (1 << 6) #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE (1 << 8) +#define MT_QUIRK_NO_AREA (1 << 9) struct mt_slot { __s32 x, y, p, w, h; __s32 contactid; /* the device ContactID assigned to this slot */ bool touch_state; /* is the touch valid? */ - bool seen_in_this_frame;/* has this slot been updated */ }; struct mt_class { @@ -92,8 +92,9 @@ struct mt_device { __u8 touches_by_report; /* how many touches are present in one report: * 1 means we should use a serial protocol * > 1 means hybrid (multitouch) protocol */ + bool serial_maybe; /* need to check for serial protocol */ bool curvalid; /* is the current contact valid? */ - struct mt_slot *slots; + unsigned mt_flags; /* flags to pass to input-mt */ }; /* classes of device behavior */ @@ -115,6 +116,9 @@ struct mt_device { #define MT_CLS_EGALAX_SERIAL 0x0104 #define MT_CLS_TOPSEED 0x0105 #define MT_CLS_PANASONIC 0x0106 +#define MT_CLS_FLATFROG 0x0107 +#define MT_CLS_GENERALTOUCH_TWOFINGERS 0x0108 +#define MT_CLS_GENERALTOUCH_PWT_TENFINGERS 0x0109 #define MT_DEFAULT_MAXCONTACT 10 @@ -134,25 +138,6 @@ static int cypress_compute_slot(struct mt_device *td) return -1; } -static int find_slot_from_contactid(struct mt_device *td) -{ - int i; - for (i = 0; i < td->maxcontacts; ++i) { - if (td->slots[i].contactid == td->curdata.contactid && - td->slots[i].touch_state) - return i; - } - for (i = 0; i < td->maxcontacts; ++i) { - if (!td->slots[i].seen_in_this_frame && - !td->slots[i].touch_state) - return i; - } - /* should not occurs. If this happens that means - * that the device sent more touches that it says - * in the report descriptor. It is ignored then. */ - return -1; -} - static struct mt_class mt_classes[] = { { .name = MT_CLS_DEFAULT, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP }, @@ -190,7 +175,9 @@ static struct mt_class mt_classes[] = { MT_QUIRK_SLOT_IS_CONTACTID, .sn_move = 2048, .sn_width = 128, - .sn_height = 128 }, + .sn_height = 128, + .maxcontacts = 60, + }, { .name = MT_CLS_CYPRESS, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | MT_QUIRK_CYPRESS, @@ -215,7 +202,24 @@ static struct mt_class mt_classes[] = { { .name = MT_CLS_PANASONIC, .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP, .maxcontacts = 4 }, + { .name = MT_CLS_GENERALTOUCH_TWOFINGERS, + .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | + MT_QUIRK_VALID_IS_INRANGE | + MT_QUIRK_SLOT_IS_CONTACTNUMBER, + .maxcontacts = 2 + }, + { .name = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, + .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | + MT_QUIRK_SLOT_IS_CONTACTNUMBER, + .maxcontacts = 10 + }, + { .name = MT_CLS_FLATFROG, + .quirks = MT_QUIRK_NOT_SEEN_MEANS_UP | + MT_QUIRK_NO_AREA, + .sn_move = 2048, + .maxcontacts = 40, + }, { } }; @@ -319,24 +323,16 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, * We need to ignore fields that belong to other collections * such as Mouse that might have the same GenericDesktop usages. */ if (field->application == HID_DG_TOUCHSCREEN) - set_bit(INPUT_PROP_DIRECT, hi->input->propbit); + td->mt_flags |= INPUT_MT_DIRECT; else if (field->application != HID_DG_TOUCHPAD) return 0; - /* In case of an indirect device (touchpad), we need to add - * specific BTN_TOOL_* to be handled by the synaptics xorg - * driver. - * We also consider that touchscreens providing buttons are touchpads. + /* + * Model touchscreens providing buttons as touchpads. */ if (field->application == HID_DG_TOUCHPAD || - (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON || - cls->is_indirect) { - set_bit(INPUT_PROP_POINTER, hi->input->propbit); - set_bit(BTN_TOOL_FINGER, hi->input->keybit); - set_bit(BTN_TOOL_DOUBLETAP, hi->input->keybit); - set_bit(BTN_TOOL_TRIPLETAP, hi->input->keybit); - set_bit(BTN_TOOL_QUADTAP, hi->input->keybit); - } + (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) + td->mt_flags |= INPUT_MT_POINTER; /* eGalax devices provide a Digitizer.Stylus input which overrides * the correct Digitizers.Finger X/Y ranges. @@ -353,8 +349,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, EV_ABS, ABS_MT_POSITION_X); set_abs(hi->input, ABS_MT_POSITION_X, field, cls->sn_move); - /* touchscreen emulation */ - set_abs(hi->input, ABS_X, field, cls->sn_move); mt_store_field(usage, td, hi); td->last_field_index = field->index; return 1; @@ -363,8 +357,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, EV_ABS, ABS_MT_POSITION_Y); set_abs(hi->input, ABS_MT_POSITION_Y, field, cls->sn_move); - /* touchscreen emulation */ - set_abs(hi->input, ABS_Y, field, cls->sn_move); mt_store_field(usage, td, hi); td->last_field_index = field->index; return 1; @@ -388,9 +380,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, td->last_field_index = field->index; return 1; case HID_DG_CONTACTID: - if (!td->maxcontacts) - td->maxcontacts = MT_DEFAULT_MAXCONTACT; - input_mt_init_slots(hi->input, td->maxcontacts); mt_store_field(usage, td, hi); td->last_field_index = field->index; td->touches_by_report++; @@ -398,18 +387,21 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, case HID_DG_WIDTH: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MAJOR); - set_abs(hi->input, ABS_MT_TOUCH_MAJOR, field, - cls->sn_width); + if (!(cls->quirks & MT_QUIRK_NO_AREA)) + set_abs(hi->input, ABS_MT_TOUCH_MAJOR, field, + cls->sn_width); mt_store_field(usage, td, hi); td->last_field_index = field->index; return 1; case HID_DG_HEIGHT: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MINOR); - set_abs(hi->input, ABS_MT_TOUCH_MINOR, field, - cls->sn_height); - input_set_abs_params(hi->input, + if (!(cls->quirks & MT_QUIRK_NO_AREA)) { + set_abs(hi->input, ABS_MT_TOUCH_MINOR, field, + cls->sn_height); + input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); + } mt_store_field(usage, td, hi); td->last_field_index = field->index; return 1; @@ -418,9 +410,6 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, EV_ABS, ABS_MT_PRESSURE); set_abs(hi->input, ABS_MT_PRESSURE, field, cls->sn_pressure); - /* touchscreen emulation */ - set_abs(hi->input, ABS_PRESSURE, field, - cls->sn_pressure); mt_store_field(usage, td, hi); td->last_field_index = field->index; return 1; @@ -464,7 +453,7 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi, return -1; } -static int mt_compute_slot(struct mt_device *td) +static int mt_compute_slot(struct mt_device *td, struct input_dev *input) { __s32 quirks = td->mtclass.quirks; @@ -480,42 +469,23 @@ static int mt_compute_slot(struct mt_device *td) if (quirks & MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE) return td->curdata.contactid - 1; - return find_slot_from_contactid(td); + return input_mt_get_slot_by_key(input, td->curdata.contactid); } /* * this function is called when a whole contact has been processed, * so that it can assign it to a slot and store the data there */ -static void mt_complete_slot(struct mt_device *td) +static void mt_complete_slot(struct mt_device *td, struct input_dev *input) { - td->curdata.seen_in_this_frame = true; if (td->curvalid) { - int slotnum = mt_compute_slot(td); - - if (slotnum >= 0 && slotnum < td->maxcontacts) - td->slots[slotnum] = td->curdata; - } - td->num_received++; -} + int slotnum = mt_compute_slot(td, input); + struct mt_slot *s = &td->curdata; + if (slotnum < 0 || slotnum >= td->maxcontacts) + return; -/* - * this function is called when a whole packet has been received and processed, - * so that it can decide what to send to the input layer. - */ -static void mt_emit_event(struct mt_device *td, struct input_dev *input) -{ - int i; - - for (i = 0; i < td->maxcontacts; ++i) { - struct mt_slot *s = &(td->slots[i]); - if ((td->mtclass.quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) && - !s->seen_in_this_frame) { - s->touch_state = false; - } - - input_mt_slot(input, i); + input_mt_slot(input, slotnum); input_mt_report_slot_state(input, MT_TOOL_FINGER, s->touch_state); if (s->touch_state) { @@ -532,24 +502,29 @@ static void mt_emit_event(struct mt_device *td, struct input_dev *input) input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); } - s->seen_in_this_frame = false; - } - input_mt_report_pointer_emulation(input, true); + td->num_received++; +} + +/* + * this function is called when a whole packet has been received and processed, + * so that it can decide what to send to the input layer. + */ +static void mt_sync_frame(struct mt_device *td, struct input_dev *input) +{ + input_mt_sync_frame(input); input_sync(input); td->num_received = 0; } - - static int mt_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct mt_device *td = hid_get_drvdata(hid); __s32 quirks = td->mtclass.quirks; - if (hid->claimed & HID_CLAIMED_INPUT && td->slots) { + if (hid->claimed & HID_CLAIMED_INPUT) { switch (usage->hid) { case HID_DG_INRANGE: if (quirks & MT_QUIRK_ALWAYS_VALID) @@ -602,11 +577,11 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, } if (usage->hid == td->last_slot_field) - mt_complete_slot(td); + mt_complete_slot(td, field->hidinput->input); if (field->index == td->last_field_index && td->num_received >= td->num_expected) - mt_emit_event(td, field->hidinput->input); + mt_sync_frame(td, field->hidinput->input); } @@ -685,18 +660,45 @@ static void mt_post_parse(struct mt_device *td) } } +static void mt_input_configured(struct hid_device *hdev, struct hid_input *hi) + +{ + struct mt_device *td = hid_get_drvdata(hdev); + struct mt_class *cls = &td->mtclass; + struct input_dev *input = hi->input; + + /* Only initialize slots for MT input devices */ + if (!test_bit(ABS_MT_POSITION_X, input->absbit)) + return; + + if (!td->maxcontacts) + td->maxcontacts = MT_DEFAULT_MAXCONTACT; + + mt_post_parse(td); + if (td->serial_maybe) + mt_post_parse_default_settings(td); + + if (cls->is_indirect) + td->mt_flags |= INPUT_MT_POINTER; + + if (cls->quirks & MT_QUIRK_NOT_SEEN_MEANS_UP) + td->mt_flags |= INPUT_MT_DROP_UNUSED; + + input_mt_init_slots(input, td->maxcontacts, td->mt_flags); + + td->mt_flags = 0; +} + static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret, i; struct mt_device *td; struct mt_class *mtclass = mt_classes; /* MT_CLS_DEFAULT */ - if (id) { - for (i = 0; mt_classes[i].name ; i++) { - if (id->driver_data == mt_classes[i].name) { - mtclass = &(mt_classes[i]); - break; - } + for (i = 0; mt_classes[i].name ; i++) { + if (id->driver_data == mt_classes[i].name) { + mtclass = &(mt_classes[i]); + break; } } @@ -722,6 +724,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) goto fail; } + if (id->vendor == HID_ANY_ID && id->product == HID_ANY_ID) + td->serial_maybe = true; + ret = hid_parse(hdev); if (ret != 0) goto fail; @@ -730,20 +735,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret) goto fail; - mt_post_parse(td); - - if (id->vendor == HID_ANY_ID && id->product == HID_ANY_ID) - mt_post_parse_default_settings(td); - - td->slots = kzalloc(td->maxcontacts * sizeof(struct mt_slot), - GFP_KERNEL); - if (!td->slots) { - dev_err(&hdev->dev, "cannot allocate multitouch slots\n"); - hid_hw_stop(hdev); - ret = -ENOMEM; - goto fail; - } - ret = sysfs_create_group(&hdev->dev.kobj, &mt_attribute_group); mt_set_maxcontacts(hdev); @@ -767,6 +758,32 @@ static int mt_reset_resume(struct hid_device *hdev) mt_set_input_mode(hdev); return 0; } + +static int mt_resume(struct hid_device *hdev) +{ + struct usb_interface *intf; + struct usb_host_interface *interface; + struct usb_device *dev; + + if (hdev->bus != BUS_USB) + return 0; + + intf = to_usb_interface(hdev->dev.parent); + interface = intf->cur_altsetting; + dev = hid_to_usb_dev(hdev); + + /* Some Elan legacy devices require SET_IDLE to be set on resume. + * It should be safe to send it to other devices too. + * Tested on 3M, Stantum, Cypress, Zytronic, eGalax, and Elan panels. */ + + usb_control_msg(dev, usb_sndctrlpipe(dev, 0), + HID_REQ_SET_IDLE, + USB_TYPE_CLASS | USB_RECIP_INTERFACE, + 0, interface->desc.bInterfaceNumber, + NULL, 0, USB_CTRL_SET_TIMEOUT); + + return 0; +} #endif static void mt_remove(struct hid_device *hdev) @@ -774,7 +791,6 @@ static void mt_remove(struct hid_device *hdev) struct mt_device *td = hid_get_drvdata(hdev); sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group); hid_hw_stop(hdev); - kfree(td->slots); kfree(td); hid_set_drvdata(hdev, NULL); } @@ -885,17 +901,37 @@ static const struct hid_device_id mt_devices[] = { USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7349) }, { .driver_data = MT_CLS_EGALAX_SERIAL, MT_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_73F7) }, + { .driver_data = MT_CLS_EGALAX_SERIAL, + MT_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001) }, + { .driver_data = MT_CLS_EGALAX, + HID_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_7224) }, + { .driver_data = MT_CLS_EGALAX, + HID_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72D0) }, + { .driver_data = MT_CLS_EGALAX, + HID_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_72C4) }, /* Elo TouchSystems IntelliTouch Plus panel */ { .driver_data = MT_CLS_DUAL_NSMU_CONTACTID, MT_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2515) }, + /* Flatfrog Panels */ + { .driver_data = MT_CLS_FLATFROG, + MT_USB_DEVICE(USB_VENDOR_ID_FLATFROG, + USB_DEVICE_ID_MULTITOUCH_3200) }, + /* GeneralTouch panel */ - { .driver_data = MT_CLS_DUAL_INRANGE_CONTACTNUMBER, + { .driver_data = MT_CLS_GENERALTOUCH_TWOFINGERS, MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, USB_DEVICE_ID_GENERAL_TOUCH_WIN7_TWOFINGERS) }, + { .driver_data = MT_CLS_GENERALTOUCH_PWT_TENFINGERS, + MT_USB_DEVICE(USB_VENDOR_ID_GENERAL_TOUCH, + USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PWT_TENFINGERS) }, /* Gametel game controller */ { .driver_data = MT_CLS_DEFAULT, @@ -1087,11 +1123,13 @@ static struct hid_driver mt_driver = { .remove = mt_remove, .input_mapping = mt_input_mapping, .input_mapped = mt_input_mapped, + .input_configured = mt_input_configured, .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, #ifdef CONFIG_PM .reset_resume = mt_reset_resume, + .resume = mt_resume, #endif }; diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index 9fae2ebdd75..86a969f6329 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -882,10 +882,10 @@ static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) nd->activate_slack = activate_slack; nd->act_state = activate_slack; nd->deactivate_slack = -deactivate_slack; - nd->sensor_logical_width = 0; - nd->sensor_logical_height = 0; - nd->sensor_physical_width = 0; - nd->sensor_physical_height = 0; + nd->sensor_logical_width = 1; + nd->sensor_logical_height = 1; + nd->sensor_physical_width = 1; + nd->sensor_physical_height = 1; hid_set_drvdata(hdev, nd); diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c index f1ea3ff8a98..4c521de4e7e 100644 --- a/drivers/hid/hid-petalynx.c +++ b/drivers/hid/hid-petalynx.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c deleted file mode 100644 index 27c8ebdfad0..00000000000 --- a/drivers/hid/hid-picolcd.c +++ /dev/null @@ -1,2748 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2010 by Bruno Prémont <bonbons@linux-vserver.org> * - * * - * Based on Logitech G13 driver (v0.4) * - * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * - * * - * This program is free software: you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation, version 2 of the License. * - * * - * This driver is distributed in the hope that it will be useful, but * - * WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * - * General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this software. If not see <http://www.gnu.org/licenses/>. * - ***************************************************************************/ - -#include <linux/hid.h> -#include <linux/hid-debug.h> -#include <linux/input.h> -#include "hid-ids.h" -#include "usbhid/usbhid.h" -#include <linux/usb.h> - -#include <linux/fb.h> -#include <linux/vmalloc.h> -#include <linux/backlight.h> -#include <linux/lcd.h> - -#include <linux/leds.h> - -#include <linux/seq_file.h> -#include <linux/debugfs.h> - -#include <linux/completion.h> -#include <linux/uaccess.h> -#include <linux/module.h> - -#define PICOLCD_NAME "PicoLCD (graphic)" - -/* Report numbers */ -#define REPORT_ERROR_CODE 0x10 /* LCD: IN[16] */ -#define ERR_SUCCESS 0x00 -#define ERR_PARAMETER_MISSING 0x01 -#define ERR_DATA_MISSING 0x02 -#define ERR_BLOCK_READ_ONLY 0x03 -#define ERR_BLOCK_NOT_ERASABLE 0x04 -#define ERR_BLOCK_TOO_BIG 0x05 -#define ERR_SECTION_OVERFLOW 0x06 -#define ERR_INVALID_CMD_LEN 0x07 -#define ERR_INVALID_DATA_LEN 0x08 -#define REPORT_KEY_STATE 0x11 /* LCD: IN[2] */ -#define REPORT_IR_DATA 0x21 /* LCD: IN[63] */ -#define REPORT_EE_DATA 0x32 /* LCD: IN[63] */ -#define REPORT_MEMORY 0x41 /* LCD: IN[63] */ -#define REPORT_LED_STATE 0x81 /* LCD: OUT[1] */ -#define REPORT_BRIGHTNESS 0x91 /* LCD: OUT[1] */ -#define REPORT_CONTRAST 0x92 /* LCD: OUT[1] */ -#define REPORT_RESET 0x93 /* LCD: OUT[2] */ -#define REPORT_LCD_CMD 0x94 /* LCD: OUT[63] */ -#define REPORT_LCD_DATA 0x95 /* LCD: OUT[63] */ -#define REPORT_LCD_CMD_DATA 0x96 /* LCD: OUT[63] */ -#define REPORT_EE_READ 0xa3 /* LCD: OUT[63] */ -#define REPORT_EE_WRITE 0xa4 /* LCD: OUT[63] */ -#define REPORT_ERASE_MEMORY 0xb2 /* LCD: OUT[2] */ -#define REPORT_READ_MEMORY 0xb3 /* LCD: OUT[3] */ -#define REPORT_WRITE_MEMORY 0xb4 /* LCD: OUT[63] */ -#define REPORT_SPLASH_RESTART 0xc1 /* LCD: OUT[1] */ -#define REPORT_EXIT_KEYBOARD 0xef /* LCD: OUT[2] */ -#define REPORT_VERSION 0xf1 /* LCD: IN[2],OUT[1] Bootloader: IN[2],OUT[1] */ -#define REPORT_BL_ERASE_MEMORY 0xf2 /* Bootloader: IN[36],OUT[4] */ -#define REPORT_BL_READ_MEMORY 0xf3 /* Bootloader: IN[36],OUT[4] */ -#define REPORT_BL_WRITE_MEMORY 0xf4 /* Bootloader: IN[36],OUT[36] */ -#define REPORT_DEVID 0xf5 /* LCD: IN[5], OUT[1] Bootloader: IN[5],OUT[1] */ -#define REPORT_SPLASH_SIZE 0xf6 /* LCD: IN[4], OUT[1] */ -#define REPORT_HOOK_VERSION 0xf7 /* LCD: IN[2], OUT[1] */ -#define REPORT_EXIT_FLASHER 0xff /* Bootloader: OUT[2] */ - -#ifdef CONFIG_HID_PICOLCD_FB -/* Framebuffer - * - * The PicoLCD use a Topway LCD module of 256x64 pixel - * This display area is tiled over 4 controllers with 8 tiles - * each. Each tile has 8x64 pixel, each data byte representing - * a 1-bit wide vertical line of the tile. - * - * The display can be updated at a tile granularity. - * - * Chip 1 Chip 2 Chip 3 Chip 4 - * +----------------+----------------+----------------+----------------+ - * | Tile 1 | Tile 1 | Tile 1 | Tile 1 | - * +----------------+----------------+----------------+----------------+ - * | Tile 2 | Tile 2 | Tile 2 | Tile 2 | - * +----------------+----------------+----------------+----------------+ - * ... - * +----------------+----------------+----------------+----------------+ - * | Tile 8 | Tile 8 | Tile 8 | Tile 8 | - * +----------------+----------------+----------------+----------------+ - */ -#define PICOLCDFB_NAME "picolcdfb" -#define PICOLCDFB_WIDTH (256) -#define PICOLCDFB_HEIGHT (64) -#define PICOLCDFB_SIZE (PICOLCDFB_WIDTH * PICOLCDFB_HEIGHT / 8) - -#define PICOLCDFB_UPDATE_RATE_LIMIT 10 -#define PICOLCDFB_UPDATE_RATE_DEFAULT 2 - -/* Framebuffer visual structures */ -static const struct fb_fix_screeninfo picolcdfb_fix = { - .id = PICOLCDFB_NAME, - .type = FB_TYPE_PACKED_PIXELS, - .visual = FB_VISUAL_MONO01, - .xpanstep = 0, - .ypanstep = 0, - .ywrapstep = 0, - .line_length = PICOLCDFB_WIDTH / 8, - .accel = FB_ACCEL_NONE, -}; - -static const struct fb_var_screeninfo picolcdfb_var = { - .xres = PICOLCDFB_WIDTH, - .yres = PICOLCDFB_HEIGHT, - .xres_virtual = PICOLCDFB_WIDTH, - .yres_virtual = PICOLCDFB_HEIGHT, - .width = 103, - .height = 26, - .bits_per_pixel = 1, - .grayscale = 1, - .red = { - .offset = 0, - .length = 1, - .msb_right = 0, - }, - .green = { - .offset = 0, - .length = 1, - .msb_right = 0, - }, - .blue = { - .offset = 0, - .length = 1, - .msb_right = 0, - }, - .transp = { - .offset = 0, - .length = 0, - .msb_right = 0, - }, -}; -#endif /* CONFIG_HID_PICOLCD_FB */ - -/* Input device - * - * The PicoLCD has an IR receiver header, a built-in keypad with 5 keys - * and header for 4x4 key matrix. The built-in keys are part of the matrix. - */ -static const unsigned short def_keymap[] = { - KEY_RESERVED, /* none */ - KEY_BACK, /* col 4 + row 1 */ - KEY_HOMEPAGE, /* col 3 + row 1 */ - KEY_RESERVED, /* col 2 + row 1 */ - KEY_RESERVED, /* col 1 + row 1 */ - KEY_SCROLLUP, /* col 4 + row 2 */ - KEY_OK, /* col 3 + row 2 */ - KEY_SCROLLDOWN, /* col 2 + row 2 */ - KEY_RESERVED, /* col 1 + row 2 */ - KEY_RESERVED, /* col 4 + row 3 */ - KEY_RESERVED, /* col 3 + row 3 */ - KEY_RESERVED, /* col 2 + row 3 */ - KEY_RESERVED, /* col 1 + row 3 */ - KEY_RESERVED, /* col 4 + row 4 */ - KEY_RESERVED, /* col 3 + row 4 */ - KEY_RESERVED, /* col 2 + row 4 */ - KEY_RESERVED, /* col 1 + row 4 */ -}; -#define PICOLCD_KEYS ARRAY_SIZE(def_keymap) - -/* Description of in-progress IO operation, used for operations - * that trigger response from device */ -struct picolcd_pending { - struct hid_report *out_report; - struct hid_report *in_report; - struct completion ready; - int raw_size; - u8 raw_data[64]; -}; - -/* Per device data structure */ -struct picolcd_data { - struct hid_device *hdev; -#ifdef CONFIG_DEBUG_FS - struct dentry *debug_reset; - struct dentry *debug_eeprom; - struct dentry *debug_flash; - struct mutex mutex_flash; - int addr_sz; -#endif - u8 version[2]; - unsigned short opmode_delay; - /* input stuff */ - u8 pressed_keys[2]; - struct input_dev *input_keys; - struct input_dev *input_cir; - unsigned short keycode[PICOLCD_KEYS]; - -#ifdef CONFIG_HID_PICOLCD_FB - /* Framebuffer stuff */ - u8 fb_update_rate; - u8 fb_bpp; - u8 fb_force; - u8 *fb_vbitmap; /* local copy of what was sent to PicoLCD */ - u8 *fb_bitmap; /* framebuffer */ - struct fb_info *fb_info; - struct fb_deferred_io fb_defio; -#endif /* CONFIG_HID_PICOLCD_FB */ -#ifdef CONFIG_HID_PICOLCD_LCD - struct lcd_device *lcd; - u8 lcd_contrast; -#endif /* CONFIG_HID_PICOLCD_LCD */ -#ifdef CONFIG_HID_PICOLCD_BACKLIGHT - struct backlight_device *backlight; - u8 lcd_brightness; - u8 lcd_power; -#endif /* CONFIG_HID_PICOLCD_BACKLIGHT */ -#ifdef CONFIG_HID_PICOLCD_LEDS - /* LED stuff */ - u8 led_state; - struct led_classdev *led[8]; -#endif /* CONFIG_HID_PICOLCD_LEDS */ - - /* Housekeeping stuff */ - spinlock_t lock; - struct mutex mutex; - struct picolcd_pending *pending; - int status; -#define PICOLCD_BOOTLOADER 1 -#define PICOLCD_FAILED 2 -#define PICOLCD_READY_FB 4 -}; - - -/* Find a given report */ -#define picolcd_in_report(id, dev) picolcd_report(id, dev, HID_INPUT_REPORT) -#define picolcd_out_report(id, dev) picolcd_report(id, dev, HID_OUTPUT_REPORT) - -static struct hid_report *picolcd_report(int id, struct hid_device *hdev, int dir) -{ - struct list_head *feature_report_list = &hdev->report_enum[dir].report_list; - struct hid_report *report = NULL; - - list_for_each_entry(report, feature_report_list, list) { - if (report->id == id) - return report; - } - hid_warn(hdev, "No report with id 0x%x found\n", id); - return NULL; -} - -#ifdef CONFIG_DEBUG_FS -static void picolcd_debug_out_report(struct picolcd_data *data, - struct hid_device *hdev, struct hid_report *report); -#define usbhid_submit_report(a, b, c) \ - do { \ - picolcd_debug_out_report(hid_get_drvdata(a), a, b); \ - usbhid_submit_report(a, b, c); \ - } while (0) -#endif - -/* Submit a report and wait for a reply from device - if device fades away - * or does not respond in time, return NULL */ -static struct picolcd_pending *picolcd_send_and_wait(struct hid_device *hdev, - int report_id, const u8 *raw_data, int size) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - struct picolcd_pending *work; - struct hid_report *report = picolcd_out_report(report_id, hdev); - unsigned long flags; - int i, j, k; - - if (!report || !data) - return NULL; - if (data->status & PICOLCD_FAILED) - return NULL; - work = kzalloc(sizeof(*work), GFP_KERNEL); - if (!work) - return NULL; - - init_completion(&work->ready); - work->out_report = report; - work->in_report = NULL; - work->raw_size = 0; - - mutex_lock(&data->mutex); - spin_lock_irqsave(&data->lock, flags); - for (i = k = 0; i < report->maxfield; i++) - for (j = 0; j < report->field[i]->report_count; j++) { - hid_set_field(report->field[i], j, k < size ? raw_data[k] : 0); - k++; - } - data->pending = work; - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - wait_for_completion_interruptible_timeout(&work->ready, HZ*2); - spin_lock_irqsave(&data->lock, flags); - data->pending = NULL; - spin_unlock_irqrestore(&data->lock, flags); - mutex_unlock(&data->mutex); - return work; -} - -#ifdef CONFIG_HID_PICOLCD_FB -/* Send a given tile to PicoLCD */ -static int picolcd_fb_send_tile(struct hid_device *hdev, int chip, int tile) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - struct hid_report *report1 = picolcd_out_report(REPORT_LCD_CMD_DATA, hdev); - struct hid_report *report2 = picolcd_out_report(REPORT_LCD_DATA, hdev); - unsigned long flags; - u8 *tdata; - int i; - - if (!report1 || report1->maxfield != 1 || !report2 || report2->maxfield != 1) - return -ENODEV; - - spin_lock_irqsave(&data->lock, flags); - hid_set_field(report1->field[0], 0, chip << 2); - hid_set_field(report1->field[0], 1, 0x02); - hid_set_field(report1->field[0], 2, 0x00); - hid_set_field(report1->field[0], 3, 0x00); - hid_set_field(report1->field[0], 4, 0xb8 | tile); - hid_set_field(report1->field[0], 5, 0x00); - hid_set_field(report1->field[0], 6, 0x00); - hid_set_field(report1->field[0], 7, 0x40); - hid_set_field(report1->field[0], 8, 0x00); - hid_set_field(report1->field[0], 9, 0x00); - hid_set_field(report1->field[0], 10, 32); - - hid_set_field(report2->field[0], 0, (chip << 2) | 0x01); - hid_set_field(report2->field[0], 1, 0x00); - hid_set_field(report2->field[0], 2, 0x00); - hid_set_field(report2->field[0], 3, 32); - - tdata = data->fb_vbitmap + (tile * 4 + chip) * 64; - for (i = 0; i < 64; i++) - if (i < 32) - hid_set_field(report1->field[0], 11 + i, tdata[i]); - else - hid_set_field(report2->field[0], 4 + i - 32, tdata[i]); - - usbhid_submit_report(data->hdev, report1, USB_DIR_OUT); - usbhid_submit_report(data->hdev, report2, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - return 0; -} - -/* Translate a single tile*/ -static int picolcd_fb_update_tile(u8 *vbitmap, const u8 *bitmap, int bpp, - int chip, int tile) -{ - int i, b, changed = 0; - u8 tdata[64]; - u8 *vdata = vbitmap + (tile * 4 + chip) * 64; - - if (bpp == 1) { - for (b = 7; b >= 0; b--) { - const u8 *bdata = bitmap + tile * 256 + chip * 8 + b * 32; - for (i = 0; i < 64; i++) { - tdata[i] <<= 1; - tdata[i] |= (bdata[i/8] >> (i % 8)) & 0x01; - } - } - } else if (bpp == 8) { - for (b = 7; b >= 0; b--) { - const u8 *bdata = bitmap + (tile * 256 + chip * 8 + b * 32) * 8; - for (i = 0; i < 64; i++) { - tdata[i] <<= 1; - tdata[i] |= (bdata[i] & 0x80) ? 0x01 : 0x00; - } - } - } else { - /* Oops, we should never get here! */ - WARN_ON(1); - return 0; - } - - for (i = 0; i < 64; i++) - if (tdata[i] != vdata[i]) { - changed = 1; - vdata[i] = tdata[i]; - } - return changed; -} - -/* Reconfigure LCD display */ -static int picolcd_fb_reset(struct picolcd_data *data, int clear) -{ - struct hid_report *report = picolcd_out_report(REPORT_LCD_CMD, data->hdev); - int i, j; - unsigned long flags; - static const u8 mapcmd[8] = { 0x00, 0x02, 0x00, 0x64, 0x3f, 0x00, 0x64, 0xc0 }; - - if (!report || report->maxfield != 1) - return -ENODEV; - - spin_lock_irqsave(&data->lock, flags); - for (i = 0; i < 4; i++) { - for (j = 0; j < report->field[0]->maxusage; j++) - if (j == 0) - hid_set_field(report->field[0], j, i << 2); - else if (j < sizeof(mapcmd)) - hid_set_field(report->field[0], j, mapcmd[j]); - else - hid_set_field(report->field[0], j, 0); - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - } - - data->status |= PICOLCD_READY_FB; - spin_unlock_irqrestore(&data->lock, flags); - - if (data->fb_bitmap) { - if (clear) { - memset(data->fb_vbitmap, 0, PICOLCDFB_SIZE); - memset(data->fb_bitmap, 0, PICOLCDFB_SIZE*data->fb_bpp); - } - data->fb_force = 1; - } - - /* schedule first output of framebuffer */ - if (data->fb_info) - schedule_delayed_work(&data->fb_info->deferred_work, 0); - - return 0; -} - -/* Update fb_vbitmap from the screen_base and send changed tiles to device */ -static void picolcd_fb_update(struct picolcd_data *data) -{ - int chip, tile, n; - unsigned long flags; - - if (!data) - return; - - spin_lock_irqsave(&data->lock, flags); - if (!(data->status & PICOLCD_READY_FB)) { - spin_unlock_irqrestore(&data->lock, flags); - picolcd_fb_reset(data, 0); - } else { - spin_unlock_irqrestore(&data->lock, flags); - } - - /* - * Translate the framebuffer into the format needed by the PicoLCD. - * See display layout above. - * Do this one tile after the other and push those tiles that changed. - * - * Wait for our IO to complete as otherwise we might flood the queue! - */ - n = 0; - for (chip = 0; chip < 4; chip++) - for (tile = 0; tile < 8; tile++) - if (picolcd_fb_update_tile(data->fb_vbitmap, - data->fb_bitmap, data->fb_bpp, chip, tile) || - data->fb_force) { - n += 2; - if (!data->fb_info->par) - return; /* device lost! */ - if (n >= HID_OUTPUT_FIFO_SIZE / 2) { - usbhid_wait_io(data->hdev); - n = 0; - } - picolcd_fb_send_tile(data->hdev, chip, tile); - } - data->fb_force = false; - if (n) - usbhid_wait_io(data->hdev); -} - -/* Stub to call the system default and update the image on the picoLCD */ -static void picolcd_fb_fillrect(struct fb_info *info, - const struct fb_fillrect *rect) -{ - if (!info->par) - return; - sys_fillrect(info, rect); - - schedule_delayed_work(&info->deferred_work, 0); -} - -/* Stub to call the system default and update the image on the picoLCD */ -static void picolcd_fb_copyarea(struct fb_info *info, - const struct fb_copyarea *area) -{ - if (!info->par) - return; - sys_copyarea(info, area); - - schedule_delayed_work(&info->deferred_work, 0); -} - -/* Stub to call the system default and update the image on the picoLCD */ -static void picolcd_fb_imageblit(struct fb_info *info, const struct fb_image *image) -{ - if (!info->par) - return; - sys_imageblit(info, image); - - schedule_delayed_work(&info->deferred_work, 0); -} - -/* - * this is the slow path from userspace. they can seek and write to - * the fb. it's inefficient to do anything less than a full screen draw - */ -static ssize_t picolcd_fb_write(struct fb_info *info, const char __user *buf, - size_t count, loff_t *ppos) -{ - ssize_t ret; - if (!info->par) - return -ENODEV; - ret = fb_sys_write(info, buf, count, ppos); - if (ret >= 0) - schedule_delayed_work(&info->deferred_work, 0); - return ret; -} - -static int picolcd_fb_blank(int blank, struct fb_info *info) -{ - if (!info->par) - return -ENODEV; - /* We let fb notification do this for us via lcd/backlight device */ - return 0; -} - -static void picolcd_fb_destroy(struct fb_info *info) -{ - struct picolcd_data *data = info->par; - u32 *ref_cnt = info->pseudo_palette; - int may_release; - - info->par = NULL; - if (data) - data->fb_info = NULL; - fb_deferred_io_cleanup(info); - - ref_cnt--; - mutex_lock(&info->lock); - (*ref_cnt)--; - may_release = !*ref_cnt; - mutex_unlock(&info->lock); - if (may_release) { - vfree((u8 *)info->fix.smem_start); - framebuffer_release(info); - } -} - -static int picolcd_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) -{ - __u32 bpp = var->bits_per_pixel; - __u32 activate = var->activate; - - /* only allow 1/8 bit depth (8-bit is grayscale) */ - *var = picolcdfb_var; - var->activate = activate; - if (bpp >= 8) { - var->bits_per_pixel = 8; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - } else { - var->bits_per_pixel = 1; - var->red.length = 1; - var->green.length = 1; - var->blue.length = 1; - } - return 0; -} - -static int picolcd_set_par(struct fb_info *info) -{ - struct picolcd_data *data = info->par; - u8 *tmp_fb, *o_fb; - if (!data) - return -ENODEV; - if (info->var.bits_per_pixel == data->fb_bpp) - return 0; - /* switch between 1/8 bit depths */ - if (info->var.bits_per_pixel != 1 && info->var.bits_per_pixel != 8) - return -EINVAL; - - o_fb = data->fb_bitmap; - tmp_fb = kmalloc(PICOLCDFB_SIZE*info->var.bits_per_pixel, GFP_KERNEL); - if (!tmp_fb) - return -ENOMEM; - - /* translate FB content to new bits-per-pixel */ - if (info->var.bits_per_pixel == 1) { - int i, b; - for (i = 0; i < PICOLCDFB_SIZE; i++) { - u8 p = 0; - for (b = 0; b < 8; b++) { - p <<= 1; - p |= o_fb[i*8+b] ? 0x01 : 0x00; - } - tmp_fb[i] = p; - } - memcpy(o_fb, tmp_fb, PICOLCDFB_SIZE); - info->fix.visual = FB_VISUAL_MONO01; - info->fix.line_length = PICOLCDFB_WIDTH / 8; - } else { - int i; - memcpy(tmp_fb, o_fb, PICOLCDFB_SIZE); - for (i = 0; i < PICOLCDFB_SIZE * 8; i++) - o_fb[i] = tmp_fb[i/8] & (0x01 << (7 - i % 8)) ? 0xff : 0x00; - info->fix.visual = FB_VISUAL_DIRECTCOLOR; - info->fix.line_length = PICOLCDFB_WIDTH; - } - - kfree(tmp_fb); - data->fb_bpp = info->var.bits_per_pixel; - return 0; -} - -/* Do refcounting on our FB and cleanup per worker if FB is - * closed after unplug of our device - * (fb_release holds info->lock and still touches info after - * we return so we can't release it immediately. - */ -struct picolcd_fb_cleanup_item { - struct fb_info *info; - struct picolcd_fb_cleanup_item *next; -}; -static struct picolcd_fb_cleanup_item *fb_pending; -static DEFINE_SPINLOCK(fb_pending_lock); - -static void picolcd_fb_do_cleanup(struct work_struct *data) -{ - struct picolcd_fb_cleanup_item *item; - unsigned long flags; - - do { - spin_lock_irqsave(&fb_pending_lock, flags); - item = fb_pending; - fb_pending = item ? item->next : NULL; - spin_unlock_irqrestore(&fb_pending_lock, flags); - - if (item) { - u8 *fb = (u8 *)item->info->fix.smem_start; - /* make sure we do not race against fb core when - * releasing */ - mutex_lock(&item->info->lock); - mutex_unlock(&item->info->lock); - framebuffer_release(item->info); - vfree(fb); - } - } while (item); -} - -static DECLARE_WORK(picolcd_fb_cleanup, picolcd_fb_do_cleanup); - -static int picolcd_fb_open(struct fb_info *info, int u) -{ - u32 *ref_cnt = info->pseudo_palette; - ref_cnt--; - - (*ref_cnt)++; - return 0; -} - -static int picolcd_fb_release(struct fb_info *info, int u) -{ - u32 *ref_cnt = info->pseudo_palette; - ref_cnt--; - - (*ref_cnt)++; - if (!*ref_cnt) { - unsigned long flags; - struct picolcd_fb_cleanup_item *item = (struct picolcd_fb_cleanup_item *)ref_cnt; - item--; - spin_lock_irqsave(&fb_pending_lock, flags); - item->next = fb_pending; - fb_pending = item; - spin_unlock_irqrestore(&fb_pending_lock, flags); - schedule_work(&picolcd_fb_cleanup); - } - return 0; -} - -/* Note this can't be const because of struct fb_info definition */ -static struct fb_ops picolcdfb_ops = { - .owner = THIS_MODULE, - .fb_destroy = picolcd_fb_destroy, - .fb_open = picolcd_fb_open, - .fb_release = picolcd_fb_release, - .fb_read = fb_sys_read, - .fb_write = picolcd_fb_write, - .fb_blank = picolcd_fb_blank, - .fb_fillrect = picolcd_fb_fillrect, - .fb_copyarea = picolcd_fb_copyarea, - .fb_imageblit = picolcd_fb_imageblit, - .fb_check_var = picolcd_fb_check_var, - .fb_set_par = picolcd_set_par, -}; - - -/* Callback from deferred IO workqueue */ -static void picolcd_fb_deferred_io(struct fb_info *info, struct list_head *pagelist) -{ - picolcd_fb_update(info->par); -} - -static const struct fb_deferred_io picolcd_fb_defio = { - .delay = HZ / PICOLCDFB_UPDATE_RATE_DEFAULT, - .deferred_io = picolcd_fb_deferred_io, -}; - - -/* - * The "fb_update_rate" sysfs attribute - */ -static ssize_t picolcd_fb_update_rate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - unsigned i, fb_update_rate = data->fb_update_rate; - size_t ret = 0; - - for (i = 1; i <= PICOLCDFB_UPDATE_RATE_LIMIT; i++) - if (ret >= PAGE_SIZE) - break; - else if (i == fb_update_rate) - ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i); - else - ret += snprintf(buf+ret, PAGE_SIZE-ret, "%u ", i); - if (ret > 0) - buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n'; - return ret; -} - -static ssize_t picolcd_fb_update_rate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - int i; - unsigned u; - - if (count < 1 || count > 10) - return -EINVAL; - - i = sscanf(buf, "%u", &u); - if (i != 1) - return -EINVAL; - - if (u > PICOLCDFB_UPDATE_RATE_LIMIT) - return -ERANGE; - else if (u == 0) - u = PICOLCDFB_UPDATE_RATE_DEFAULT; - - data->fb_update_rate = u; - data->fb_defio.delay = HZ / data->fb_update_rate; - return count; -} - -static DEVICE_ATTR(fb_update_rate, 0666, picolcd_fb_update_rate_show, - picolcd_fb_update_rate_store); - -/* initialize Framebuffer device */ -static int picolcd_init_framebuffer(struct picolcd_data *data) -{ - struct device *dev = &data->hdev->dev; - struct fb_info *info = NULL; - int i, error = -ENOMEM; - u8 *fb_vbitmap = NULL; - u8 *fb_bitmap = NULL; - u32 *palette; - - fb_bitmap = vmalloc(PICOLCDFB_SIZE*8); - if (fb_bitmap == NULL) { - dev_err(dev, "can't get a free page for framebuffer\n"); - goto err_nomem; - } - - fb_vbitmap = kmalloc(PICOLCDFB_SIZE, GFP_KERNEL); - if (fb_vbitmap == NULL) { - dev_err(dev, "can't alloc vbitmap image buffer\n"); - goto err_nomem; - } - - data->fb_update_rate = PICOLCDFB_UPDATE_RATE_DEFAULT; - data->fb_defio = picolcd_fb_defio; - /* The extra memory is: - * - struct picolcd_fb_cleanup_item - * - u32 for ref_count - * - 256*u32 for pseudo_palette - */ - info = framebuffer_alloc(257 * sizeof(u32) + sizeof(struct picolcd_fb_cleanup_item), dev); - if (info == NULL) { - dev_err(dev, "failed to allocate a framebuffer\n"); - goto err_nomem; - } - - palette = info->par + sizeof(struct picolcd_fb_cleanup_item); - *palette = 1; - palette++; - for (i = 0; i < 256; i++) - palette[i] = i > 0 && i < 16 ? 0xff : 0; - info->pseudo_palette = palette; - info->fbdefio = &data->fb_defio; - info->screen_base = (char __force __iomem *)fb_bitmap; - info->fbops = &picolcdfb_ops; - info->var = picolcdfb_var; - info->fix = picolcdfb_fix; - info->fix.smem_len = PICOLCDFB_SIZE*8; - info->fix.smem_start = (unsigned long)fb_bitmap; - info->par = data; - info->flags = FBINFO_FLAG_DEFAULT; - - data->fb_vbitmap = fb_vbitmap; - data->fb_bitmap = fb_bitmap; - data->fb_bpp = picolcdfb_var.bits_per_pixel; - error = picolcd_fb_reset(data, 1); - if (error) { - dev_err(dev, "failed to configure display\n"); - goto err_cleanup; - } - error = device_create_file(dev, &dev_attr_fb_update_rate); - if (error) { - dev_err(dev, "failed to create sysfs attributes\n"); - goto err_cleanup; - } - fb_deferred_io_init(info); - data->fb_info = info; - error = register_framebuffer(info); - if (error) { - dev_err(dev, "failed to register framebuffer\n"); - goto err_sysfs; - } - /* schedule first output of framebuffer */ - data->fb_force = 1; - schedule_delayed_work(&info->deferred_work, 0); - return 0; - -err_sysfs: - fb_deferred_io_cleanup(info); - device_remove_file(dev, &dev_attr_fb_update_rate); -err_cleanup: - data->fb_vbitmap = NULL; - data->fb_bitmap = NULL; - data->fb_bpp = 0; - data->fb_info = NULL; - -err_nomem: - framebuffer_release(info); - vfree(fb_bitmap); - kfree(fb_vbitmap); - return error; -} - -static void picolcd_exit_framebuffer(struct picolcd_data *data) -{ - struct fb_info *info = data->fb_info; - u8 *fb_vbitmap = data->fb_vbitmap; - - if (!info) - return; - - info->par = NULL; - device_remove_file(&data->hdev->dev, &dev_attr_fb_update_rate); - unregister_framebuffer(info); - data->fb_vbitmap = NULL; - data->fb_bitmap = NULL; - data->fb_bpp = 0; - data->fb_info = NULL; - kfree(fb_vbitmap); -} - -#define picolcd_fbinfo(d) ((d)->fb_info) -#else -static inline int picolcd_fb_reset(struct picolcd_data *data, int clear) -{ - return 0; -} -static inline int picolcd_init_framebuffer(struct picolcd_data *data) -{ - return 0; -} -static inline void picolcd_exit_framebuffer(struct picolcd_data *data) -{ -} -#define picolcd_fbinfo(d) NULL -#endif /* CONFIG_HID_PICOLCD_FB */ - -#ifdef CONFIG_HID_PICOLCD_BACKLIGHT -/* - * backlight class device - */ -static int picolcd_get_brightness(struct backlight_device *bdev) -{ - struct picolcd_data *data = bl_get_data(bdev); - return data->lcd_brightness; -} - -static int picolcd_set_brightness(struct backlight_device *bdev) -{ - struct picolcd_data *data = bl_get_data(bdev); - struct hid_report *report = picolcd_out_report(REPORT_BRIGHTNESS, data->hdev); - unsigned long flags; - - if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) - return -ENODEV; - - data->lcd_brightness = bdev->props.brightness & 0x0ff; - data->lcd_power = bdev->props.power; - spin_lock_irqsave(&data->lock, flags); - hid_set_field(report->field[0], 0, data->lcd_power == FB_BLANK_UNBLANK ? data->lcd_brightness : 0); - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - return 0; -} - -static int picolcd_check_bl_fb(struct backlight_device *bdev, struct fb_info *fb) -{ - return fb && fb == picolcd_fbinfo((struct picolcd_data *)bl_get_data(bdev)); -} - -static const struct backlight_ops picolcd_blops = { - .update_status = picolcd_set_brightness, - .get_brightness = picolcd_get_brightness, - .check_fb = picolcd_check_bl_fb, -}; - -static int picolcd_init_backlight(struct picolcd_data *data, struct hid_report *report) -{ - struct device *dev = &data->hdev->dev; - struct backlight_device *bdev; - struct backlight_properties props; - if (!report) - return -ENODEV; - if (report->maxfield != 1 || report->field[0]->report_count != 1 || - report->field[0]->report_size != 8) { - dev_err(dev, "unsupported BRIGHTNESS report"); - return -EINVAL; - } - - memset(&props, 0, sizeof(props)); - props.type = BACKLIGHT_RAW; - props.max_brightness = 0xff; - bdev = backlight_device_register(dev_name(dev), dev, data, - &picolcd_blops, &props); - if (IS_ERR(bdev)) { - dev_err(dev, "failed to register backlight\n"); - return PTR_ERR(bdev); - } - bdev->props.brightness = 0xff; - data->lcd_brightness = 0xff; - data->backlight = bdev; - picolcd_set_brightness(bdev); - return 0; -} - -static void picolcd_exit_backlight(struct picolcd_data *data) -{ - struct backlight_device *bdev = data->backlight; - - data->backlight = NULL; - if (bdev) - backlight_device_unregister(bdev); -} - -static inline int picolcd_resume_backlight(struct picolcd_data *data) -{ - if (!data->backlight) - return 0; - return picolcd_set_brightness(data->backlight); -} - -#ifdef CONFIG_PM -static void picolcd_suspend_backlight(struct picolcd_data *data) -{ - int bl_power = data->lcd_power; - if (!data->backlight) - return; - - data->backlight->props.power = FB_BLANK_POWERDOWN; - picolcd_set_brightness(data->backlight); - data->lcd_power = data->backlight->props.power = bl_power; -} -#endif /* CONFIG_PM */ -#else -static inline int picolcd_init_backlight(struct picolcd_data *data, - struct hid_report *report) -{ - return 0; -} -static inline void picolcd_exit_backlight(struct picolcd_data *data) -{ -} -static inline int picolcd_resume_backlight(struct picolcd_data *data) -{ - return 0; -} -static inline void picolcd_suspend_backlight(struct picolcd_data *data) -{ -} -#endif /* CONFIG_HID_PICOLCD_BACKLIGHT */ - -#ifdef CONFIG_HID_PICOLCD_LCD -/* - * lcd class device - */ -static int picolcd_get_contrast(struct lcd_device *ldev) -{ - struct picolcd_data *data = lcd_get_data(ldev); - return data->lcd_contrast; -} - -static int picolcd_set_contrast(struct lcd_device *ldev, int contrast) -{ - struct picolcd_data *data = lcd_get_data(ldev); - struct hid_report *report = picolcd_out_report(REPORT_CONTRAST, data->hdev); - unsigned long flags; - - if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) - return -ENODEV; - - data->lcd_contrast = contrast & 0x0ff; - spin_lock_irqsave(&data->lock, flags); - hid_set_field(report->field[0], 0, data->lcd_contrast); - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - return 0; -} - -static int picolcd_check_lcd_fb(struct lcd_device *ldev, struct fb_info *fb) -{ - return fb && fb == picolcd_fbinfo((struct picolcd_data *)lcd_get_data(ldev)); -} - -static struct lcd_ops picolcd_lcdops = { - .get_contrast = picolcd_get_contrast, - .set_contrast = picolcd_set_contrast, - .check_fb = picolcd_check_lcd_fb, -}; - -static int picolcd_init_lcd(struct picolcd_data *data, struct hid_report *report) -{ - struct device *dev = &data->hdev->dev; - struct lcd_device *ldev; - - if (!report) - return -ENODEV; - if (report->maxfield != 1 || report->field[0]->report_count != 1 || - report->field[0]->report_size != 8) { - dev_err(dev, "unsupported CONTRAST report"); - return -EINVAL; - } - - ldev = lcd_device_register(dev_name(dev), dev, data, &picolcd_lcdops); - if (IS_ERR(ldev)) { - dev_err(dev, "failed to register LCD\n"); - return PTR_ERR(ldev); - } - ldev->props.max_contrast = 0x0ff; - data->lcd_contrast = 0xe5; - data->lcd = ldev; - picolcd_set_contrast(ldev, 0xe5); - return 0; -} - -static void picolcd_exit_lcd(struct picolcd_data *data) -{ - struct lcd_device *ldev = data->lcd; - - data->lcd = NULL; - if (ldev) - lcd_device_unregister(ldev); -} - -static inline int picolcd_resume_lcd(struct picolcd_data *data) -{ - if (!data->lcd) - return 0; - return picolcd_set_contrast(data->lcd, data->lcd_contrast); -} -#else -static inline int picolcd_init_lcd(struct picolcd_data *data, - struct hid_report *report) -{ - return 0; -} -static inline void picolcd_exit_lcd(struct picolcd_data *data) -{ -} -static inline int picolcd_resume_lcd(struct picolcd_data *data) -{ - return 0; -} -#endif /* CONFIG_HID_PICOLCD_LCD */ - -#ifdef CONFIG_HID_PICOLCD_LEDS -/** - * LED class device - */ -static void picolcd_leds_set(struct picolcd_data *data) -{ - struct hid_report *report; - unsigned long flags; - - if (!data->led[0]) - return; - report = picolcd_out_report(REPORT_LED_STATE, data->hdev); - if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) - return; - - spin_lock_irqsave(&data->lock, flags); - hid_set_field(report->field[0], 0, data->led_state); - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); -} - -static void picolcd_led_set_brightness(struct led_classdev *led_cdev, - enum led_brightness value) -{ - struct device *dev; - struct hid_device *hdev; - struct picolcd_data *data; - int i, state = 0; - - dev = led_cdev->dev->parent; - hdev = container_of(dev, struct hid_device, dev); - data = hid_get_drvdata(hdev); - for (i = 0; i < 8; i++) { - if (led_cdev != data->led[i]) - continue; - state = (data->led_state >> i) & 1; - if (value == LED_OFF && state) { - data->led_state &= ~(1 << i); - picolcd_leds_set(data); - } else if (value != LED_OFF && !state) { - data->led_state |= 1 << i; - picolcd_leds_set(data); - } - break; - } -} - -static enum led_brightness picolcd_led_get_brightness(struct led_classdev *led_cdev) -{ - struct device *dev; - struct hid_device *hdev; - struct picolcd_data *data; - int i, value = 0; - - dev = led_cdev->dev->parent; - hdev = container_of(dev, struct hid_device, dev); - data = hid_get_drvdata(hdev); - for (i = 0; i < 8; i++) - if (led_cdev == data->led[i]) { - value = (data->led_state >> i) & 1; - break; - } - return value ? LED_FULL : LED_OFF; -} - -static int picolcd_init_leds(struct picolcd_data *data, struct hid_report *report) -{ - struct device *dev = &data->hdev->dev; - struct led_classdev *led; - size_t name_sz = strlen(dev_name(dev)) + 8; - char *name; - int i, ret = 0; - - if (!report) - return -ENODEV; - if (report->maxfield != 1 || report->field[0]->report_count != 1 || - report->field[0]->report_size != 8) { - dev_err(dev, "unsupported LED_STATE report"); - return -EINVAL; - } - - for (i = 0; i < 8; i++) { - led = kzalloc(sizeof(struct led_classdev)+name_sz, GFP_KERNEL); - if (!led) { - dev_err(dev, "can't allocate memory for LED %d\n", i); - ret = -ENOMEM; - goto err; - } - name = (void *)(&led[1]); - snprintf(name, name_sz, "%s::GPO%d", dev_name(dev), i); - led->name = name; - led->brightness = 0; - led->max_brightness = 1; - led->brightness_get = picolcd_led_get_brightness; - led->brightness_set = picolcd_led_set_brightness; - - data->led[i] = led; - ret = led_classdev_register(dev, data->led[i]); - if (ret) { - data->led[i] = NULL; - kfree(led); - dev_err(dev, "can't register LED %d\n", i); - goto err; - } - } - return 0; -err: - for (i = 0; i < 8; i++) - if (data->led[i]) { - led = data->led[i]; - data->led[i] = NULL; - led_classdev_unregister(led); - kfree(led); - } - return ret; -} - -static void picolcd_exit_leds(struct picolcd_data *data) -{ - struct led_classdev *led; - int i; - - for (i = 0; i < 8; i++) { - led = data->led[i]; - data->led[i] = NULL; - if (!led) - continue; - led_classdev_unregister(led); - kfree(led); - } -} - -#else -static inline int picolcd_init_leds(struct picolcd_data *data, - struct hid_report *report) -{ - return 0; -} -static inline void picolcd_exit_leds(struct picolcd_data *data) -{ -} -static inline int picolcd_leds_set(struct picolcd_data *data) -{ - return 0; -} -#endif /* CONFIG_HID_PICOLCD_LEDS */ - -/* - * input class device - */ -static int picolcd_raw_keypad(struct picolcd_data *data, - struct hid_report *report, u8 *raw_data, int size) -{ - /* - * Keypad event - * First and second data bytes list currently pressed keys, - * 0x00 means no key and at most 2 keys may be pressed at same time - */ - int i, j; - - /* determine newly pressed keys */ - for (i = 0; i < size; i++) { - unsigned int key_code; - if (raw_data[i] == 0) - continue; - for (j = 0; j < sizeof(data->pressed_keys); j++) - if (data->pressed_keys[j] == raw_data[i]) - goto key_already_down; - for (j = 0; j < sizeof(data->pressed_keys); j++) - if (data->pressed_keys[j] == 0) { - data->pressed_keys[j] = raw_data[i]; - break; - } - input_event(data->input_keys, EV_MSC, MSC_SCAN, raw_data[i]); - if (raw_data[i] < PICOLCD_KEYS) - key_code = data->keycode[raw_data[i]]; - else - key_code = KEY_UNKNOWN; - if (key_code != KEY_UNKNOWN) { - dbg_hid(PICOLCD_NAME " got key press for %u:%d", - raw_data[i], key_code); - input_report_key(data->input_keys, key_code, 1); - } - input_sync(data->input_keys); -key_already_down: - continue; - } - - /* determine newly released keys */ - for (j = 0; j < sizeof(data->pressed_keys); j++) { - unsigned int key_code; - if (data->pressed_keys[j] == 0) - continue; - for (i = 0; i < size; i++) - if (data->pressed_keys[j] == raw_data[i]) - goto key_still_down; - input_event(data->input_keys, EV_MSC, MSC_SCAN, data->pressed_keys[j]); - if (data->pressed_keys[j] < PICOLCD_KEYS) - key_code = data->keycode[data->pressed_keys[j]]; - else - key_code = KEY_UNKNOWN; - if (key_code != KEY_UNKNOWN) { - dbg_hid(PICOLCD_NAME " got key release for %u:%d", - data->pressed_keys[j], key_code); - input_report_key(data->input_keys, key_code, 0); - } - input_sync(data->input_keys); - data->pressed_keys[j] = 0; -key_still_down: - continue; - } - return 1; -} - -static int picolcd_raw_cir(struct picolcd_data *data, - struct hid_report *report, u8 *raw_data, int size) -{ - /* Need understanding of CIR data format to implement ... */ - return 1; -} - -static int picolcd_check_version(struct hid_device *hdev) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - struct picolcd_pending *verinfo; - int ret = 0; - - if (!data) - return -ENODEV; - - verinfo = picolcd_send_and_wait(hdev, REPORT_VERSION, NULL, 0); - if (!verinfo) { - hid_err(hdev, "no version response from PicoLCD\n"); - return -ENODEV; - } - - if (verinfo->raw_size == 2) { - data->version[0] = verinfo->raw_data[1]; - data->version[1] = verinfo->raw_data[0]; - if (data->status & PICOLCD_BOOTLOADER) { - hid_info(hdev, "PicoLCD, bootloader version %d.%d\n", - verinfo->raw_data[1], verinfo->raw_data[0]); - } else { - hid_info(hdev, "PicoLCD, firmware version %d.%d\n", - verinfo->raw_data[1], verinfo->raw_data[0]); - } - } else { - hid_err(hdev, "confused, got unexpected version response from PicoLCD\n"); - ret = -EINVAL; - } - kfree(verinfo); - return ret; -} - -/* - * Reset our device and wait for answer to VERSION request - */ -static int picolcd_reset(struct hid_device *hdev) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - struct hid_report *report = picolcd_out_report(REPORT_RESET, hdev); - unsigned long flags; - int error; - - if (!data || !report || report->maxfield != 1) - return -ENODEV; - - spin_lock_irqsave(&data->lock, flags); - if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) - data->status |= PICOLCD_BOOTLOADER; - - /* perform the reset */ - hid_set_field(report->field[0], 0, 1); - usbhid_submit_report(hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - - error = picolcd_check_version(hdev); - if (error) - return error; - - picolcd_resume_lcd(data); - picolcd_resume_backlight(data); -#ifdef CONFIG_HID_PICOLCD_FB - if (data->fb_info) - schedule_delayed_work(&data->fb_info->deferred_work, 0); -#endif /* CONFIG_HID_PICOLCD_FB */ - - picolcd_leds_set(data); - return 0; -} - -/* - * The "operation_mode" sysfs attribute - */ -static ssize_t picolcd_operation_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - - if (data->status & PICOLCD_BOOTLOADER) - return snprintf(buf, PAGE_SIZE, "[bootloader] lcd\n"); - else - return snprintf(buf, PAGE_SIZE, "bootloader [lcd]\n"); -} - -static ssize_t picolcd_operation_mode_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - struct hid_report *report = NULL; - size_t cnt = count; - int timeout = data->opmode_delay; - unsigned long flags; - - if (cnt >= 3 && strncmp("lcd", buf, 3) == 0) { - if (data->status & PICOLCD_BOOTLOADER) - report = picolcd_out_report(REPORT_EXIT_FLASHER, data->hdev); - buf += 3; - cnt -= 3; - } else if (cnt >= 10 && strncmp("bootloader", buf, 10) == 0) { - if (!(data->status & PICOLCD_BOOTLOADER)) - report = picolcd_out_report(REPORT_EXIT_KEYBOARD, data->hdev); - buf += 10; - cnt -= 10; - } - if (!report) - return -EINVAL; - - while (cnt > 0 && (buf[cnt-1] == '\n' || buf[cnt-1] == '\r')) - cnt--; - if (cnt != 0) - return -EINVAL; - - spin_lock_irqsave(&data->lock, flags); - hid_set_field(report->field[0], 0, timeout & 0xff); - hid_set_field(report->field[0], 1, (timeout >> 8) & 0xff); - usbhid_submit_report(data->hdev, report, USB_DIR_OUT); - spin_unlock_irqrestore(&data->lock, flags); - return count; -} - -static DEVICE_ATTR(operation_mode, 0644, picolcd_operation_mode_show, - picolcd_operation_mode_store); - -/* - * The "operation_mode_delay" sysfs attribute - */ -static ssize_t picolcd_operation_mode_delay_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - - return snprintf(buf, PAGE_SIZE, "%hu\n", data->opmode_delay); -} - -static ssize_t picolcd_operation_mode_delay_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct picolcd_data *data = dev_get_drvdata(dev); - unsigned u; - if (sscanf(buf, "%u", &u) != 1) - return -EINVAL; - if (u > 30000) - return -EINVAL; - else - data->opmode_delay = u; - return count; -} - -static DEVICE_ATTR(operation_mode_delay, 0644, picolcd_operation_mode_delay_show, - picolcd_operation_mode_delay_store); - - -#ifdef CONFIG_DEBUG_FS -/* - * The "reset" file - */ -static int picolcd_debug_reset_show(struct seq_file *f, void *p) -{ - if (picolcd_fbinfo((struct picolcd_data *)f->private)) - seq_printf(f, "all fb\n"); - else - seq_printf(f, "all\n"); - return 0; -} - -static int picolcd_debug_reset_open(struct inode *inode, struct file *f) -{ - return single_open(f, picolcd_debug_reset_show, inode->i_private); -} - -static ssize_t picolcd_debug_reset_write(struct file *f, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct picolcd_data *data = ((struct seq_file *)f->private_data)->private; - char buf[32]; - size_t cnt = min(count, sizeof(buf)-1); - if (copy_from_user(buf, user_buf, cnt)) - return -EFAULT; - - while (cnt > 0 && (buf[cnt-1] == ' ' || buf[cnt-1] == '\n')) - cnt--; - buf[cnt] = '\0'; - if (strcmp(buf, "all") == 0) { - picolcd_reset(data->hdev); - picolcd_fb_reset(data, 1); - } else if (strcmp(buf, "fb") == 0) { - picolcd_fb_reset(data, 1); - } else { - return -EINVAL; - } - return count; -} - -static const struct file_operations picolcd_debug_reset_fops = { - .owner = THIS_MODULE, - .open = picolcd_debug_reset_open, - .read = seq_read, - .llseek = seq_lseek, - .write = picolcd_debug_reset_write, - .release = single_release, -}; - -/* - * The "eeprom" file - */ -static ssize_t picolcd_debug_eeprom_read(struct file *f, char __user *u, - size_t s, loff_t *off) -{ - struct picolcd_data *data = f->private_data; - struct picolcd_pending *resp; - u8 raw_data[3]; - ssize_t ret = -EIO; - - if (s == 0) - return -EINVAL; - if (*off > 0x0ff) - return 0; - - /* prepare buffer with info about what we want to read (addr & len) */ - raw_data[0] = *off & 0xff; - raw_data[1] = (*off >> 8) & 0xff; - raw_data[2] = s < 20 ? s : 20; - if (*off + raw_data[2] > 0xff) - raw_data[2] = 0x100 - *off; - resp = picolcd_send_and_wait(data->hdev, REPORT_EE_READ, raw_data, - sizeof(raw_data)); - if (!resp) - return -EIO; - - if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { - /* successful read :) */ - ret = resp->raw_data[2]; - if (ret > s) - ret = s; - if (copy_to_user(u, resp->raw_data+3, ret)) - ret = -EFAULT; - else - *off += ret; - } /* anything else is some kind of IO error */ - - kfree(resp); - return ret; -} - -static ssize_t picolcd_debug_eeprom_write(struct file *f, const char __user *u, - size_t s, loff_t *off) -{ - struct picolcd_data *data = f->private_data; - struct picolcd_pending *resp; - ssize_t ret = -EIO; - u8 raw_data[23]; - - if (s == 0) - return -EINVAL; - if (*off > 0x0ff) - return -ENOSPC; - - memset(raw_data, 0, sizeof(raw_data)); - raw_data[0] = *off & 0xff; - raw_data[1] = (*off >> 8) & 0xff; - raw_data[2] = min((size_t)20, s); - if (*off + raw_data[2] > 0xff) - raw_data[2] = 0x100 - *off; - - if (copy_from_user(raw_data+3, u, min((u8)20, raw_data[2]))) - return -EFAULT; - resp = picolcd_send_and_wait(data->hdev, REPORT_EE_WRITE, raw_data, - sizeof(raw_data)); - - if (!resp) - return -EIO; - - if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { - /* check if written data matches */ - if (memcmp(raw_data, resp->raw_data, 3+raw_data[2]) == 0) { - *off += raw_data[2]; - ret = raw_data[2]; - } - } - kfree(resp); - return ret; -} - -/* - * Notes: - * - read/write happens in chunks of at most 20 bytes, it's up to userspace - * to loop in order to get more data. - * - on write errors on otherwise correct write request the bytes - * that should have been written are in undefined state. - */ -static const struct file_operations picolcd_debug_eeprom_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = picolcd_debug_eeprom_read, - .write = picolcd_debug_eeprom_write, - .llseek = generic_file_llseek, -}; - -/* - * The "flash" file - */ -/* record a flash address to buf (bounds check to be done by caller) */ -static int _picolcd_flash_setaddr(struct picolcd_data *data, u8 *buf, long off) -{ - buf[0] = off & 0xff; - buf[1] = (off >> 8) & 0xff; - if (data->addr_sz == 3) - buf[2] = (off >> 16) & 0xff; - return data->addr_sz == 2 ? 2 : 3; -} - -/* read a given size of data (bounds check to be done by caller) */ -static ssize_t _picolcd_flash_read(struct picolcd_data *data, int report_id, - char __user *u, size_t s, loff_t *off) -{ - struct picolcd_pending *resp; - u8 raw_data[4]; - ssize_t ret = 0; - int len_off, err = -EIO; - - while (s > 0) { - err = -EIO; - len_off = _picolcd_flash_setaddr(data, raw_data, *off); - raw_data[len_off] = s > 32 ? 32 : s; - resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1); - if (!resp || !resp->in_report) - goto skip; - if (resp->in_report->id == REPORT_MEMORY || - resp->in_report->id == REPORT_BL_READ_MEMORY) { - if (memcmp(raw_data, resp->raw_data, len_off+1) != 0) - goto skip; - if (copy_to_user(u+ret, resp->raw_data+len_off+1, raw_data[len_off])) { - err = -EFAULT; - goto skip; - } - *off += raw_data[len_off]; - s -= raw_data[len_off]; - ret += raw_data[len_off]; - err = 0; - } -skip: - kfree(resp); - if (err) - return ret > 0 ? ret : err; - } - return ret; -} - -static ssize_t picolcd_debug_flash_read(struct file *f, char __user *u, - size_t s, loff_t *off) -{ - struct picolcd_data *data = f->private_data; - - if (s == 0) - return -EINVAL; - if (*off > 0x05fff) - return 0; - if (*off + s > 0x05fff) - s = 0x06000 - *off; - - if (data->status & PICOLCD_BOOTLOADER) - return _picolcd_flash_read(data, REPORT_BL_READ_MEMORY, u, s, off); - else - return _picolcd_flash_read(data, REPORT_READ_MEMORY, u, s, off); -} - -/* erase block aligned to 64bytes boundary */ -static ssize_t _picolcd_flash_erase64(struct picolcd_data *data, int report_id, - loff_t *off) -{ - struct picolcd_pending *resp; - u8 raw_data[3]; - int len_off; - ssize_t ret = -EIO; - - if (*off & 0x3f) - return -EINVAL; - - len_off = _picolcd_flash_setaddr(data, raw_data, *off); - resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off); - if (!resp || !resp->in_report) - goto skip; - if (resp->in_report->id == REPORT_MEMORY || - resp->in_report->id == REPORT_BL_ERASE_MEMORY) { - if (memcmp(raw_data, resp->raw_data, len_off) != 0) - goto skip; - ret = 0; - } -skip: - kfree(resp); - return ret; -} - -/* write a given size of data (bounds check to be done by caller) */ -static ssize_t _picolcd_flash_write(struct picolcd_data *data, int report_id, - const char __user *u, size_t s, loff_t *off) -{ - struct picolcd_pending *resp; - u8 raw_data[36]; - ssize_t ret = 0; - int len_off, err = -EIO; - - while (s > 0) { - err = -EIO; - len_off = _picolcd_flash_setaddr(data, raw_data, *off); - raw_data[len_off] = s > 32 ? 32 : s; - if (copy_from_user(raw_data+len_off+1, u, raw_data[len_off])) { - err = -EFAULT; - break; - } - resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, - len_off+1+raw_data[len_off]); - if (!resp || !resp->in_report) - goto skip; - if (resp->in_report->id == REPORT_MEMORY || - resp->in_report->id == REPORT_BL_WRITE_MEMORY) { - if (memcmp(raw_data, resp->raw_data, len_off+1+raw_data[len_off]) != 0) - goto skip; - *off += raw_data[len_off]; - s -= raw_data[len_off]; - ret += raw_data[len_off]; - err = 0; - } -skip: - kfree(resp); - if (err) - break; - } - return ret > 0 ? ret : err; -} - -static ssize_t picolcd_debug_flash_write(struct file *f, const char __user *u, - size_t s, loff_t *off) -{ - struct picolcd_data *data = f->private_data; - ssize_t err, ret = 0; - int report_erase, report_write; - - if (s == 0) - return -EINVAL; - if (*off > 0x5fff) - return -ENOSPC; - if (s & 0x3f) - return -EINVAL; - if (*off & 0x3f) - return -EINVAL; - - if (data->status & PICOLCD_BOOTLOADER) { - report_erase = REPORT_BL_ERASE_MEMORY; - report_write = REPORT_BL_WRITE_MEMORY; - } else { - report_erase = REPORT_ERASE_MEMORY; - report_write = REPORT_WRITE_MEMORY; - } - mutex_lock(&data->mutex_flash); - while (s > 0) { - err = _picolcd_flash_erase64(data, report_erase, off); - if (err) - break; - err = _picolcd_flash_write(data, report_write, u, 64, off); - if (err < 0) - break; - ret += err; - *off += err; - s -= err; - if (err != 64) - break; - } - mutex_unlock(&data->mutex_flash); - return ret > 0 ? ret : err; -} - -/* - * Notes: - * - concurrent writing is prevented by mutex and all writes must be - * n*64 bytes and 64-byte aligned, each write being preceded by an - * ERASE which erases a 64byte block. - * If less than requested was written or an error is returned for an - * otherwise correct write request the next 64-byte block which should - * have been written is in undefined state (mostly: original, erased, - * (half-)written with write error) - * - reading can happen without special restriction - */ -static const struct file_operations picolcd_debug_flash_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = picolcd_debug_flash_read, - .write = picolcd_debug_flash_write, - .llseek = generic_file_llseek, -}; - - -/* - * Helper code for HID report level dumping/debugging - */ -static const char *error_codes[] = { - "success", "parameter missing", "data_missing", "block readonly", - "block not erasable", "block too big", "section overflow", - "invalid command length", "invalid data length", -}; - -static void dump_buff_as_hex(char *dst, size_t dst_sz, const u8 *data, - const size_t data_len) -{ - int i, j; - for (i = j = 0; i < data_len && j + 3 < dst_sz; i++) { - dst[j++] = hex_asc[(data[i] >> 4) & 0x0f]; - dst[j++] = hex_asc[data[i] & 0x0f]; - dst[j++] = ' '; - } - if (j < dst_sz) { - dst[j--] = '\0'; - dst[j] = '\n'; - } else - dst[j] = '\0'; -} - -static void picolcd_debug_out_report(struct picolcd_data *data, - struct hid_device *hdev, struct hid_report *report) -{ - u8 raw_data[70]; - int raw_size = (report->size >> 3) + 1; - char *buff; -#define BUFF_SZ 256 - - /* Avoid unnecessary overhead if debugfs is disabled */ - if (list_empty(&hdev->debug_list)) - return; - - buff = kmalloc(BUFF_SZ, GFP_ATOMIC); - if (!buff) - return; - - snprintf(buff, BUFF_SZ, "\nout report %d (size %d) = ", - report->id, raw_size); - hid_debug_event(hdev, buff); - if (raw_size + 5 > sizeof(raw_data)) { - kfree(buff); - hid_debug_event(hdev, " TOO BIG\n"); - return; - } else { - raw_data[0] = report->id; - hid_output_report(report, raw_data); - dump_buff_as_hex(buff, BUFF_SZ, raw_data, raw_size); - hid_debug_event(hdev, buff); - } - - switch (report->id) { - case REPORT_LED_STATE: - /* 1 data byte with GPO state */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_LED_STATE", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tGPO state: 0x%02x\n", raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_BRIGHTNESS: - /* 1 data byte with brightness */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_BRIGHTNESS", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tBrightness: 0x%02x\n", raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_CONTRAST: - /* 1 data byte with contrast */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_CONTRAST", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tContrast: 0x%02x\n", raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_RESET: - /* 2 data bytes with reset duration in ms */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_RESET", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tDuration: 0x%02x%02x (%dms)\n", - raw_data[2], raw_data[1], raw_data[2] << 8 | raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_LCD_CMD: - /* 63 data bytes with LCD commands */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_LCD_CMD", report->id, raw_size-1); - hid_debug_event(hdev, buff); - /* TODO: format decoding */ - break; - case REPORT_LCD_DATA: - /* 63 data bytes with LCD data */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_LCD_CMD", report->id, raw_size-1); - /* TODO: format decoding */ - hid_debug_event(hdev, buff); - break; - case REPORT_LCD_CMD_DATA: - /* 63 data bytes with LCD commands and data */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_LCD_CMD", report->id, raw_size-1); - /* TODO: format decoding */ - hid_debug_event(hdev, buff); - break; - case REPORT_EE_READ: - /* 3 data bytes with read area description */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_EE_READ", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - hid_debug_event(hdev, buff); - break; - case REPORT_EE_WRITE: - /* 3+1..20 data bytes with write area description */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_EE_WRITE", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - hid_debug_event(hdev, buff); - if (raw_data[3] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - } else if (raw_data[3] + 4 <= raw_size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - } - hid_debug_event(hdev, buff); - break; - case REPORT_ERASE_MEMORY: - case REPORT_BL_ERASE_MEMORY: - /* 3 data bytes with pointer inside erase block */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_ERASE_MEMORY", report->id, raw_size-1); - hid_debug_event(hdev, buff); - switch (data->addr_sz) { - case 2: - snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - break; - case 3: - snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x%02x\n", - raw_data[3], raw_data[2], raw_data[1]); - break; - default: - snprintf(buff, BUFF_SZ, "\tNot supported\n"); - } - hid_debug_event(hdev, buff); - break; - case REPORT_READ_MEMORY: - case REPORT_BL_READ_MEMORY: - /* 4 data bytes with read area description */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_READ_MEMORY", report->id, raw_size-1); - hid_debug_event(hdev, buff); - switch (data->addr_sz) { - case 2: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - break; - case 3: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", - raw_data[3], raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); - break; - default: - snprintf(buff, BUFF_SZ, "\tNot supported\n"); - } - hid_debug_event(hdev, buff); - break; - case REPORT_WRITE_MEMORY: - case REPORT_BL_WRITE_MEMORY: - /* 4+1..32 data bytes with write adrea description */ - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_WRITE_MEMORY", report->id, raw_size-1); - hid_debug_event(hdev, buff); - switch (data->addr_sz) { - case 2: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - hid_debug_event(hdev, buff); - if (raw_data[3] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - } else if (raw_data[3] + 4 <= raw_size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - } - break; - case 3: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", - raw_data[3], raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); - hid_debug_event(hdev, buff); - if (raw_data[4] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - } else if (raw_data[4] + 5 <= raw_size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - } - break; - default: - snprintf(buff, BUFF_SZ, "\tNot supported\n"); - } - hid_debug_event(hdev, buff); - break; - case REPORT_SPLASH_RESTART: - /* TODO */ - break; - case REPORT_EXIT_KEYBOARD: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_EXIT_KEYBOARD", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", - raw_data[1] | (raw_data[2] << 8), - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_VERSION: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_VERSION", report->id, raw_size-1); - hid_debug_event(hdev, buff); - break; - case REPORT_DEVID: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_DEVID", report->id, raw_size-1); - hid_debug_event(hdev, buff); - break; - case REPORT_SPLASH_SIZE: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_SPLASH_SIZE", report->id, raw_size-1); - hid_debug_event(hdev, buff); - break; - case REPORT_HOOK_VERSION: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_HOOK_VERSION", report->id, raw_size-1); - hid_debug_event(hdev, buff); - break; - case REPORT_EXIT_FLASHER: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "REPORT_VERSION", report->id, raw_size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", - raw_data[1] | (raw_data[2] << 8), - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - break; - default: - snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", - "<unknown>", report->id, raw_size-1); - hid_debug_event(hdev, buff); - break; - } - wake_up_interruptible(&hdev->debug_wait); - kfree(buff); -} - -static void picolcd_debug_raw_event(struct picolcd_data *data, - struct hid_device *hdev, struct hid_report *report, - u8 *raw_data, int size) -{ - char *buff; - -#define BUFF_SZ 256 - /* Avoid unnecessary overhead if debugfs is disabled */ - if (!hdev->debug_events) - return; - - buff = kmalloc(BUFF_SZ, GFP_ATOMIC); - if (!buff) - return; - - switch (report->id) { - case REPORT_ERROR_CODE: - /* 2 data bytes with affected report and error code */ - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_ERROR_CODE", report->id, size-1); - hid_debug_event(hdev, buff); - if (raw_data[2] < ARRAY_SIZE(error_codes)) - snprintf(buff, BUFF_SZ, "\tError code 0x%02x (%s) in reply to report 0x%02x\n", - raw_data[2], error_codes[raw_data[2]], raw_data[1]); - else - snprintf(buff, BUFF_SZ, "\tError code 0x%02x in reply to report 0x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_KEY_STATE: - /* 2 data bytes with key state */ - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_KEY_STATE", report->id, size-1); - hid_debug_event(hdev, buff); - if (raw_data[1] == 0) - snprintf(buff, BUFF_SZ, "\tNo key pressed\n"); - else if (raw_data[2] == 0) - snprintf(buff, BUFF_SZ, "\tOne key pressed: 0x%02x (%d)\n", - raw_data[1], raw_data[1]); - else - snprintf(buff, BUFF_SZ, "\tTwo keys pressed: 0x%02x (%d), 0x%02x (%d)\n", - raw_data[1], raw_data[1], raw_data[2], raw_data[2]); - hid_debug_event(hdev, buff); - break; - case REPORT_IR_DATA: - /* Up to 20 byes of IR scancode data */ - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_IR_DATA", report->id, size-1); - hid_debug_event(hdev, buff); - if (raw_data[1] == 0) { - snprintf(buff, BUFF_SZ, "\tUnexpectedly 0 data length\n"); - hid_debug_event(hdev, buff); - } else if (raw_data[1] + 1 <= size) { - snprintf(buff, BUFF_SZ, "\tData length: %d\n\tIR Data: ", - raw_data[1]-1); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+2, raw_data[1]-1); - hid_debug_event(hdev, buff); - } else { - snprintf(buff, BUFF_SZ, "\tOverflowing data length: %d\n", - raw_data[1]-1); - hid_debug_event(hdev, buff); - } - break; - case REPORT_EE_DATA: - /* Data buffer in response to REPORT_EE_READ or REPORT_EE_WRITE */ - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_EE_DATA", report->id, size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - hid_debug_event(hdev, buff); - if (raw_data[3] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - hid_debug_event(hdev, buff); - } else if (raw_data[3] + 4 <= size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); - hid_debug_event(hdev, buff); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - hid_debug_event(hdev, buff); - } - break; - case REPORT_MEMORY: - /* Data buffer in response to REPORT_READ_MEMORY or REPORT_WRTIE_MEMORY */ - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_MEMORY", report->id, size-1); - hid_debug_event(hdev, buff); - switch (data->addr_sz) { - case 2: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); - hid_debug_event(hdev, buff); - if (raw_data[3] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - } else if (raw_data[3] + 4 <= size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - } - break; - case 3: - snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", - raw_data[3], raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); - hid_debug_event(hdev, buff); - if (raw_data[4] == 0) { - snprintf(buff, BUFF_SZ, "\tNo data\n"); - } else if (raw_data[4] + 5 <= size) { - snprintf(buff, BUFF_SZ, "\tData: "); - hid_debug_event(hdev, buff); - dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); - } else { - snprintf(buff, BUFF_SZ, "\tData overflowed\n"); - } - break; - default: - snprintf(buff, BUFF_SZ, "\tNot supported\n"); - } - hid_debug_event(hdev, buff); - break; - case REPORT_VERSION: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_VERSION", report->id, size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", - raw_data[2], raw_data[1]); - hid_debug_event(hdev, buff); - break; - case REPORT_BL_ERASE_MEMORY: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_BL_ERASE_MEMORY", report->id, size-1); - hid_debug_event(hdev, buff); - /* TODO */ - break; - case REPORT_BL_READ_MEMORY: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_BL_READ_MEMORY", report->id, size-1); - hid_debug_event(hdev, buff); - /* TODO */ - break; - case REPORT_BL_WRITE_MEMORY: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_BL_WRITE_MEMORY", report->id, size-1); - hid_debug_event(hdev, buff); - /* TODO */ - break; - case REPORT_DEVID: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_DEVID", report->id, size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tSerial: 0x%02x%02x%02x%02x\n", - raw_data[1], raw_data[2], raw_data[3], raw_data[4]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tType: 0x%02x\n", - raw_data[5]); - hid_debug_event(hdev, buff); - break; - case REPORT_SPLASH_SIZE: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_SPLASH_SIZE", report->id, size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tTotal splash space: %d\n", - (raw_data[2] << 8) | raw_data[1]); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tUsed splash space: %d\n", - (raw_data[4] << 8) | raw_data[3]); - hid_debug_event(hdev, buff); - break; - case REPORT_HOOK_VERSION: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "REPORT_HOOK_VERSION", report->id, size-1); - hid_debug_event(hdev, buff); - snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", - raw_data[1], raw_data[2]); - hid_debug_event(hdev, buff); - break; - default: - snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", - "<unknown>", report->id, size-1); - hid_debug_event(hdev, buff); - break; - } - wake_up_interruptible(&hdev->debug_wait); - kfree(buff); -} - -static void picolcd_init_devfs(struct picolcd_data *data, - struct hid_report *eeprom_r, struct hid_report *eeprom_w, - struct hid_report *flash_r, struct hid_report *flash_w, - struct hid_report *reset) -{ - struct hid_device *hdev = data->hdev; - - mutex_init(&data->mutex_flash); - - /* reset */ - if (reset) - data->debug_reset = debugfs_create_file("reset", 0600, - hdev->debug_dir, data, &picolcd_debug_reset_fops); - - /* eeprom */ - if (eeprom_r || eeprom_w) - data->debug_eeprom = debugfs_create_file("eeprom", - (eeprom_w ? S_IWUSR : 0) | (eeprom_r ? S_IRUSR : 0), - hdev->debug_dir, data, &picolcd_debug_eeprom_fops); - - /* flash */ - if (flash_r && flash_r->maxfield == 1 && flash_r->field[0]->report_size == 8) - data->addr_sz = flash_r->field[0]->report_count - 1; - else - data->addr_sz = -1; - if (data->addr_sz == 2 || data->addr_sz == 3) { - data->debug_flash = debugfs_create_file("flash", - (flash_w ? S_IWUSR : 0) | (flash_r ? S_IRUSR : 0), - hdev->debug_dir, data, &picolcd_debug_flash_fops); - } else if (flash_r || flash_w) - hid_warn(hdev, "Unexpected FLASH access reports, please submit rdesc for review\n"); -} - -static void picolcd_exit_devfs(struct picolcd_data *data) -{ - struct dentry *dent; - - dent = data->debug_reset; - data->debug_reset = NULL; - if (dent) - debugfs_remove(dent); - dent = data->debug_eeprom; - data->debug_eeprom = NULL; - if (dent) - debugfs_remove(dent); - dent = data->debug_flash; - data->debug_flash = NULL; - if (dent) - debugfs_remove(dent); - mutex_destroy(&data->mutex_flash); -} -#else -static inline void picolcd_debug_raw_event(struct picolcd_data *data, - struct hid_device *hdev, struct hid_report *report, - u8 *raw_data, int size) -{ -} -static inline void picolcd_init_devfs(struct picolcd_data *data, - struct hid_report *eeprom_r, struct hid_report *eeprom_w, - struct hid_report *flash_r, struct hid_report *flash_w, - struct hid_report *reset) -{ -} -static inline void picolcd_exit_devfs(struct picolcd_data *data) -{ -} -#endif /* CONFIG_DEBUG_FS */ - -/* - * Handle raw report as sent by device - */ -static int picolcd_raw_event(struct hid_device *hdev, - struct hid_report *report, u8 *raw_data, int size) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - unsigned long flags; - int ret = 0; - - if (!data) - return 1; - - if (report->id == REPORT_KEY_STATE) { - if (data->input_keys) - ret = picolcd_raw_keypad(data, report, raw_data+1, size-1); - } else if (report->id == REPORT_IR_DATA) { - if (data->input_cir) - ret = picolcd_raw_cir(data, report, raw_data+1, size-1); - } else { - spin_lock_irqsave(&data->lock, flags); - /* - * We let the caller of picolcd_send_and_wait() check if the - * report we got is one of the expected ones or not. - */ - if (data->pending) { - memcpy(data->pending->raw_data, raw_data+1, size-1); - data->pending->raw_size = size-1; - data->pending->in_report = report; - complete(&data->pending->ready); - } - spin_unlock_irqrestore(&data->lock, flags); - } - - picolcd_debug_raw_event(data, hdev, report, raw_data, size); - return 1; -} - -#ifdef CONFIG_PM -static int picolcd_suspend(struct hid_device *hdev, pm_message_t message) -{ - if (PMSG_IS_AUTO(message)) - return 0; - - picolcd_suspend_backlight(hid_get_drvdata(hdev)); - dbg_hid(PICOLCD_NAME " device ready for suspend\n"); - return 0; -} - -static int picolcd_resume(struct hid_device *hdev) -{ - int ret; - ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); - if (ret) - dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); - return 0; -} - -static int picolcd_reset_resume(struct hid_device *hdev) -{ - int ret; - ret = picolcd_reset(hdev); - if (ret) - dbg_hid(PICOLCD_NAME " resetting our device failed: %d\n", ret); - ret = picolcd_fb_reset(hid_get_drvdata(hdev), 0); - if (ret) - dbg_hid(PICOLCD_NAME " restoring framebuffer content failed: %d\n", ret); - ret = picolcd_resume_lcd(hid_get_drvdata(hdev)); - if (ret) - dbg_hid(PICOLCD_NAME " restoring lcd failed: %d\n", ret); - ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); - if (ret) - dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); - picolcd_leds_set(hid_get_drvdata(hdev)); - return 0; -} -#endif - -/* initialize keypad input device */ -static int picolcd_init_keys(struct picolcd_data *data, - struct hid_report *report) -{ - struct hid_device *hdev = data->hdev; - struct input_dev *idev; - int error, i; - - if (!report) - return -ENODEV; - if (report->maxfield != 1 || report->field[0]->report_count != 2 || - report->field[0]->report_size != 8) { - hid_err(hdev, "unsupported KEY_STATE report\n"); - return -EINVAL; - } - - idev = input_allocate_device(); - if (idev == NULL) { - hid_err(hdev, "failed to allocate input device\n"); - return -ENOMEM; - } - input_set_drvdata(idev, hdev); - memcpy(data->keycode, def_keymap, sizeof(def_keymap)); - idev->name = hdev->name; - idev->phys = hdev->phys; - idev->uniq = hdev->uniq; - idev->id.bustype = hdev->bus; - idev->id.vendor = hdev->vendor; - idev->id.product = hdev->product; - idev->id.version = hdev->version; - idev->dev.parent = hdev->dev.parent; - idev->keycode = &data->keycode; - idev->keycodemax = PICOLCD_KEYS; - idev->keycodesize = sizeof(data->keycode[0]); - input_set_capability(idev, EV_MSC, MSC_SCAN); - set_bit(EV_REP, idev->evbit); - for (i = 0; i < PICOLCD_KEYS; i++) - input_set_capability(idev, EV_KEY, data->keycode[i]); - error = input_register_device(idev); - if (error) { - hid_err(hdev, "error registering the input device\n"); - input_free_device(idev); - return error; - } - data->input_keys = idev; - return 0; -} - -static void picolcd_exit_keys(struct picolcd_data *data) -{ - struct input_dev *idev = data->input_keys; - - data->input_keys = NULL; - if (idev) - input_unregister_device(idev); -} - -/* initialize CIR input device */ -static inline int picolcd_init_cir(struct picolcd_data *data, struct hid_report *report) -{ - /* support not implemented yet */ - return 0; -} - -static inline void picolcd_exit_cir(struct picolcd_data *data) -{ -} - -static int picolcd_probe_lcd(struct hid_device *hdev, struct picolcd_data *data) -{ - int error; - - error = picolcd_check_version(hdev); - if (error) - return error; - - if (data->version[0] != 0 && data->version[1] != 3) - hid_info(hdev, "Device with untested firmware revision, please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n", - dev_name(&hdev->dev)); - - /* Setup keypad input device */ - error = picolcd_init_keys(data, picolcd_in_report(REPORT_KEY_STATE, hdev)); - if (error) - goto err; - - /* Setup CIR input device */ - error = picolcd_init_cir(data, picolcd_in_report(REPORT_IR_DATA, hdev)); - if (error) - goto err; - - /* Set up the framebuffer device */ - error = picolcd_init_framebuffer(data); - if (error) - goto err; - - /* Setup lcd class device */ - error = picolcd_init_lcd(data, picolcd_out_report(REPORT_CONTRAST, hdev)); - if (error) - goto err; - - /* Setup backlight class device */ - error = picolcd_init_backlight(data, picolcd_out_report(REPORT_BRIGHTNESS, hdev)); - if (error) - goto err; - - /* Setup the LED class devices */ - error = picolcd_init_leds(data, picolcd_out_report(REPORT_LED_STATE, hdev)); - if (error) - goto err; - - picolcd_init_devfs(data, picolcd_out_report(REPORT_EE_READ, hdev), - picolcd_out_report(REPORT_EE_WRITE, hdev), - picolcd_out_report(REPORT_READ_MEMORY, hdev), - picolcd_out_report(REPORT_WRITE_MEMORY, hdev), - picolcd_out_report(REPORT_RESET, hdev)); - return 0; -err: - picolcd_exit_leds(data); - picolcd_exit_backlight(data); - picolcd_exit_lcd(data); - picolcd_exit_framebuffer(data); - picolcd_exit_cir(data); - picolcd_exit_keys(data); - return error; -} - -static int picolcd_probe_bootloader(struct hid_device *hdev, struct picolcd_data *data) -{ - int error; - - error = picolcd_check_version(hdev); - if (error) - return error; - - if (data->version[0] != 1 && data->version[1] != 0) - hid_info(hdev, "Device with untested bootloader revision, please submit /sys/kernel/debug/hid/%s/rdesc for this device.\n", - dev_name(&hdev->dev)); - - picolcd_init_devfs(data, NULL, NULL, - picolcd_out_report(REPORT_BL_READ_MEMORY, hdev), - picolcd_out_report(REPORT_BL_WRITE_MEMORY, hdev), NULL); - return 0; -} - -static int picolcd_probe(struct hid_device *hdev, - const struct hid_device_id *id) -{ - struct picolcd_data *data; - int error = -ENOMEM; - - dbg_hid(PICOLCD_NAME " hardware probe...\n"); - - /* - * Let's allocate the picolcd data structure, set some reasonable - * defaults, and associate it with the device - */ - data = kzalloc(sizeof(struct picolcd_data), GFP_KERNEL); - if (data == NULL) { - hid_err(hdev, "can't allocate space for Minibox PicoLCD device data\n"); - error = -ENOMEM; - goto err_no_cleanup; - } - - spin_lock_init(&data->lock); - mutex_init(&data->mutex); - data->hdev = hdev; - data->opmode_delay = 5000; - if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) - data->status |= PICOLCD_BOOTLOADER; - hid_set_drvdata(hdev, data); - - /* Parse the device reports and start it up */ - error = hid_parse(hdev); - if (error) { - hid_err(hdev, "device report parse failed\n"); - goto err_cleanup_data; - } - - error = hid_hw_start(hdev, 0); - if (error) { - hid_err(hdev, "hardware start failed\n"); - goto err_cleanup_data; - } - - error = hid_hw_open(hdev); - if (error) { - hid_err(hdev, "failed to open input interrupt pipe for key and IR events\n"); - goto err_cleanup_hid_hw; - } - - error = device_create_file(&hdev->dev, &dev_attr_operation_mode_delay); - if (error) { - hid_err(hdev, "failed to create sysfs attributes\n"); - goto err_cleanup_hid_ll; - } - - error = device_create_file(&hdev->dev, &dev_attr_operation_mode); - if (error) { - hid_err(hdev, "failed to create sysfs attributes\n"); - goto err_cleanup_sysfs1; - } - - if (data->status & PICOLCD_BOOTLOADER) - error = picolcd_probe_bootloader(hdev, data); - else - error = picolcd_probe_lcd(hdev, data); - if (error) - goto err_cleanup_sysfs2; - - dbg_hid(PICOLCD_NAME " activated and initialized\n"); - return 0; - -err_cleanup_sysfs2: - device_remove_file(&hdev->dev, &dev_attr_operation_mode); -err_cleanup_sysfs1: - device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); -err_cleanup_hid_ll: - hid_hw_close(hdev); -err_cleanup_hid_hw: - hid_hw_stop(hdev); -err_cleanup_data: - kfree(data); -err_no_cleanup: - hid_set_drvdata(hdev, NULL); - - return error; -} - -static void picolcd_remove(struct hid_device *hdev) -{ - struct picolcd_data *data = hid_get_drvdata(hdev); - unsigned long flags; - - dbg_hid(PICOLCD_NAME " hardware remove...\n"); - spin_lock_irqsave(&data->lock, flags); - data->status |= PICOLCD_FAILED; - spin_unlock_irqrestore(&data->lock, flags); -#ifdef CONFIG_HID_PICOLCD_FB - /* short-circuit FB as early as possible in order to - * avoid long delays if we host console. - */ - if (data->fb_info) - data->fb_info->par = NULL; -#endif - - picolcd_exit_devfs(data); - device_remove_file(&hdev->dev, &dev_attr_operation_mode); - device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); - hid_hw_close(hdev); - hid_hw_stop(hdev); - hid_set_drvdata(hdev, NULL); - - /* Shortcut potential pending reply that will never arrive */ - spin_lock_irqsave(&data->lock, flags); - if (data->pending) - complete(&data->pending->ready); - spin_unlock_irqrestore(&data->lock, flags); - - /* Cleanup LED */ - picolcd_exit_leds(data); - /* Clean up the framebuffer */ - picolcd_exit_backlight(data); - picolcd_exit_lcd(data); - picolcd_exit_framebuffer(data); - /* Cleanup input */ - picolcd_exit_cir(data); - picolcd_exit_keys(data); - - mutex_destroy(&data->mutex); - /* Finally, clean up the picolcd data itself */ - kfree(data); -} - -static const struct hid_device_id picolcd_devices[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) }, - { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) }, - { } -}; -MODULE_DEVICE_TABLE(hid, picolcd_devices); - -static struct hid_driver picolcd_driver = { - .name = "hid-picolcd", - .id_table = picolcd_devices, - .probe = picolcd_probe, - .remove = picolcd_remove, - .raw_event = picolcd_raw_event, -#ifdef CONFIG_PM - .suspend = picolcd_suspend, - .resume = picolcd_resume, - .reset_resume = picolcd_reset_resume, -#endif -}; - -static int __init picolcd_init(void) -{ - return hid_register_driver(&picolcd_driver); -} - -static void __exit picolcd_exit(void) -{ - hid_unregister_driver(&picolcd_driver); -#ifdef CONFIG_HID_PICOLCD_FB - flush_work_sync(&picolcd_fb_cleanup); - WARN_ON(fb_pending); -#endif -} - -module_init(picolcd_init); -module_exit(picolcd_exit); -MODULE_DESCRIPTION("Minibox graphics PicoLCD Driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/hid/hid-picolcd.h b/drivers/hid/hid-picolcd.h new file mode 100644 index 00000000000..020cef69f6a --- /dev/null +++ b/drivers/hid/hid-picolcd.h @@ -0,0 +1,309 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#define PICOLCD_NAME "PicoLCD (graphic)" + +/* Report numbers */ +#define REPORT_ERROR_CODE 0x10 /* LCD: IN[16] */ +#define ERR_SUCCESS 0x00 +#define ERR_PARAMETER_MISSING 0x01 +#define ERR_DATA_MISSING 0x02 +#define ERR_BLOCK_READ_ONLY 0x03 +#define ERR_BLOCK_NOT_ERASABLE 0x04 +#define ERR_BLOCK_TOO_BIG 0x05 +#define ERR_SECTION_OVERFLOW 0x06 +#define ERR_INVALID_CMD_LEN 0x07 +#define ERR_INVALID_DATA_LEN 0x08 +#define REPORT_KEY_STATE 0x11 /* LCD: IN[2] */ +#define REPORT_IR_DATA 0x21 /* LCD: IN[63] */ +#define REPORT_EE_DATA 0x32 /* LCD: IN[63] */ +#define REPORT_MEMORY 0x41 /* LCD: IN[63] */ +#define REPORT_LED_STATE 0x81 /* LCD: OUT[1] */ +#define REPORT_BRIGHTNESS 0x91 /* LCD: OUT[1] */ +#define REPORT_CONTRAST 0x92 /* LCD: OUT[1] */ +#define REPORT_RESET 0x93 /* LCD: OUT[2] */ +#define REPORT_LCD_CMD 0x94 /* LCD: OUT[63] */ +#define REPORT_LCD_DATA 0x95 /* LCD: OUT[63] */ +#define REPORT_LCD_CMD_DATA 0x96 /* LCD: OUT[63] */ +#define REPORT_EE_READ 0xa3 /* LCD: OUT[63] */ +#define REPORT_EE_WRITE 0xa4 /* LCD: OUT[63] */ +#define REPORT_ERASE_MEMORY 0xb2 /* LCD: OUT[2] */ +#define REPORT_READ_MEMORY 0xb3 /* LCD: OUT[3] */ +#define REPORT_WRITE_MEMORY 0xb4 /* LCD: OUT[63] */ +#define REPORT_SPLASH_RESTART 0xc1 /* LCD: OUT[1] */ +#define REPORT_EXIT_KEYBOARD 0xef /* LCD: OUT[2] */ +#define REPORT_VERSION 0xf1 /* LCD: IN[2],OUT[1] Bootloader: IN[2],OUT[1] */ +#define REPORT_BL_ERASE_MEMORY 0xf2 /* Bootloader: IN[36],OUT[4] */ +#define REPORT_BL_READ_MEMORY 0xf3 /* Bootloader: IN[36],OUT[4] */ +#define REPORT_BL_WRITE_MEMORY 0xf4 /* Bootloader: IN[36],OUT[36] */ +#define REPORT_DEVID 0xf5 /* LCD: IN[5], OUT[1] Bootloader: IN[5],OUT[1] */ +#define REPORT_SPLASH_SIZE 0xf6 /* LCD: IN[4], OUT[1] */ +#define REPORT_HOOK_VERSION 0xf7 /* LCD: IN[2], OUT[1] */ +#define REPORT_EXIT_FLASHER 0xff /* Bootloader: OUT[2] */ + +/* Description of in-progress IO operation, used for operations + * that trigger response from device */ +struct picolcd_pending { + struct hid_report *out_report; + struct hid_report *in_report; + struct completion ready; + int raw_size; + u8 raw_data[64]; +}; + + +#define PICOLCD_KEYS 17 + +/* Per device data structure */ +struct picolcd_data { + struct hid_device *hdev; +#ifdef CONFIG_DEBUG_FS + struct dentry *debug_reset; + struct dentry *debug_eeprom; + struct dentry *debug_flash; + struct mutex mutex_flash; + int addr_sz; +#endif + u8 version[2]; + unsigned short opmode_delay; + /* input stuff */ + u8 pressed_keys[2]; + struct input_dev *input_keys; +#ifdef CONFIG_HID_PICOLCD_CIR + struct rc_dev *rc_dev; +#endif + unsigned short keycode[PICOLCD_KEYS]; + +#ifdef CONFIG_HID_PICOLCD_FB + /* Framebuffer stuff */ + struct fb_info *fb_info; +#endif /* CONFIG_HID_PICOLCD_FB */ +#ifdef CONFIG_HID_PICOLCD_LCD + struct lcd_device *lcd; + u8 lcd_contrast; +#endif /* CONFIG_HID_PICOLCD_LCD */ +#ifdef CONFIG_HID_PICOLCD_BACKLIGHT + struct backlight_device *backlight; + u8 lcd_brightness; + u8 lcd_power; +#endif /* CONFIG_HID_PICOLCD_BACKLIGHT */ +#ifdef CONFIG_HID_PICOLCD_LEDS + /* LED stuff */ + u8 led_state; + struct led_classdev *led[8]; +#endif /* CONFIG_HID_PICOLCD_LEDS */ + + /* Housekeeping stuff */ + spinlock_t lock; + struct mutex mutex; + struct picolcd_pending *pending; + int status; +#define PICOLCD_BOOTLOADER 1 +#define PICOLCD_FAILED 2 +#define PICOLCD_CIR_SHUN 4 +}; + +#ifdef CONFIG_HID_PICOLCD_FB +struct picolcd_fb_data { + /* Framebuffer stuff */ + spinlock_t lock; + struct picolcd_data *picolcd; + u8 update_rate; + u8 bpp; + u8 force; + u8 ready; + u8 *vbitmap; /* local copy of what was sent to PicoLCD */ + u8 *bitmap; /* framebuffer */ +}; +#endif /* CONFIG_HID_PICOLCD_FB */ + +/* Find a given report */ +#define picolcd_in_report(id, dev) picolcd_report(id, dev, HID_INPUT_REPORT) +#define picolcd_out_report(id, dev) picolcd_report(id, dev, HID_OUTPUT_REPORT) + +struct hid_report *picolcd_report(int id, struct hid_device *hdev, int dir); + +#ifdef CONFIG_DEBUG_FS +void picolcd_debug_out_report(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report); +#define usbhid_submit_report(a, b, c) \ + do { \ + picolcd_debug_out_report(hid_get_drvdata(a), a, b); \ + usbhid_submit_report(a, b, c); \ + } while (0) + +void picolcd_debug_raw_event(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report, + u8 *raw_data, int size); + +void picolcd_init_devfs(struct picolcd_data *data, + struct hid_report *eeprom_r, struct hid_report *eeprom_w, + struct hid_report *flash_r, struct hid_report *flash_w, + struct hid_report *reset); + +void picolcd_exit_devfs(struct picolcd_data *data); +#else +static inline void picolcd_debug_out_report(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report) +{ +} +static inline void picolcd_debug_raw_event(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report, + u8 *raw_data, int size) +{ +} +static inline void picolcd_init_devfs(struct picolcd_data *data, + struct hid_report *eeprom_r, struct hid_report *eeprom_w, + struct hid_report *flash_r, struct hid_report *flash_w, + struct hid_report *reset) +{ +} +static inline void picolcd_exit_devfs(struct picolcd_data *data) +{ +} +#endif /* CONFIG_DEBUG_FS */ + + +#ifdef CONFIG_HID_PICOLCD_FB +int picolcd_fb_reset(struct picolcd_data *data, int clear); + +int picolcd_init_framebuffer(struct picolcd_data *data); + +void picolcd_exit_framebuffer(struct picolcd_data *data); + +void picolcd_fb_refresh(struct picolcd_data *data); +#define picolcd_fbinfo(d) ((d)->fb_info) +#else +static inline int picolcd_fb_reset(struct picolcd_data *data, int clear) +{ + return 0; +} +static inline int picolcd_init_framebuffer(struct picolcd_data *data) +{ + return 0; +} +static inline void picolcd_exit_framebuffer(struct picolcd_data *data) +{ +} +static inline void picolcd_fb_refresh(struct picolcd_data *data) +{ +} +#define picolcd_fbinfo(d) NULL +#endif /* CONFIG_HID_PICOLCD_FB */ + + +#ifdef CONFIG_HID_PICOLCD_BACKLIGHT +int picolcd_init_backlight(struct picolcd_data *data, + struct hid_report *report); + +void picolcd_exit_backlight(struct picolcd_data *data); + +int picolcd_resume_backlight(struct picolcd_data *data); + +void picolcd_suspend_backlight(struct picolcd_data *data); +#else +static inline int picolcd_init_backlight(struct picolcd_data *data, + struct hid_report *report) +{ + return 0; +} +static inline void picolcd_exit_backlight(struct picolcd_data *data) +{ +} +static inline int picolcd_resume_backlight(struct picolcd_data *data) +{ + return 0; +} +static inline void picolcd_suspend_backlight(struct picolcd_data *data) +{ +} + +#endif /* CONFIG_HID_PICOLCD_BACKLIGHT */ + + +#ifdef CONFIG_HID_PICOLCD_LCD +int picolcd_init_lcd(struct picolcd_data *data, + struct hid_report *report); + +void picolcd_exit_lcd(struct picolcd_data *data); + +int picolcd_resume_lcd(struct picolcd_data *data); +#else +static inline int picolcd_init_lcd(struct picolcd_data *data, + struct hid_report *report) +{ + return 0; +} +static inline void picolcd_exit_lcd(struct picolcd_data *data) +{ +} +static inline int picolcd_resume_lcd(struct picolcd_data *data) +{ + return 0; +} +#endif /* CONFIG_HID_PICOLCD_LCD */ + + +#ifdef CONFIG_HID_PICOLCD_LEDS +int picolcd_init_leds(struct picolcd_data *data, + struct hid_report *report); + +void picolcd_exit_leds(struct picolcd_data *data); + +void picolcd_leds_set(struct picolcd_data *data); +#else +static inline int picolcd_init_leds(struct picolcd_data *data, + struct hid_report *report) +{ + return 0; +} +static inline void picolcd_exit_leds(struct picolcd_data *data) +{ +} +static inline void picolcd_leds_set(struct picolcd_data *data) +{ +} +#endif /* CONFIG_HID_PICOLCD_LEDS */ + + +#ifdef CONFIG_HID_PICOLCD_CIR +int picolcd_raw_cir(struct picolcd_data *data, + struct hid_report *report, u8 *raw_data, int size); + +int picolcd_init_cir(struct picolcd_data *data, struct hid_report *report); + +void picolcd_exit_cir(struct picolcd_data *data); +#else +static inline int picolcd_raw_cir(struct picolcd_data *data, + struct hid_report *report, u8 *raw_data, int size) +{ + return 1; +} +static inline int picolcd_init_cir(struct picolcd_data *data, struct hid_report *report) +{ + return 0; +} +static inline void picolcd_exit_cir(struct picolcd_data *data) +{ +} +#endif /* CONFIG_HID_PICOLCD_LIRC */ + +int picolcd_reset(struct hid_device *hdev); +struct picolcd_pending *picolcd_send_and_wait(struct hid_device *hdev, + int report_id, const u8 *raw_data, int size); diff --git a/drivers/hid/hid-picolcd_backlight.c b/drivers/hid/hid-picolcd_backlight.c new file mode 100644 index 00000000000..b91f30945f9 --- /dev/null +++ b/drivers/hid/hid-picolcd_backlight.c @@ -0,0 +1,122 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/backlight.h> + +#include "hid-picolcd.h" + +static int picolcd_get_brightness(struct backlight_device *bdev) +{ + struct picolcd_data *data = bl_get_data(bdev); + return data->lcd_brightness; +} + +static int picolcd_set_brightness(struct backlight_device *bdev) +{ + struct picolcd_data *data = bl_get_data(bdev); + struct hid_report *report = picolcd_out_report(REPORT_BRIGHTNESS, data->hdev); + unsigned long flags; + + if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) + return -ENODEV; + + data->lcd_brightness = bdev->props.brightness & 0x0ff; + data->lcd_power = bdev->props.power; + spin_lock_irqsave(&data->lock, flags); + hid_set_field(report->field[0], 0, data->lcd_power == FB_BLANK_UNBLANK ? data->lcd_brightness : 0); + if (!(data->status & PICOLCD_FAILED)) + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + return 0; +} + +static int picolcd_check_bl_fb(struct backlight_device *bdev, struct fb_info *fb) +{ + return fb && fb == picolcd_fbinfo((struct picolcd_data *)bl_get_data(bdev)); +} + +static const struct backlight_ops picolcd_blops = { + .update_status = picolcd_set_brightness, + .get_brightness = picolcd_get_brightness, + .check_fb = picolcd_check_bl_fb, +}; + +int picolcd_init_backlight(struct picolcd_data *data, struct hid_report *report) +{ + struct device *dev = &data->hdev->dev; + struct backlight_device *bdev; + struct backlight_properties props; + if (!report) + return -ENODEV; + if (report->maxfield != 1 || report->field[0]->report_count != 1 || + report->field[0]->report_size != 8) { + dev_err(dev, "unsupported BRIGHTNESS report"); + return -EINVAL; + } + + memset(&props, 0, sizeof(props)); + props.type = BACKLIGHT_RAW; + props.max_brightness = 0xff; + bdev = backlight_device_register(dev_name(dev), dev, data, + &picolcd_blops, &props); + if (IS_ERR(bdev)) { + dev_err(dev, "failed to register backlight\n"); + return PTR_ERR(bdev); + } + bdev->props.brightness = 0xff; + data->lcd_brightness = 0xff; + data->backlight = bdev; + picolcd_set_brightness(bdev); + return 0; +} + +void picolcd_exit_backlight(struct picolcd_data *data) +{ + struct backlight_device *bdev = data->backlight; + + data->backlight = NULL; + if (bdev) + backlight_device_unregister(bdev); +} + +int picolcd_resume_backlight(struct picolcd_data *data) +{ + if (!data->backlight) + return 0; + return picolcd_set_brightness(data->backlight); +} + +#ifdef CONFIG_PM +void picolcd_suspend_backlight(struct picolcd_data *data) +{ + int bl_power = data->lcd_power; + if (!data->backlight) + return; + + data->backlight->props.power = FB_BLANK_POWERDOWN; + picolcd_set_brightness(data->backlight); + data->lcd_power = data->backlight->props.power = bl_power; +} +#endif /* CONFIG_PM */ + diff --git a/drivers/hid/hid-picolcd_cir.c b/drivers/hid/hid-picolcd_cir.c new file mode 100644 index 00000000000..13ca9191b63 --- /dev/null +++ b/drivers/hid/hid-picolcd_cir.c @@ -0,0 +1,152 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include <linux/hid-debug.h> +#include <linux/input.h> +#include "hid-ids.h" +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/vmalloc.h> +#include <linux/backlight.h> +#include <linux/lcd.h> + +#include <linux/leds.h> + +#include <linux/seq_file.h> +#include <linux/debugfs.h> + +#include <linux/completion.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <media/rc-core.h> + +#include "hid-picolcd.h" + + +int picolcd_raw_cir(struct picolcd_data *data, + struct hid_report *report, u8 *raw_data, int size) +{ + unsigned long flags; + int i, w, sz; + DEFINE_IR_RAW_EVENT(rawir); + + /* ignore if rc_dev is NULL or status is shunned */ + spin_lock_irqsave(&data->lock, flags); + if (!data->rc_dev || (data->status & PICOLCD_CIR_SHUN)) { + spin_unlock_irqrestore(&data->lock, flags); + return 1; + } + spin_unlock_irqrestore(&data->lock, flags); + + /* PicoLCD USB packets contain 16-bit intervals in network order, + * with value negated for pulse. Intervals are in microseconds. + * + * Note: some userspace LIRC code for PicoLCD says negated values + * for space - is it a matter of IR chip? (pulse for my TSOP2236) + * + * In addition, the first interval seems to be around 15000 + base + * interval for non-first report of IR data - thus the quirk below + * to get RC_CODE to understand Sony and JVC remotes I have at hand + */ + sz = size > 0 ? min((int)raw_data[0], size-1) : 0; + for (i = 0; i+1 < sz; i += 2) { + init_ir_raw_event(&rawir); + w = (raw_data[i] << 8) | (raw_data[i+1]); + rawir.pulse = !!(w & 0x8000); + rawir.duration = US_TO_NS(rawir.pulse ? (65536 - w) : w); + /* Quirk!! - see above */ + if (i == 0 && rawir.duration > 15000000) + rawir.duration -= 15000000; + ir_raw_event_store(data->rc_dev, &rawir); + } + ir_raw_event_handle(data->rc_dev); + + return 1; +} + +static int picolcd_cir_open(struct rc_dev *dev) +{ + struct picolcd_data *data = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + data->status &= ~PICOLCD_CIR_SHUN; + spin_unlock_irqrestore(&data->lock, flags); + return 0; +} + +static void picolcd_cir_close(struct rc_dev *dev) +{ + struct picolcd_data *data = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + data->status |= PICOLCD_CIR_SHUN; + spin_unlock_irqrestore(&data->lock, flags); +} + +/* initialize CIR input device */ +int picolcd_init_cir(struct picolcd_data *data, struct hid_report *report) +{ + struct rc_dev *rdev; + int ret = 0; + + rdev = rc_allocate_device(); + if (!rdev) + return -ENOMEM; + + rdev->priv = data; + rdev->driver_type = RC_DRIVER_IR_RAW; + rdev->allowed_protos = RC_TYPE_ALL; + rdev->open = picolcd_cir_open; + rdev->close = picolcd_cir_close; + rdev->input_name = data->hdev->name; + rdev->input_phys = data->hdev->phys; + rdev->input_id.bustype = data->hdev->bus; + rdev->input_id.vendor = data->hdev->vendor; + rdev->input_id.product = data->hdev->product; + rdev->input_id.version = data->hdev->version; + rdev->dev.parent = &data->hdev->dev; + rdev->driver_name = PICOLCD_NAME; + rdev->map_name = RC_MAP_RC6_MCE; + rdev->timeout = MS_TO_NS(100); + rdev->rx_resolution = US_TO_NS(1); + + ret = rc_register_device(rdev); + if (ret) + goto err; + data->rc_dev = rdev; + return 0; + +err: + rc_free_device(rdev); + return ret; +} + +void picolcd_exit_cir(struct picolcd_data *data) +{ + struct rc_dev *rdev = data->rc_dev; + + data->rc_dev = NULL; + rc_unregister_device(rdev); +} + diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c new file mode 100644 index 00000000000..86df26e58ab --- /dev/null +++ b/drivers/hid/hid-picolcd_core.c @@ -0,0 +1,689 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include <linux/hid-debug.h> +#include <linux/input.h> +#include "hid-ids.h" +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/vmalloc.h> + +#include <linux/completion.h> +#include <linux/uaccess.h> +#include <linux/module.h> + +#include "hid-picolcd.h" + + +/* Input device + * + * The PicoLCD has an IR receiver header, a built-in keypad with 5 keys + * and header for 4x4 key matrix. The built-in keys are part of the matrix. + */ +static const unsigned short def_keymap[PICOLCD_KEYS] = { + KEY_RESERVED, /* none */ + KEY_BACK, /* col 4 + row 1 */ + KEY_HOMEPAGE, /* col 3 + row 1 */ + KEY_RESERVED, /* col 2 + row 1 */ + KEY_RESERVED, /* col 1 + row 1 */ + KEY_SCROLLUP, /* col 4 + row 2 */ + KEY_OK, /* col 3 + row 2 */ + KEY_SCROLLDOWN, /* col 2 + row 2 */ + KEY_RESERVED, /* col 1 + row 2 */ + KEY_RESERVED, /* col 4 + row 3 */ + KEY_RESERVED, /* col 3 + row 3 */ + KEY_RESERVED, /* col 2 + row 3 */ + KEY_RESERVED, /* col 1 + row 3 */ + KEY_RESERVED, /* col 4 + row 4 */ + KEY_RESERVED, /* col 3 + row 4 */ + KEY_RESERVED, /* col 2 + row 4 */ + KEY_RESERVED, /* col 1 + row 4 */ +}; + + +/* Find a given report */ +struct hid_report *picolcd_report(int id, struct hid_device *hdev, int dir) +{ + struct list_head *feature_report_list = &hdev->report_enum[dir].report_list; + struct hid_report *report = NULL; + + list_for_each_entry(report, feature_report_list, list) { + if (report->id == id) + return report; + } + hid_warn(hdev, "No report with id 0x%x found\n", id); + return NULL; +} + +/* Submit a report and wait for a reply from device - if device fades away + * or does not respond in time, return NULL */ +struct picolcd_pending *picolcd_send_and_wait(struct hid_device *hdev, + int report_id, const u8 *raw_data, int size) +{ + struct picolcd_data *data = hid_get_drvdata(hdev); + struct picolcd_pending *work; + struct hid_report *report = picolcd_out_report(report_id, hdev); + unsigned long flags; + int i, j, k; + + if (!report || !data) + return NULL; + if (data->status & PICOLCD_FAILED) + return NULL; + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return NULL; + + init_completion(&work->ready); + work->out_report = report; + work->in_report = NULL; + work->raw_size = 0; + + mutex_lock(&data->mutex); + spin_lock_irqsave(&data->lock, flags); + for (i = k = 0; i < report->maxfield; i++) + for (j = 0; j < report->field[i]->report_count; j++) { + hid_set_field(report->field[i], j, k < size ? raw_data[k] : 0); + k++; + } + if (data->status & PICOLCD_FAILED) { + kfree(work); + work = NULL; + } else { + data->pending = work; + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + wait_for_completion_interruptible_timeout(&work->ready, HZ*2); + spin_lock_irqsave(&data->lock, flags); + data->pending = NULL; + } + spin_unlock_irqrestore(&data->lock, flags); + mutex_unlock(&data->mutex); + return work; +} + +/* + * input class device + */ +static int picolcd_raw_keypad(struct picolcd_data *data, + struct hid_report *report, u8 *raw_data, int size) +{ + /* + * Keypad event + * First and second data bytes list currently pressed keys, + * 0x00 means no key and at most 2 keys may be pressed at same time + */ + int i, j; + + /* determine newly pressed keys */ + for (i = 0; i < size; i++) { + unsigned int key_code; + if (raw_data[i] == 0) + continue; + for (j = 0; j < sizeof(data->pressed_keys); j++) + if (data->pressed_keys[j] == raw_data[i]) + goto key_already_down; + for (j = 0; j < sizeof(data->pressed_keys); j++) + if (data->pressed_keys[j] == 0) { + data->pressed_keys[j] = raw_data[i]; + break; + } + input_event(data->input_keys, EV_MSC, MSC_SCAN, raw_data[i]); + if (raw_data[i] < PICOLCD_KEYS) + key_code = data->keycode[raw_data[i]]; + else + key_code = KEY_UNKNOWN; + if (key_code != KEY_UNKNOWN) { + dbg_hid(PICOLCD_NAME " got key press for %u:%d", + raw_data[i], key_code); + input_report_key(data->input_keys, key_code, 1); + } + input_sync(data->input_keys); +key_already_down: + continue; + } + + /* determine newly released keys */ + for (j = 0; j < sizeof(data->pressed_keys); j++) { + unsigned int key_code; + if (data->pressed_keys[j] == 0) + continue; + for (i = 0; i < size; i++) + if (data->pressed_keys[j] == raw_data[i]) + goto key_still_down; + input_event(data->input_keys, EV_MSC, MSC_SCAN, data->pressed_keys[j]); + if (data->pressed_keys[j] < PICOLCD_KEYS) + key_code = data->keycode[data->pressed_keys[j]]; + else + key_code = KEY_UNKNOWN; + if (key_code != KEY_UNKNOWN) { + dbg_hid(PICOLCD_NAME " got key release for %u:%d", + data->pressed_keys[j], key_code); + input_report_key(data->input_keys, key_code, 0); + } + input_sync(data->input_keys); + data->pressed_keys[j] = 0; +key_still_down: + continue; + } + return 1; +} + +static int picolcd_check_version(struct hid_device *hdev) +{ + struct picolcd_data *data = hid_get_drvdata(hdev); + struct picolcd_pending *verinfo; + int ret = 0; + + if (!data) + return -ENODEV; + + verinfo = picolcd_send_and_wait(hdev, REPORT_VERSION, NULL, 0); + if (!verinfo) { + hid_err(hdev, "no version response from PicoLCD\n"); + return -ENODEV; + } + + if (verinfo->raw_size == 2) { + data->version[0] = verinfo->raw_data[1]; + data->version[1] = verinfo->raw_data[0]; + if (data->status & PICOLCD_BOOTLOADER) { + hid_info(hdev, "PicoLCD, bootloader version %d.%d\n", + verinfo->raw_data[1], verinfo->raw_data[0]); + } else { + hid_info(hdev, "PicoLCD, firmware version %d.%d\n", + verinfo->raw_data[1], verinfo->raw_data[0]); + } + } else { + hid_err(hdev, "confused, got unexpected version response from PicoLCD\n"); + ret = -EINVAL; + } + kfree(verinfo); + return ret; +} + +/* + * Reset our device and wait for answer to VERSION request + */ +int picolcd_reset(struct hid_device *hdev) +{ + struct picolcd_data *data = hid_get_drvdata(hdev); + struct hid_report *report = picolcd_out_report(REPORT_RESET, hdev); + unsigned long flags; + int error; + + if (!data || !report || report->maxfield != 1) + return -ENODEV; + + spin_lock_irqsave(&data->lock, flags); + if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) + data->status |= PICOLCD_BOOTLOADER; + + /* perform the reset */ + hid_set_field(report->field[0], 0, 1); + if (data->status & PICOLCD_FAILED) { + spin_unlock_irqrestore(&data->lock, flags); + return -ENODEV; + } + usbhid_submit_report(hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + + error = picolcd_check_version(hdev); + if (error) + return error; + + picolcd_resume_lcd(data); + picolcd_resume_backlight(data); + picolcd_fb_refresh(data); + picolcd_leds_set(data); + return 0; +} + +/* + * The "operation_mode" sysfs attribute + */ +static ssize_t picolcd_operation_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + + if (data->status & PICOLCD_BOOTLOADER) + return snprintf(buf, PAGE_SIZE, "[bootloader] lcd\n"); + else + return snprintf(buf, PAGE_SIZE, "bootloader [lcd]\n"); +} + +static ssize_t picolcd_operation_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + struct hid_report *report = NULL; + size_t cnt = count; + int timeout = data->opmode_delay; + unsigned long flags; + + if (cnt >= 3 && strncmp("lcd", buf, 3) == 0) { + if (data->status & PICOLCD_BOOTLOADER) + report = picolcd_out_report(REPORT_EXIT_FLASHER, data->hdev); + buf += 3; + cnt -= 3; + } else if (cnt >= 10 && strncmp("bootloader", buf, 10) == 0) { + if (!(data->status & PICOLCD_BOOTLOADER)) + report = picolcd_out_report(REPORT_EXIT_KEYBOARD, data->hdev); + buf += 10; + cnt -= 10; + } + if (!report) + return -EINVAL; + + while (cnt > 0 && (buf[cnt-1] == '\n' || buf[cnt-1] == '\r')) + cnt--; + if (cnt != 0) + return -EINVAL; + + spin_lock_irqsave(&data->lock, flags); + hid_set_field(report->field[0], 0, timeout & 0xff); + hid_set_field(report->field[0], 1, (timeout >> 8) & 0xff); + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + return count; +} + +static DEVICE_ATTR(operation_mode, 0644, picolcd_operation_mode_show, + picolcd_operation_mode_store); + +/* + * The "operation_mode_delay" sysfs attribute + */ +static ssize_t picolcd_operation_mode_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + + return snprintf(buf, PAGE_SIZE, "%hu\n", data->opmode_delay); +} + +static ssize_t picolcd_operation_mode_delay_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + unsigned u; + if (sscanf(buf, "%u", &u) != 1) + return -EINVAL; + if (u > 30000) + return -EINVAL; + else + data->opmode_delay = u; + return count; +} + +static DEVICE_ATTR(operation_mode_delay, 0644, picolcd_operation_mode_delay_show, + picolcd_operation_mode_delay_store); + +/* + * Handle raw report as sent by device + */ +static int picolcd_raw_event(struct hid_device *hdev, + struct hid_report *report, u8 *raw_data, int size) +{ + struct picolcd_data *data = hid_get_drvdata(hdev); + unsigned long flags; + int ret = 0; + + if (!data) + return 1; + + if (report->id == REPORT_KEY_STATE) { + if (data->input_keys) + ret = picolcd_raw_keypad(data, report, raw_data+1, size-1); + } else if (report->id == REPORT_IR_DATA) { + ret = picolcd_raw_cir(data, report, raw_data+1, size-1); + } else { + spin_lock_irqsave(&data->lock, flags); + /* + * We let the caller of picolcd_send_and_wait() check if the + * report we got is one of the expected ones or not. + */ + if (data->pending) { + memcpy(data->pending->raw_data, raw_data+1, size-1); + data->pending->raw_size = size-1; + data->pending->in_report = report; + complete(&data->pending->ready); + } + spin_unlock_irqrestore(&data->lock, flags); + } + + picolcd_debug_raw_event(data, hdev, report, raw_data, size); + return 1; +} + +#ifdef CONFIG_PM +static int picolcd_suspend(struct hid_device *hdev, pm_message_t message) +{ + if (PMSG_IS_AUTO(message)) + return 0; + + picolcd_suspend_backlight(hid_get_drvdata(hdev)); + dbg_hid(PICOLCD_NAME " device ready for suspend\n"); + return 0; +} + +static int picolcd_resume(struct hid_device *hdev) +{ + int ret; + ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); + if (ret) + dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); + return 0; +} + +static int picolcd_reset_resume(struct hid_device *hdev) +{ + int ret; + ret = picolcd_reset(hdev); + if (ret) + dbg_hid(PICOLCD_NAME " resetting our device failed: %d\n", ret); + ret = picolcd_fb_reset(hid_get_drvdata(hdev), 0); + if (ret) + dbg_hid(PICOLCD_NAME " restoring framebuffer content failed: %d\n", ret); + ret = picolcd_resume_lcd(hid_get_drvdata(hdev)); + if (ret) + dbg_hid(PICOLCD_NAME " restoring lcd failed: %d\n", ret); + ret = picolcd_resume_backlight(hid_get_drvdata(hdev)); + if (ret) + dbg_hid(PICOLCD_NAME " restoring backlight failed: %d\n", ret); + picolcd_leds_set(hid_get_drvdata(hdev)); + return 0; +} +#endif + +/* initialize keypad input device */ +static int picolcd_init_keys(struct picolcd_data *data, + struct hid_report *report) +{ + struct hid_device *hdev = data->hdev; + struct input_dev *idev; + int error, i; + + if (!report) + return -ENODEV; + if (report->maxfield != 1 || report->field[0]->report_count != 2 || + report->field[0]->report_size != 8) { + hid_err(hdev, "unsupported KEY_STATE report\n"); + return -EINVAL; + } + + idev = input_allocate_device(); + if (idev == NULL) { + hid_err(hdev, "failed to allocate input device\n"); + return -ENOMEM; + } + input_set_drvdata(idev, hdev); + memcpy(data->keycode, def_keymap, sizeof(def_keymap)); + idev->name = hdev->name; + idev->phys = hdev->phys; + idev->uniq = hdev->uniq; + idev->id.bustype = hdev->bus; + idev->id.vendor = hdev->vendor; + idev->id.product = hdev->product; + idev->id.version = hdev->version; + idev->dev.parent = &hdev->dev; + idev->keycode = &data->keycode; + idev->keycodemax = PICOLCD_KEYS; + idev->keycodesize = sizeof(data->keycode[0]); + input_set_capability(idev, EV_MSC, MSC_SCAN); + set_bit(EV_REP, idev->evbit); + for (i = 0; i < PICOLCD_KEYS; i++) + input_set_capability(idev, EV_KEY, data->keycode[i]); + error = input_register_device(idev); + if (error) { + hid_err(hdev, "error registering the input device\n"); + input_free_device(idev); + return error; + } + data->input_keys = idev; + return 0; +} + +static void picolcd_exit_keys(struct picolcd_data *data) +{ + struct input_dev *idev = data->input_keys; + + data->input_keys = NULL; + if (idev) + input_unregister_device(idev); +} + +static int picolcd_probe_lcd(struct hid_device *hdev, struct picolcd_data *data) +{ + int error; + + /* Setup keypad input device */ + error = picolcd_init_keys(data, picolcd_in_report(REPORT_KEY_STATE, hdev)); + if (error) + goto err; + + /* Setup CIR input device */ + error = picolcd_init_cir(data, picolcd_in_report(REPORT_IR_DATA, hdev)); + if (error) + goto err; + + /* Set up the framebuffer device */ + error = picolcd_init_framebuffer(data); + if (error) + goto err; + + /* Setup lcd class device */ + error = picolcd_init_lcd(data, picolcd_out_report(REPORT_CONTRAST, hdev)); + if (error) + goto err; + + /* Setup backlight class device */ + error = picolcd_init_backlight(data, picolcd_out_report(REPORT_BRIGHTNESS, hdev)); + if (error) + goto err; + + /* Setup the LED class devices */ + error = picolcd_init_leds(data, picolcd_out_report(REPORT_LED_STATE, hdev)); + if (error) + goto err; + + picolcd_init_devfs(data, picolcd_out_report(REPORT_EE_READ, hdev), + picolcd_out_report(REPORT_EE_WRITE, hdev), + picolcd_out_report(REPORT_READ_MEMORY, hdev), + picolcd_out_report(REPORT_WRITE_MEMORY, hdev), + picolcd_out_report(REPORT_RESET, hdev)); + return 0; +err: + picolcd_exit_leds(data); + picolcd_exit_backlight(data); + picolcd_exit_lcd(data); + picolcd_exit_framebuffer(data); + picolcd_exit_cir(data); + picolcd_exit_keys(data); + return error; +} + +static int picolcd_probe_bootloader(struct hid_device *hdev, struct picolcd_data *data) +{ + picolcd_init_devfs(data, NULL, NULL, + picolcd_out_report(REPORT_BL_READ_MEMORY, hdev), + picolcd_out_report(REPORT_BL_WRITE_MEMORY, hdev), NULL); + return 0; +} + +static int picolcd_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + struct picolcd_data *data; + int error = -ENOMEM; + + dbg_hid(PICOLCD_NAME " hardware probe...\n"); + + /* + * Let's allocate the picolcd data structure, set some reasonable + * defaults, and associate it with the device + */ + data = kzalloc(sizeof(struct picolcd_data), GFP_KERNEL); + if (data == NULL) { + hid_err(hdev, "can't allocate space for Minibox PicoLCD device data\n"); + error = -ENOMEM; + goto err_no_cleanup; + } + + spin_lock_init(&data->lock); + mutex_init(&data->mutex); + data->hdev = hdev; + data->opmode_delay = 5000; + if (hdev->product == USB_DEVICE_ID_PICOLCD_BOOTLOADER) + data->status |= PICOLCD_BOOTLOADER; + hid_set_drvdata(hdev, data); + + /* Parse the device reports and start it up */ + error = hid_parse(hdev); + if (error) { + hid_err(hdev, "device report parse failed\n"); + goto err_cleanup_data; + } + + error = hid_hw_start(hdev, 0); + if (error) { + hid_err(hdev, "hardware start failed\n"); + goto err_cleanup_data; + } + + error = hid_hw_open(hdev); + if (error) { + hid_err(hdev, "failed to open input interrupt pipe for key and IR events\n"); + goto err_cleanup_hid_hw; + } + + error = device_create_file(&hdev->dev, &dev_attr_operation_mode_delay); + if (error) { + hid_err(hdev, "failed to create sysfs attributes\n"); + goto err_cleanup_hid_ll; + } + + error = device_create_file(&hdev->dev, &dev_attr_operation_mode); + if (error) { + hid_err(hdev, "failed to create sysfs attributes\n"); + goto err_cleanup_sysfs1; + } + + if (data->status & PICOLCD_BOOTLOADER) + error = picolcd_probe_bootloader(hdev, data); + else + error = picolcd_probe_lcd(hdev, data); + if (error) + goto err_cleanup_sysfs2; + + dbg_hid(PICOLCD_NAME " activated and initialized\n"); + return 0; + +err_cleanup_sysfs2: + device_remove_file(&hdev->dev, &dev_attr_operation_mode); +err_cleanup_sysfs1: + device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); +err_cleanup_hid_ll: + hid_hw_close(hdev); +err_cleanup_hid_hw: + hid_hw_stop(hdev); +err_cleanup_data: + kfree(data); +err_no_cleanup: + hid_set_drvdata(hdev, NULL); + + return error; +} + +static void picolcd_remove(struct hid_device *hdev) +{ + struct picolcd_data *data = hid_get_drvdata(hdev); + unsigned long flags; + + dbg_hid(PICOLCD_NAME " hardware remove...\n"); + spin_lock_irqsave(&data->lock, flags); + data->status |= PICOLCD_FAILED; + spin_unlock_irqrestore(&data->lock, flags); + + picolcd_exit_devfs(data); + device_remove_file(&hdev->dev, &dev_attr_operation_mode); + device_remove_file(&hdev->dev, &dev_attr_operation_mode_delay); + hid_hw_close(hdev); + hid_hw_stop(hdev); + + /* Shortcut potential pending reply that will never arrive */ + spin_lock_irqsave(&data->lock, flags); + if (data->pending) + complete(&data->pending->ready); + spin_unlock_irqrestore(&data->lock, flags); + + /* Cleanup LED */ + picolcd_exit_leds(data); + /* Clean up the framebuffer */ + picolcd_exit_backlight(data); + picolcd_exit_lcd(data); + picolcd_exit_framebuffer(data); + /* Cleanup input */ + picolcd_exit_cir(data); + picolcd_exit_keys(data); + + hid_set_drvdata(hdev, NULL); + mutex_destroy(&data->mutex); + /* Finally, clean up the picolcd data itself */ + kfree(data); +} + +static const struct hid_device_id picolcd_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) }, + { } +}; +MODULE_DEVICE_TABLE(hid, picolcd_devices); + +static struct hid_driver picolcd_driver = { + .name = "hid-picolcd", + .id_table = picolcd_devices, + .probe = picolcd_probe, + .remove = picolcd_remove, + .raw_event = picolcd_raw_event, +#ifdef CONFIG_PM + .suspend = picolcd_suspend, + .resume = picolcd_resume, + .reset_resume = picolcd_reset_resume, +#endif +}; + +static int __init picolcd_init(void) +{ + return hid_register_driver(&picolcd_driver); +} + +static void __exit picolcd_exit(void) +{ + hid_unregister_driver(&picolcd_driver); +} + +module_init(picolcd_init); +module_exit(picolcd_exit); +MODULE_DESCRIPTION("Minibox graphics PicoLCD Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/hid/hid-picolcd_debugfs.c b/drivers/hid/hid-picolcd_debugfs.c new file mode 100644 index 00000000000..4809aa1bdb9 --- /dev/null +++ b/drivers/hid/hid-picolcd_debugfs.c @@ -0,0 +1,899 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include <linux/hid-debug.h> +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> + +#include <linux/module.h> +#include <linux/uaccess.h> + +#include "hid-picolcd.h" + + +static int picolcd_debug_reset_show(struct seq_file *f, void *p) +{ + if (picolcd_fbinfo((struct picolcd_data *)f->private)) + seq_printf(f, "all fb\n"); + else + seq_printf(f, "all\n"); + return 0; +} + +static int picolcd_debug_reset_open(struct inode *inode, struct file *f) +{ + return single_open(f, picolcd_debug_reset_show, inode->i_private); +} + +static ssize_t picolcd_debug_reset_write(struct file *f, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct picolcd_data *data = ((struct seq_file *)f->private_data)->private; + char buf[32]; + size_t cnt = min(count, sizeof(buf)-1); + if (copy_from_user(buf, user_buf, cnt)) + return -EFAULT; + + while (cnt > 0 && (buf[cnt-1] == ' ' || buf[cnt-1] == '\n')) + cnt--; + buf[cnt] = '\0'; + if (strcmp(buf, "all") == 0) { + picolcd_reset(data->hdev); + picolcd_fb_reset(data, 1); + } else if (strcmp(buf, "fb") == 0) { + picolcd_fb_reset(data, 1); + } else { + return -EINVAL; + } + return count; +} + +static const struct file_operations picolcd_debug_reset_fops = { + .owner = THIS_MODULE, + .open = picolcd_debug_reset_open, + .read = seq_read, + .llseek = seq_lseek, + .write = picolcd_debug_reset_write, + .release = single_release, +}; + +/* + * The "eeprom" file + */ +static ssize_t picolcd_debug_eeprom_read(struct file *f, char __user *u, + size_t s, loff_t *off) +{ + struct picolcd_data *data = f->private_data; + struct picolcd_pending *resp; + u8 raw_data[3]; + ssize_t ret = -EIO; + + if (s == 0) + return -EINVAL; + if (*off > 0x0ff) + return 0; + + /* prepare buffer with info about what we want to read (addr & len) */ + raw_data[0] = *off & 0xff; + raw_data[1] = (*off >> 8) & 0xff; + raw_data[2] = s < 20 ? s : 20; + if (*off + raw_data[2] > 0xff) + raw_data[2] = 0x100 - *off; + resp = picolcd_send_and_wait(data->hdev, REPORT_EE_READ, raw_data, + sizeof(raw_data)); + if (!resp) + return -EIO; + + if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { + /* successful read :) */ + ret = resp->raw_data[2]; + if (ret > s) + ret = s; + if (copy_to_user(u, resp->raw_data+3, ret)) + ret = -EFAULT; + else + *off += ret; + } /* anything else is some kind of IO error */ + + kfree(resp); + return ret; +} + +static ssize_t picolcd_debug_eeprom_write(struct file *f, const char __user *u, + size_t s, loff_t *off) +{ + struct picolcd_data *data = f->private_data; + struct picolcd_pending *resp; + ssize_t ret = -EIO; + u8 raw_data[23]; + + if (s == 0) + return -EINVAL; + if (*off > 0x0ff) + return -ENOSPC; + + memset(raw_data, 0, sizeof(raw_data)); + raw_data[0] = *off & 0xff; + raw_data[1] = (*off >> 8) & 0xff; + raw_data[2] = min_t(size_t, 20, s); + if (*off + raw_data[2] > 0xff) + raw_data[2] = 0x100 - *off; + + if (copy_from_user(raw_data+3, u, min((u8)20, raw_data[2]))) + return -EFAULT; + resp = picolcd_send_and_wait(data->hdev, REPORT_EE_WRITE, raw_data, + sizeof(raw_data)); + + if (!resp) + return -EIO; + + if (resp->in_report && resp->in_report->id == REPORT_EE_DATA) { + /* check if written data matches */ + if (memcmp(raw_data, resp->raw_data, 3+raw_data[2]) == 0) { + *off += raw_data[2]; + ret = raw_data[2]; + } + } + kfree(resp); + return ret; +} + +/* + * Notes: + * - read/write happens in chunks of at most 20 bytes, it's up to userspace + * to loop in order to get more data. + * - on write errors on otherwise correct write request the bytes + * that should have been written are in undefined state. + */ +static const struct file_operations picolcd_debug_eeprom_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = picolcd_debug_eeprom_read, + .write = picolcd_debug_eeprom_write, + .llseek = generic_file_llseek, +}; + +/* + * The "flash" file + */ +/* record a flash address to buf (bounds check to be done by caller) */ +static int _picolcd_flash_setaddr(struct picolcd_data *data, u8 *buf, long off) +{ + buf[0] = off & 0xff; + buf[1] = (off >> 8) & 0xff; + if (data->addr_sz == 3) + buf[2] = (off >> 16) & 0xff; + return data->addr_sz == 2 ? 2 : 3; +} + +/* read a given size of data (bounds check to be done by caller) */ +static ssize_t _picolcd_flash_read(struct picolcd_data *data, int report_id, + char __user *u, size_t s, loff_t *off) +{ + struct picolcd_pending *resp; + u8 raw_data[4]; + ssize_t ret = 0; + int len_off, err = -EIO; + + while (s > 0) { + err = -EIO; + len_off = _picolcd_flash_setaddr(data, raw_data, *off); + raw_data[len_off] = s > 32 ? 32 : s; + resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off+1); + if (!resp || !resp->in_report) + goto skip; + if (resp->in_report->id == REPORT_MEMORY || + resp->in_report->id == REPORT_BL_READ_MEMORY) { + if (memcmp(raw_data, resp->raw_data, len_off+1) != 0) + goto skip; + if (copy_to_user(u+ret, resp->raw_data+len_off+1, raw_data[len_off])) { + err = -EFAULT; + goto skip; + } + *off += raw_data[len_off]; + s -= raw_data[len_off]; + ret += raw_data[len_off]; + err = 0; + } +skip: + kfree(resp); + if (err) + return ret > 0 ? ret : err; + } + return ret; +} + +static ssize_t picolcd_debug_flash_read(struct file *f, char __user *u, + size_t s, loff_t *off) +{ + struct picolcd_data *data = f->private_data; + + if (s == 0) + return -EINVAL; + if (*off > 0x05fff) + return 0; + if (*off + s > 0x05fff) + s = 0x06000 - *off; + + if (data->status & PICOLCD_BOOTLOADER) + return _picolcd_flash_read(data, REPORT_BL_READ_MEMORY, u, s, off); + else + return _picolcd_flash_read(data, REPORT_READ_MEMORY, u, s, off); +} + +/* erase block aligned to 64bytes boundary */ +static ssize_t _picolcd_flash_erase64(struct picolcd_data *data, int report_id, + loff_t *off) +{ + struct picolcd_pending *resp; + u8 raw_data[3]; + int len_off; + ssize_t ret = -EIO; + + if (*off & 0x3f) + return -EINVAL; + + len_off = _picolcd_flash_setaddr(data, raw_data, *off); + resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, len_off); + if (!resp || !resp->in_report) + goto skip; + if (resp->in_report->id == REPORT_MEMORY || + resp->in_report->id == REPORT_BL_ERASE_MEMORY) { + if (memcmp(raw_data, resp->raw_data, len_off) != 0) + goto skip; + ret = 0; + } +skip: + kfree(resp); + return ret; +} + +/* write a given size of data (bounds check to be done by caller) */ +static ssize_t _picolcd_flash_write(struct picolcd_data *data, int report_id, + const char __user *u, size_t s, loff_t *off) +{ + struct picolcd_pending *resp; + u8 raw_data[36]; + ssize_t ret = 0; + int len_off, err = -EIO; + + while (s > 0) { + err = -EIO; + len_off = _picolcd_flash_setaddr(data, raw_data, *off); + raw_data[len_off] = s > 32 ? 32 : s; + if (copy_from_user(raw_data+len_off+1, u, raw_data[len_off])) { + err = -EFAULT; + break; + } + resp = picolcd_send_and_wait(data->hdev, report_id, raw_data, + len_off+1+raw_data[len_off]); + if (!resp || !resp->in_report) + goto skip; + if (resp->in_report->id == REPORT_MEMORY || + resp->in_report->id == REPORT_BL_WRITE_MEMORY) { + if (memcmp(raw_data, resp->raw_data, len_off+1+raw_data[len_off]) != 0) + goto skip; + *off += raw_data[len_off]; + s -= raw_data[len_off]; + ret += raw_data[len_off]; + err = 0; + } +skip: + kfree(resp); + if (err) + break; + } + return ret > 0 ? ret : err; +} + +static ssize_t picolcd_debug_flash_write(struct file *f, const char __user *u, + size_t s, loff_t *off) +{ + struct picolcd_data *data = f->private_data; + ssize_t err, ret = 0; + int report_erase, report_write; + + if (s == 0) + return -EINVAL; + if (*off > 0x5fff) + return -ENOSPC; + if (s & 0x3f) + return -EINVAL; + if (*off & 0x3f) + return -EINVAL; + + if (data->status & PICOLCD_BOOTLOADER) { + report_erase = REPORT_BL_ERASE_MEMORY; + report_write = REPORT_BL_WRITE_MEMORY; + } else { + report_erase = REPORT_ERASE_MEMORY; + report_write = REPORT_WRITE_MEMORY; + } + mutex_lock(&data->mutex_flash); + while (s > 0) { + err = _picolcd_flash_erase64(data, report_erase, off); + if (err) + break; + err = _picolcd_flash_write(data, report_write, u, 64, off); + if (err < 0) + break; + ret += err; + *off += err; + s -= err; + if (err != 64) + break; + } + mutex_unlock(&data->mutex_flash); + return ret > 0 ? ret : err; +} + +/* + * Notes: + * - concurrent writing is prevented by mutex and all writes must be + * n*64 bytes and 64-byte aligned, each write being preceded by an + * ERASE which erases a 64byte block. + * If less than requested was written or an error is returned for an + * otherwise correct write request the next 64-byte block which should + * have been written is in undefined state (mostly: original, erased, + * (half-)written with write error) + * - reading can happen without special restriction + */ +static const struct file_operations picolcd_debug_flash_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = picolcd_debug_flash_read, + .write = picolcd_debug_flash_write, + .llseek = generic_file_llseek, +}; + + +/* + * Helper code for HID report level dumping/debugging + */ +static const char * const error_codes[] = { + "success", "parameter missing", "data_missing", "block readonly", + "block not erasable", "block too big", "section overflow", + "invalid command length", "invalid data length", +}; + +static void dump_buff_as_hex(char *dst, size_t dst_sz, const u8 *data, + const size_t data_len) +{ + int i, j; + for (i = j = 0; i < data_len && j + 4 < dst_sz; i++) { + dst[j++] = hex_asc[(data[i] >> 4) & 0x0f]; + dst[j++] = hex_asc[data[i] & 0x0f]; + dst[j++] = ' '; + } + dst[j] = '\0'; + if (j > 0) + dst[j-1] = '\n'; + if (i < data_len && j > 2) + dst[j-2] = dst[j-3] = '.'; +} + +void picolcd_debug_out_report(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report) +{ + u8 raw_data[70]; + int raw_size = (report->size >> 3) + 1; + char *buff; +#define BUFF_SZ 256 + + /* Avoid unnecessary overhead if debugfs is disabled */ + if (list_empty(&hdev->debug_list)) + return; + + buff = kmalloc(BUFF_SZ, GFP_ATOMIC); + if (!buff) + return; + + snprintf(buff, BUFF_SZ, "\nout report %d (size %d) = ", + report->id, raw_size); + hid_debug_event(hdev, buff); + if (raw_size + 5 > sizeof(raw_data)) { + kfree(buff); + hid_debug_event(hdev, " TOO BIG\n"); + return; + } else { + raw_data[0] = report->id; + hid_output_report(report, raw_data); + dump_buff_as_hex(buff, BUFF_SZ, raw_data, raw_size); + hid_debug_event(hdev, buff); + } + + switch (report->id) { + case REPORT_LED_STATE: + /* 1 data byte with GPO state */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_LED_STATE", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tGPO state: 0x%02x\n", raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_BRIGHTNESS: + /* 1 data byte with brightness */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_BRIGHTNESS", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tBrightness: 0x%02x\n", raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_CONTRAST: + /* 1 data byte with contrast */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_CONTRAST", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tContrast: 0x%02x\n", raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_RESET: + /* 2 data bytes with reset duration in ms */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_RESET", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tDuration: 0x%02x%02x (%dms)\n", + raw_data[2], raw_data[1], raw_data[2] << 8 | raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_LCD_CMD: + /* 63 data bytes with LCD commands */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_LCD_CMD", report->id, raw_size-1); + hid_debug_event(hdev, buff); + /* TODO: format decoding */ + break; + case REPORT_LCD_DATA: + /* 63 data bytes with LCD data */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_LCD_CMD", report->id, raw_size-1); + /* TODO: format decoding */ + hid_debug_event(hdev, buff); + break; + case REPORT_LCD_CMD_DATA: + /* 63 data bytes with LCD commands and data */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_LCD_CMD", report->id, raw_size-1); + /* TODO: format decoding */ + hid_debug_event(hdev, buff); + break; + case REPORT_EE_READ: + /* 3 data bytes with read area description */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_EE_READ", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + hid_debug_event(hdev, buff); + break; + case REPORT_EE_WRITE: + /* 3+1..20 data bytes with write area description */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_EE_WRITE", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + hid_debug_event(hdev, buff); + if (raw_data[3] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + } else if (raw_data[3] + 4 <= raw_size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + } + hid_debug_event(hdev, buff); + break; + case REPORT_ERASE_MEMORY: + case REPORT_BL_ERASE_MEMORY: + /* 3 data bytes with pointer inside erase block */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_ERASE_MEMORY", report->id, raw_size-1); + hid_debug_event(hdev, buff); + switch (data->addr_sz) { + case 2: + snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + break; + case 3: + snprintf(buff, BUFF_SZ, "\tAddress inside 64 byte block: 0x%02x%02x%02x\n", + raw_data[3], raw_data[2], raw_data[1]); + break; + default: + snprintf(buff, BUFF_SZ, "\tNot supported\n"); + } + hid_debug_event(hdev, buff); + break; + case REPORT_READ_MEMORY: + case REPORT_BL_READ_MEMORY: + /* 4 data bytes with read area description */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_READ_MEMORY", report->id, raw_size-1); + hid_debug_event(hdev, buff); + switch (data->addr_sz) { + case 2: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + break; + case 3: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", + raw_data[3], raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); + break; + default: + snprintf(buff, BUFF_SZ, "\tNot supported\n"); + } + hid_debug_event(hdev, buff); + break; + case REPORT_WRITE_MEMORY: + case REPORT_BL_WRITE_MEMORY: + /* 4+1..32 data bytes with write adrea description */ + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_WRITE_MEMORY", report->id, raw_size-1); + hid_debug_event(hdev, buff); + switch (data->addr_sz) { + case 2: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + hid_debug_event(hdev, buff); + if (raw_data[3] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + } else if (raw_data[3] + 4 <= raw_size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + } + break; + case 3: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", + raw_data[3], raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); + hid_debug_event(hdev, buff); + if (raw_data[4] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + } else if (raw_data[4] + 5 <= raw_size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + } + break; + default: + snprintf(buff, BUFF_SZ, "\tNot supported\n"); + } + hid_debug_event(hdev, buff); + break; + case REPORT_SPLASH_RESTART: + /* TODO */ + break; + case REPORT_EXIT_KEYBOARD: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_EXIT_KEYBOARD", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", + raw_data[1] | (raw_data[2] << 8), + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_VERSION: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_VERSION", report->id, raw_size-1); + hid_debug_event(hdev, buff); + break; + case REPORT_DEVID: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_DEVID", report->id, raw_size-1); + hid_debug_event(hdev, buff); + break; + case REPORT_SPLASH_SIZE: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_SPLASH_SIZE", report->id, raw_size-1); + hid_debug_event(hdev, buff); + break; + case REPORT_HOOK_VERSION: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_HOOK_VERSION", report->id, raw_size-1); + hid_debug_event(hdev, buff); + break; + case REPORT_EXIT_FLASHER: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "REPORT_VERSION", report->id, raw_size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tRestart delay: %dms (0x%02x%02x)\n", + raw_data[1] | (raw_data[2] << 8), + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + break; + default: + snprintf(buff, BUFF_SZ, "out report %s (%d, size=%d)\n", + "<unknown>", report->id, raw_size-1); + hid_debug_event(hdev, buff); + break; + } + wake_up_interruptible(&hdev->debug_wait); + kfree(buff); +} + +void picolcd_debug_raw_event(struct picolcd_data *data, + struct hid_device *hdev, struct hid_report *report, + u8 *raw_data, int size) +{ + char *buff; + +#define BUFF_SZ 256 + /* Avoid unnecessary overhead if debugfs is disabled */ + if (list_empty(&hdev->debug_list)) + return; + + buff = kmalloc(BUFF_SZ, GFP_ATOMIC); + if (!buff) + return; + + switch (report->id) { + case REPORT_ERROR_CODE: + /* 2 data bytes with affected report and error code */ + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_ERROR_CODE", report->id, size-1); + hid_debug_event(hdev, buff); + if (raw_data[2] < ARRAY_SIZE(error_codes)) + snprintf(buff, BUFF_SZ, "\tError code 0x%02x (%s) in reply to report 0x%02x\n", + raw_data[2], error_codes[raw_data[2]], raw_data[1]); + else + snprintf(buff, BUFF_SZ, "\tError code 0x%02x in reply to report 0x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_KEY_STATE: + /* 2 data bytes with key state */ + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_KEY_STATE", report->id, size-1); + hid_debug_event(hdev, buff); + if (raw_data[1] == 0) + snprintf(buff, BUFF_SZ, "\tNo key pressed\n"); + else if (raw_data[2] == 0) + snprintf(buff, BUFF_SZ, "\tOne key pressed: 0x%02x (%d)\n", + raw_data[1], raw_data[1]); + else + snprintf(buff, BUFF_SZ, "\tTwo keys pressed: 0x%02x (%d), 0x%02x (%d)\n", + raw_data[1], raw_data[1], raw_data[2], raw_data[2]); + hid_debug_event(hdev, buff); + break; + case REPORT_IR_DATA: + /* Up to 20 byes of IR scancode data */ + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_IR_DATA", report->id, size-1); + hid_debug_event(hdev, buff); + if (raw_data[1] == 0) { + snprintf(buff, BUFF_SZ, "\tUnexpectedly 0 data length\n"); + hid_debug_event(hdev, buff); + } else if (raw_data[1] + 1 <= size) { + snprintf(buff, BUFF_SZ, "\tData length: %d\n\tIR Data: ", + raw_data[1]); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+2, raw_data[1]); + hid_debug_event(hdev, buff); + } else { + snprintf(buff, BUFF_SZ, "\tOverflowing data length: %d\n", + raw_data[1]-1); + hid_debug_event(hdev, buff); + } + break; + case REPORT_EE_DATA: + /* Data buffer in response to REPORT_EE_READ or REPORT_EE_WRITE */ + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_EE_DATA", report->id, size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + hid_debug_event(hdev, buff); + if (raw_data[3] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + hid_debug_event(hdev, buff); + } else if (raw_data[3] + 4 <= size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); + hid_debug_event(hdev, buff); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + hid_debug_event(hdev, buff); + } + break; + case REPORT_MEMORY: + /* Data buffer in response to REPORT_READ_MEMORY or REPORT_WRTIE_MEMORY */ + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_MEMORY", report->id, size-1); + hid_debug_event(hdev, buff); + switch (data->addr_sz) { + case 2: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[3]); + hid_debug_event(hdev, buff); + if (raw_data[3] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + } else if (raw_data[3] + 4 <= size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+4, raw_data[3]); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + } + break; + case 3: + snprintf(buff, BUFF_SZ, "\tData address: 0x%02x%02x%02x\n", + raw_data[3], raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tData length: %d\n", raw_data[4]); + hid_debug_event(hdev, buff); + if (raw_data[4] == 0) { + snprintf(buff, BUFF_SZ, "\tNo data\n"); + } else if (raw_data[4] + 5 <= size) { + snprintf(buff, BUFF_SZ, "\tData: "); + hid_debug_event(hdev, buff); + dump_buff_as_hex(buff, BUFF_SZ, raw_data+5, raw_data[4]); + } else { + snprintf(buff, BUFF_SZ, "\tData overflowed\n"); + } + break; + default: + snprintf(buff, BUFF_SZ, "\tNot supported\n"); + } + hid_debug_event(hdev, buff); + break; + case REPORT_VERSION: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_VERSION", report->id, size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", + raw_data[2], raw_data[1]); + hid_debug_event(hdev, buff); + break; + case REPORT_BL_ERASE_MEMORY: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_BL_ERASE_MEMORY", report->id, size-1); + hid_debug_event(hdev, buff); + /* TODO */ + break; + case REPORT_BL_READ_MEMORY: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_BL_READ_MEMORY", report->id, size-1); + hid_debug_event(hdev, buff); + /* TODO */ + break; + case REPORT_BL_WRITE_MEMORY: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_BL_WRITE_MEMORY", report->id, size-1); + hid_debug_event(hdev, buff); + /* TODO */ + break; + case REPORT_DEVID: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_DEVID", report->id, size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tSerial: 0x%02x%02x%02x%02x\n", + raw_data[1], raw_data[2], raw_data[3], raw_data[4]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tType: 0x%02x\n", + raw_data[5]); + hid_debug_event(hdev, buff); + break; + case REPORT_SPLASH_SIZE: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_SPLASH_SIZE", report->id, size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tTotal splash space: %d\n", + (raw_data[2] << 8) | raw_data[1]); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tUsed splash space: %d\n", + (raw_data[4] << 8) | raw_data[3]); + hid_debug_event(hdev, buff); + break; + case REPORT_HOOK_VERSION: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "REPORT_HOOK_VERSION", report->id, size-1); + hid_debug_event(hdev, buff); + snprintf(buff, BUFF_SZ, "\tFirmware version: %d.%d\n", + raw_data[1], raw_data[2]); + hid_debug_event(hdev, buff); + break; + default: + snprintf(buff, BUFF_SZ, "report %s (%d, size=%d)\n", + "<unknown>", report->id, size-1); + hid_debug_event(hdev, buff); + break; + } + wake_up_interruptible(&hdev->debug_wait); + kfree(buff); +} + +void picolcd_init_devfs(struct picolcd_data *data, + struct hid_report *eeprom_r, struct hid_report *eeprom_w, + struct hid_report *flash_r, struct hid_report *flash_w, + struct hid_report *reset) +{ + struct hid_device *hdev = data->hdev; + + mutex_init(&data->mutex_flash); + + /* reset */ + if (reset) + data->debug_reset = debugfs_create_file("reset", 0600, + hdev->debug_dir, data, &picolcd_debug_reset_fops); + + /* eeprom */ + if (eeprom_r || eeprom_w) + data->debug_eeprom = debugfs_create_file("eeprom", + (eeprom_w ? S_IWUSR : 0) | (eeprom_r ? S_IRUSR : 0), + hdev->debug_dir, data, &picolcd_debug_eeprom_fops); + + /* flash */ + if (flash_r && flash_r->maxfield == 1 && flash_r->field[0]->report_size == 8) + data->addr_sz = flash_r->field[0]->report_count - 1; + else + data->addr_sz = -1; + if (data->addr_sz == 2 || data->addr_sz == 3) { + data->debug_flash = debugfs_create_file("flash", + (flash_w ? S_IWUSR : 0) | (flash_r ? S_IRUSR : 0), + hdev->debug_dir, data, &picolcd_debug_flash_fops); + } else if (flash_r || flash_w) + hid_warn(hdev, "Unexpected FLASH access reports, please submit rdesc for review\n"); +} + +void picolcd_exit_devfs(struct picolcd_data *data) +{ + struct dentry *dent; + + dent = data->debug_reset; + data->debug_reset = NULL; + if (dent) + debugfs_remove(dent); + dent = data->debug_eeprom; + data->debug_eeprom = NULL; + if (dent) + debugfs_remove(dent); + dent = data->debug_flash; + data->debug_flash = NULL; + if (dent) + debugfs_remove(dent); + mutex_destroy(&data->mutex_flash); +} + diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c new file mode 100644 index 00000000000..0008a512211 --- /dev/null +++ b/drivers/hid/hid-picolcd_fb.c @@ -0,0 +1,615 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include <linux/vmalloc.h> +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/module.h> + +#include "hid-picolcd.h" + +/* Framebuffer + * + * The PicoLCD use a Topway LCD module of 256x64 pixel + * This display area is tiled over 4 controllers with 8 tiles + * each. Each tile has 8x64 pixel, each data byte representing + * a 1-bit wide vertical line of the tile. + * + * The display can be updated at a tile granularity. + * + * Chip 1 Chip 2 Chip 3 Chip 4 + * +----------------+----------------+----------------+----------------+ + * | Tile 1 | Tile 1 | Tile 1 | Tile 1 | + * +----------------+----------------+----------------+----------------+ + * | Tile 2 | Tile 2 | Tile 2 | Tile 2 | + * +----------------+----------------+----------------+----------------+ + * ... + * +----------------+----------------+----------------+----------------+ + * | Tile 8 | Tile 8 | Tile 8 | Tile 8 | + * +----------------+----------------+----------------+----------------+ + */ +#define PICOLCDFB_NAME "picolcdfb" +#define PICOLCDFB_WIDTH (256) +#define PICOLCDFB_HEIGHT (64) +#define PICOLCDFB_SIZE (PICOLCDFB_WIDTH * PICOLCDFB_HEIGHT / 8) + +#define PICOLCDFB_UPDATE_RATE_LIMIT 10 +#define PICOLCDFB_UPDATE_RATE_DEFAULT 2 + +/* Framebuffer visual structures */ +static const struct fb_fix_screeninfo picolcdfb_fix = { + .id = PICOLCDFB_NAME, + .type = FB_TYPE_PACKED_PIXELS, + .visual = FB_VISUAL_MONO01, + .xpanstep = 0, + .ypanstep = 0, + .ywrapstep = 0, + .line_length = PICOLCDFB_WIDTH / 8, + .accel = FB_ACCEL_NONE, +}; + +static const struct fb_var_screeninfo picolcdfb_var = { + .xres = PICOLCDFB_WIDTH, + .yres = PICOLCDFB_HEIGHT, + .xres_virtual = PICOLCDFB_WIDTH, + .yres_virtual = PICOLCDFB_HEIGHT, + .width = 103, + .height = 26, + .bits_per_pixel = 1, + .grayscale = 1, + .red = { + .offset = 0, + .length = 1, + .msb_right = 0, + }, + .green = { + .offset = 0, + .length = 1, + .msb_right = 0, + }, + .blue = { + .offset = 0, + .length = 1, + .msb_right = 0, + }, + .transp = { + .offset = 0, + .length = 0, + .msb_right = 0, + }, +}; + +/* Send a given tile to PicoLCD */ +static int picolcd_fb_send_tile(struct picolcd_data *data, u8 *vbitmap, + int chip, int tile) +{ + struct hid_report *report1, *report2; + unsigned long flags; + u8 *tdata; + int i; + + report1 = picolcd_out_report(REPORT_LCD_CMD_DATA, data->hdev); + if (!report1 || report1->maxfield != 1) + return -ENODEV; + report2 = picolcd_out_report(REPORT_LCD_DATA, data->hdev); + if (!report2 || report2->maxfield != 1) + return -ENODEV; + + spin_lock_irqsave(&data->lock, flags); + if ((data->status & PICOLCD_FAILED)) { + spin_unlock_irqrestore(&data->lock, flags); + return -ENODEV; + } + hid_set_field(report1->field[0], 0, chip << 2); + hid_set_field(report1->field[0], 1, 0x02); + hid_set_field(report1->field[0], 2, 0x00); + hid_set_field(report1->field[0], 3, 0x00); + hid_set_field(report1->field[0], 4, 0xb8 | tile); + hid_set_field(report1->field[0], 5, 0x00); + hid_set_field(report1->field[0], 6, 0x00); + hid_set_field(report1->field[0], 7, 0x40); + hid_set_field(report1->field[0], 8, 0x00); + hid_set_field(report1->field[0], 9, 0x00); + hid_set_field(report1->field[0], 10, 32); + + hid_set_field(report2->field[0], 0, (chip << 2) | 0x01); + hid_set_field(report2->field[0], 1, 0x00); + hid_set_field(report2->field[0], 2, 0x00); + hid_set_field(report2->field[0], 3, 32); + + tdata = vbitmap + (tile * 4 + chip) * 64; + for (i = 0; i < 64; i++) + if (i < 32) + hid_set_field(report1->field[0], 11 + i, tdata[i]); + else + hid_set_field(report2->field[0], 4 + i - 32, tdata[i]); + + usbhid_submit_report(data->hdev, report1, USB_DIR_OUT); + usbhid_submit_report(data->hdev, report2, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + return 0; +} + +/* Translate a single tile*/ +static int picolcd_fb_update_tile(u8 *vbitmap, const u8 *bitmap, int bpp, + int chip, int tile) +{ + int i, b, changed = 0; + u8 tdata[64]; + u8 *vdata = vbitmap + (tile * 4 + chip) * 64; + + if (bpp == 1) { + for (b = 7; b >= 0; b--) { + const u8 *bdata = bitmap + tile * 256 + chip * 8 + b * 32; + for (i = 0; i < 64; i++) { + tdata[i] <<= 1; + tdata[i] |= (bdata[i/8] >> (i % 8)) & 0x01; + } + } + } else if (bpp == 8) { + for (b = 7; b >= 0; b--) { + const u8 *bdata = bitmap + (tile * 256 + chip * 8 + b * 32) * 8; + for (i = 0; i < 64; i++) { + tdata[i] <<= 1; + tdata[i] |= (bdata[i] & 0x80) ? 0x01 : 0x00; + } + } + } else { + /* Oops, we should never get here! */ + WARN_ON(1); + return 0; + } + + for (i = 0; i < 64; i++) + if (tdata[i] != vdata[i]) { + changed = 1; + vdata[i] = tdata[i]; + } + return changed; +} + +void picolcd_fb_refresh(struct picolcd_data *data) +{ + if (data->fb_info) + schedule_delayed_work(&data->fb_info->deferred_work, 0); +} + +/* Reconfigure LCD display */ +int picolcd_fb_reset(struct picolcd_data *data, int clear) +{ + struct hid_report *report = picolcd_out_report(REPORT_LCD_CMD, data->hdev); + struct picolcd_fb_data *fbdata = data->fb_info->par; + int i, j; + unsigned long flags; + static const u8 mapcmd[8] = { 0x00, 0x02, 0x00, 0x64, 0x3f, 0x00, 0x64, 0xc0 }; + + if (!report || report->maxfield != 1) + return -ENODEV; + + spin_lock_irqsave(&data->lock, flags); + for (i = 0; i < 4; i++) { + for (j = 0; j < report->field[0]->maxusage; j++) + if (j == 0) + hid_set_field(report->field[0], j, i << 2); + else if (j < sizeof(mapcmd)) + hid_set_field(report->field[0], j, mapcmd[j]); + else + hid_set_field(report->field[0], j, 0); + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + } + spin_unlock_irqrestore(&data->lock, flags); + + if (clear) { + memset(fbdata->vbitmap, 0, PICOLCDFB_SIZE); + memset(fbdata->bitmap, 0, PICOLCDFB_SIZE*fbdata->bpp); + } + fbdata->force = 1; + + /* schedule first output of framebuffer */ + if (fbdata->ready) + schedule_delayed_work(&data->fb_info->deferred_work, 0); + else + fbdata->ready = 1; + + return 0; +} + +/* Update fb_vbitmap from the screen_base and send changed tiles to device */ +static void picolcd_fb_update(struct fb_info *info) +{ + int chip, tile, n; + unsigned long flags; + struct picolcd_fb_data *fbdata = info->par; + struct picolcd_data *data; + + mutex_lock(&info->lock); + + spin_lock_irqsave(&fbdata->lock, flags); + if (!fbdata->ready && fbdata->picolcd) + picolcd_fb_reset(fbdata->picolcd, 0); + spin_unlock_irqrestore(&fbdata->lock, flags); + + /* + * Translate the framebuffer into the format needed by the PicoLCD. + * See display layout above. + * Do this one tile after the other and push those tiles that changed. + * + * Wait for our IO to complete as otherwise we might flood the queue! + */ + n = 0; + for (chip = 0; chip < 4; chip++) + for (tile = 0; tile < 8; tile++) { + if (!fbdata->force && !picolcd_fb_update_tile( + fbdata->vbitmap, fbdata->bitmap, + fbdata->bpp, chip, tile)) + continue; + n += 2; + if (n >= HID_OUTPUT_FIFO_SIZE / 2) { + spin_lock_irqsave(&fbdata->lock, flags); + data = fbdata->picolcd; + spin_unlock_irqrestore(&fbdata->lock, flags); + mutex_unlock(&info->lock); + if (!data) + return; + usbhid_wait_io(data->hdev); + mutex_lock(&info->lock); + n = 0; + } + spin_lock_irqsave(&fbdata->lock, flags); + data = fbdata->picolcd; + spin_unlock_irqrestore(&fbdata->lock, flags); + if (!data || picolcd_fb_send_tile(data, + fbdata->vbitmap, chip, tile)) + goto out; + } + fbdata->force = false; + if (n) { + spin_lock_irqsave(&fbdata->lock, flags); + data = fbdata->picolcd; + spin_unlock_irqrestore(&fbdata->lock, flags); + mutex_unlock(&info->lock); + if (data) + usbhid_wait_io(data->hdev); + return; + } +out: + mutex_unlock(&info->lock); +} + +/* Stub to call the system default and update the image on the picoLCD */ +static void picolcd_fb_fillrect(struct fb_info *info, + const struct fb_fillrect *rect) +{ + if (!info->par) + return; + sys_fillrect(info, rect); + + schedule_delayed_work(&info->deferred_work, 0); +} + +/* Stub to call the system default and update the image on the picoLCD */ +static void picolcd_fb_copyarea(struct fb_info *info, + const struct fb_copyarea *area) +{ + if (!info->par) + return; + sys_copyarea(info, area); + + schedule_delayed_work(&info->deferred_work, 0); +} + +/* Stub to call the system default and update the image on the picoLCD */ +static void picolcd_fb_imageblit(struct fb_info *info, const struct fb_image *image) +{ + if (!info->par) + return; + sys_imageblit(info, image); + + schedule_delayed_work(&info->deferred_work, 0); +} + +/* + * this is the slow path from userspace. they can seek and write to + * the fb. it's inefficient to do anything less than a full screen draw + */ +static ssize_t picolcd_fb_write(struct fb_info *info, const char __user *buf, + size_t count, loff_t *ppos) +{ + ssize_t ret; + if (!info->par) + return -ENODEV; + ret = fb_sys_write(info, buf, count, ppos); + if (ret >= 0) + schedule_delayed_work(&info->deferred_work, 0); + return ret; +} + +static int picolcd_fb_blank(int blank, struct fb_info *info) +{ + /* We let fb notification do this for us via lcd/backlight device */ + return 0; +} + +static void picolcd_fb_destroy(struct fb_info *info) +{ + struct picolcd_fb_data *fbdata = info->par; + + /* make sure no work is deferred */ + fb_deferred_io_cleanup(info); + + /* No thridparty should ever unregister our framebuffer! */ + WARN_ON(fbdata->picolcd != NULL); + + vfree((u8 *)info->fix.smem_start); + framebuffer_release(info); +} + +static int picolcd_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + __u32 bpp = var->bits_per_pixel; + __u32 activate = var->activate; + + /* only allow 1/8 bit depth (8-bit is grayscale) */ + *var = picolcdfb_var; + var->activate = activate; + if (bpp >= 8) { + var->bits_per_pixel = 8; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + } else { + var->bits_per_pixel = 1; + var->red.length = 1; + var->green.length = 1; + var->blue.length = 1; + } + return 0; +} + +static int picolcd_set_par(struct fb_info *info) +{ + struct picolcd_fb_data *fbdata = info->par; + u8 *tmp_fb, *o_fb; + if (info->var.bits_per_pixel == fbdata->bpp) + return 0; + /* switch between 1/8 bit depths */ + if (info->var.bits_per_pixel != 1 && info->var.bits_per_pixel != 8) + return -EINVAL; + + o_fb = fbdata->bitmap; + tmp_fb = kmalloc(PICOLCDFB_SIZE*info->var.bits_per_pixel, GFP_KERNEL); + if (!tmp_fb) + return -ENOMEM; + + /* translate FB content to new bits-per-pixel */ + if (info->var.bits_per_pixel == 1) { + int i, b; + for (i = 0; i < PICOLCDFB_SIZE; i++) { + u8 p = 0; + for (b = 0; b < 8; b++) { + p <<= 1; + p |= o_fb[i*8+b] ? 0x01 : 0x00; + } + tmp_fb[i] = p; + } + memcpy(o_fb, tmp_fb, PICOLCDFB_SIZE); + info->fix.visual = FB_VISUAL_MONO01; + info->fix.line_length = PICOLCDFB_WIDTH / 8; + } else { + int i; + memcpy(tmp_fb, o_fb, PICOLCDFB_SIZE); + for (i = 0; i < PICOLCDFB_SIZE * 8; i++) + o_fb[i] = tmp_fb[i/8] & (0x01 << (7 - i % 8)) ? 0xff : 0x00; + info->fix.visual = FB_VISUAL_DIRECTCOLOR; + info->fix.line_length = PICOLCDFB_WIDTH; + } + + kfree(tmp_fb); + fbdata->bpp = info->var.bits_per_pixel; + return 0; +} + +/* Note this can't be const because of struct fb_info definition */ +static struct fb_ops picolcdfb_ops = { + .owner = THIS_MODULE, + .fb_destroy = picolcd_fb_destroy, + .fb_read = fb_sys_read, + .fb_write = picolcd_fb_write, + .fb_blank = picolcd_fb_blank, + .fb_fillrect = picolcd_fb_fillrect, + .fb_copyarea = picolcd_fb_copyarea, + .fb_imageblit = picolcd_fb_imageblit, + .fb_check_var = picolcd_fb_check_var, + .fb_set_par = picolcd_set_par, +}; + + +/* Callback from deferred IO workqueue */ +static void picolcd_fb_deferred_io(struct fb_info *info, struct list_head *pagelist) +{ + picolcd_fb_update(info); +} + +static const struct fb_deferred_io picolcd_fb_defio = { + .delay = HZ / PICOLCDFB_UPDATE_RATE_DEFAULT, + .deferred_io = picolcd_fb_deferred_io, +}; + + +/* + * The "fb_update_rate" sysfs attribute + */ +static ssize_t picolcd_fb_update_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + struct picolcd_fb_data *fbdata = data->fb_info->par; + unsigned i, fb_update_rate = fbdata->update_rate; + size_t ret = 0; + + for (i = 1; i <= PICOLCDFB_UPDATE_RATE_LIMIT; i++) + if (ret >= PAGE_SIZE) + break; + else if (i == fb_update_rate) + ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i); + else + ret += snprintf(buf+ret, PAGE_SIZE-ret, "%u ", i); + if (ret > 0) + buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n'; + return ret; +} + +static ssize_t picolcd_fb_update_rate_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct picolcd_data *data = dev_get_drvdata(dev); + struct picolcd_fb_data *fbdata = data->fb_info->par; + int i; + unsigned u; + + if (count < 1 || count > 10) + return -EINVAL; + + i = sscanf(buf, "%u", &u); + if (i != 1) + return -EINVAL; + + if (u > PICOLCDFB_UPDATE_RATE_LIMIT) + return -ERANGE; + else if (u == 0) + u = PICOLCDFB_UPDATE_RATE_DEFAULT; + + fbdata->update_rate = u; + data->fb_info->fbdefio->delay = HZ / fbdata->update_rate; + return count; +} + +static DEVICE_ATTR(fb_update_rate, 0666, picolcd_fb_update_rate_show, + picolcd_fb_update_rate_store); + +/* initialize Framebuffer device */ +int picolcd_init_framebuffer(struct picolcd_data *data) +{ + struct device *dev = &data->hdev->dev; + struct fb_info *info = NULL; + struct picolcd_fb_data *fbdata = NULL; + int i, error = -ENOMEM; + u32 *palette; + + /* The extra memory is: + * - 256*u32 for pseudo_palette + * - struct fb_deferred_io + */ + info = framebuffer_alloc(256 * sizeof(u32) + + sizeof(struct fb_deferred_io) + + sizeof(struct picolcd_fb_data) + + PICOLCDFB_SIZE, dev); + if (info == NULL) { + dev_err(dev, "failed to allocate a framebuffer\n"); + goto err_nomem; + } + + info->fbdefio = info->par; + *info->fbdefio = picolcd_fb_defio; + info->par += sizeof(struct fb_deferred_io); + palette = info->par; + info->par += 256 * sizeof(u32); + for (i = 0; i < 256; i++) + palette[i] = i > 0 && i < 16 ? 0xff : 0; + info->pseudo_palette = palette; + info->fbops = &picolcdfb_ops; + info->var = picolcdfb_var; + info->fix = picolcdfb_fix; + info->fix.smem_len = PICOLCDFB_SIZE*8; + info->flags = FBINFO_FLAG_DEFAULT; + + fbdata = info->par; + spin_lock_init(&fbdata->lock); + fbdata->picolcd = data; + fbdata->update_rate = PICOLCDFB_UPDATE_RATE_DEFAULT; + fbdata->bpp = picolcdfb_var.bits_per_pixel; + fbdata->force = 1; + fbdata->vbitmap = info->par + sizeof(struct picolcd_fb_data); + fbdata->bitmap = vmalloc(PICOLCDFB_SIZE*8); + if (fbdata->bitmap == NULL) { + dev_err(dev, "can't get a free page for framebuffer\n"); + goto err_nomem; + } + info->screen_base = (char __force __iomem *)fbdata->bitmap; + info->fix.smem_start = (unsigned long)fbdata->bitmap; + memset(fbdata->vbitmap, 0xff, PICOLCDFB_SIZE); + data->fb_info = info; + + error = picolcd_fb_reset(data, 1); + if (error) { + dev_err(dev, "failed to configure display\n"); + goto err_cleanup; + } + + error = device_create_file(dev, &dev_attr_fb_update_rate); + if (error) { + dev_err(dev, "failed to create sysfs attributes\n"); + goto err_cleanup; + } + + fb_deferred_io_init(info); + error = register_framebuffer(info); + if (error) { + dev_err(dev, "failed to register framebuffer\n"); + goto err_sysfs; + } + return 0; + +err_sysfs: + device_remove_file(dev, &dev_attr_fb_update_rate); + fb_deferred_io_cleanup(info); +err_cleanup: + data->fb_info = NULL; + +err_nomem: + if (fbdata) + vfree(fbdata->bitmap); + framebuffer_release(info); + return error; +} + +void picolcd_exit_framebuffer(struct picolcd_data *data) +{ + struct fb_info *info = data->fb_info; + struct picolcd_fb_data *fbdata = info->par; + unsigned long flags; + + device_remove_file(&data->hdev->dev, &dev_attr_fb_update_rate); + + /* disconnect framebuffer from HID dev */ + spin_lock_irqsave(&fbdata->lock, flags); + fbdata->picolcd = NULL; + spin_unlock_irqrestore(&fbdata->lock, flags); + + /* make sure there is no running update - thus that fbdata->picolcd + * once obtained under lock is guaranteed not to get free() under + * the feet of the deferred work */ + flush_delayed_work_sync(&info->deferred_work); + + data->fb_info = NULL; + unregister_framebuffer(info); +} diff --git a/drivers/hid/hid-picolcd_lcd.c b/drivers/hid/hid-picolcd_lcd.c new file mode 100644 index 00000000000..2d0ddc5ac65 --- /dev/null +++ b/drivers/hid/hid-picolcd_lcd.c @@ -0,0 +1,107 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/lcd.h> + +#include "hid-picolcd.h" + +/* + * lcd class device + */ +static int picolcd_get_contrast(struct lcd_device *ldev) +{ + struct picolcd_data *data = lcd_get_data(ldev); + return data->lcd_contrast; +} + +static int picolcd_set_contrast(struct lcd_device *ldev, int contrast) +{ + struct picolcd_data *data = lcd_get_data(ldev); + struct hid_report *report = picolcd_out_report(REPORT_CONTRAST, data->hdev); + unsigned long flags; + + if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) + return -ENODEV; + + data->lcd_contrast = contrast & 0x0ff; + spin_lock_irqsave(&data->lock, flags); + hid_set_field(report->field[0], 0, data->lcd_contrast); + if (!(data->status & PICOLCD_FAILED)) + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); + return 0; +} + +static int picolcd_check_lcd_fb(struct lcd_device *ldev, struct fb_info *fb) +{ + return fb && fb == picolcd_fbinfo((struct picolcd_data *)lcd_get_data(ldev)); +} + +static struct lcd_ops picolcd_lcdops = { + .get_contrast = picolcd_get_contrast, + .set_contrast = picolcd_set_contrast, + .check_fb = picolcd_check_lcd_fb, +}; + +int picolcd_init_lcd(struct picolcd_data *data, struct hid_report *report) +{ + struct device *dev = &data->hdev->dev; + struct lcd_device *ldev; + + if (!report) + return -ENODEV; + if (report->maxfield != 1 || report->field[0]->report_count != 1 || + report->field[0]->report_size != 8) { + dev_err(dev, "unsupported CONTRAST report"); + return -EINVAL; + } + + ldev = lcd_device_register(dev_name(dev), dev, data, &picolcd_lcdops); + if (IS_ERR(ldev)) { + dev_err(dev, "failed to register LCD\n"); + return PTR_ERR(ldev); + } + ldev->props.max_contrast = 0x0ff; + data->lcd_contrast = 0xe5; + data->lcd = ldev; + picolcd_set_contrast(ldev, 0xe5); + return 0; +} + +void picolcd_exit_lcd(struct picolcd_data *data) +{ + struct lcd_device *ldev = data->lcd; + + data->lcd = NULL; + if (ldev) + lcd_device_unregister(ldev); +} + +int picolcd_resume_lcd(struct picolcd_data *data) +{ + if (!data->lcd) + return 0; + return picolcd_set_contrast(data->lcd, data->lcd_contrast); +} + diff --git a/drivers/hid/hid-picolcd_leds.c b/drivers/hid/hid-picolcd_leds.c new file mode 100644 index 00000000000..28cb6a4f963 --- /dev/null +++ b/drivers/hid/hid-picolcd_leds.c @@ -0,0 +1,175 @@ +/*************************************************************************** + * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * + * * + * Based on Logitech G13 driver (v0.4) * + * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * + * * + * This program is free software: you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation, version 2 of the License. * + * * + * This driver is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this software. If not see <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + +#include <linux/hid.h> +#include <linux/hid-debug.h> +#include <linux/input.h> +#include "hid-ids.h" +#include "usbhid/usbhid.h" +#include <linux/usb.h> + +#include <linux/fb.h> +#include <linux/vmalloc.h> +#include <linux/backlight.h> +#include <linux/lcd.h> + +#include <linux/leds.h> + +#include <linux/seq_file.h> +#include <linux/debugfs.h> + +#include <linux/completion.h> +#include <linux/uaccess.h> +#include <linux/module.h> + +#include "hid-picolcd.h" + + +void picolcd_leds_set(struct picolcd_data *data) +{ + struct hid_report *report; + unsigned long flags; + + if (!data->led[0]) + return; + report = picolcd_out_report(REPORT_LED_STATE, data->hdev); + if (!report || report->maxfield != 1 || report->field[0]->report_count != 1) + return; + + spin_lock_irqsave(&data->lock, flags); + hid_set_field(report->field[0], 0, data->led_state); + if (!(data->status & PICOLCD_FAILED)) + usbhid_submit_report(data->hdev, report, USB_DIR_OUT); + spin_unlock_irqrestore(&data->lock, flags); +} + +static void picolcd_led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct device *dev; + struct hid_device *hdev; + struct picolcd_data *data; + int i, state = 0; + + dev = led_cdev->dev->parent; + hdev = container_of(dev, struct hid_device, dev); + data = hid_get_drvdata(hdev); + if (!data) + return; + for (i = 0; i < 8; i++) { + if (led_cdev != data->led[i]) + continue; + state = (data->led_state >> i) & 1; + if (value == LED_OFF && state) { + data->led_state &= ~(1 << i); + picolcd_leds_set(data); + } else if (value != LED_OFF && !state) { + data->led_state |= 1 << i; + picolcd_leds_set(data); + } + break; + } +} + +static enum led_brightness picolcd_led_get_brightness(struct led_classdev *led_cdev) +{ + struct device *dev; + struct hid_device *hdev; + struct picolcd_data *data; + int i, value = 0; + + dev = led_cdev->dev->parent; + hdev = container_of(dev, struct hid_device, dev); + data = hid_get_drvdata(hdev); + for (i = 0; i < 8; i++) + if (led_cdev == data->led[i]) { + value = (data->led_state >> i) & 1; + break; + } + return value ? LED_FULL : LED_OFF; +} + +int picolcd_init_leds(struct picolcd_data *data, struct hid_report *report) +{ + struct device *dev = &data->hdev->dev; + struct led_classdev *led; + size_t name_sz = strlen(dev_name(dev)) + 8; + char *name; + int i, ret = 0; + + if (!report) + return -ENODEV; + if (report->maxfield != 1 || report->field[0]->report_count != 1 || + report->field[0]->report_size != 8) { + dev_err(dev, "unsupported LED_STATE report"); + return -EINVAL; + } + + for (i = 0; i < 8; i++) { + led = kzalloc(sizeof(struct led_classdev)+name_sz, GFP_KERNEL); + if (!led) { + dev_err(dev, "can't allocate memory for LED %d\n", i); + ret = -ENOMEM; + goto err; + } + name = (void *)(&led[1]); + snprintf(name, name_sz, "%s::GPO%d", dev_name(dev), i); + led->name = name; + led->brightness = 0; + led->max_brightness = 1; + led->brightness_get = picolcd_led_get_brightness; + led->brightness_set = picolcd_led_set_brightness; + + data->led[i] = led; + ret = led_classdev_register(dev, data->led[i]); + if (ret) { + data->led[i] = NULL; + kfree(led); + dev_err(dev, "can't register LED %d\n", i); + goto err; + } + } + return 0; +err: + for (i = 0; i < 8; i++) + if (data->led[i]) { + led = data->led[i]; + data->led[i] = NULL; + led_classdev_unregister(led); + kfree(led); + } + return ret; +} + +void picolcd_exit_leds(struct picolcd_data *data) +{ + struct led_classdev *led; + int i; + + for (i = 0; i < 8; i++) { + led = data->led[i]; + data->led[i] = NULL; + if (!led) + continue; + led_classdev_unregister(led); + kfree(led); + } +} + + diff --git a/drivers/hid/hid-primax.c b/drivers/hid/hid-primax.c index 4d3c60d8831..c15adb0c98a 100644 --- a/drivers/hid/hid-primax.c +++ b/drivers/hid/hid-primax.c @@ -64,29 +64,6 @@ static int px_raw_event(struct hid_device *hid, struct hid_report *report, return 0; } -static int px_probe(struct hid_device *hid, const struct hid_device_id *id) -{ - int ret; - - ret = hid_parse(hid); - if (ret) { - hid_err(hid, "parse failed\n"); - goto fail; - } - - ret = hid_hw_start(hid, HID_CONNECT_DEFAULT); - if (ret) - hid_err(hid, "hw start failed\n"); - -fail: - return ret; -} - -static void px_remove(struct hid_device *hid) -{ - hid_hw_stop(hid); -} - static const struct hid_device_id px_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) }, { } @@ -97,8 +74,6 @@ static struct hid_driver px_driver = { .name = "primax", .id_table = px_devices, .raw_event = px_raw_event, - .probe = px_probe, - .remove = px_remove, }; static int __init px_init(void) diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index b71b77ab0dc..ec8ca333631 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -105,7 +105,7 @@ static ssize_t show_channel(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read channel=%u\n", pk->pm->midi_channel); @@ -118,7 +118,7 @@ static ssize_t store_channel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); unsigned channel = 0; @@ -142,7 +142,7 @@ static ssize_t show_sustain(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read sustain=%u\n", pk->pm->midi_sustain); @@ -155,7 +155,7 @@ static ssize_t store_sustain(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); unsigned sustain = 0; @@ -181,7 +181,7 @@ static ssize_t show_octave(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); dbg_hid("pcmidi sysfs read octave=%d\n", pk->pm->midi_octave); @@ -194,7 +194,7 @@ static ssize_t store_octave(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = container_of(dev, struct hid_device, dev); - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); int octave = 0; @@ -759,7 +759,7 @@ static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); struct pcmidi_snd *pm; pm = pk->pm; @@ -777,7 +777,7 @@ static int pk_input_mapping(struct hid_device *hdev, struct hid_input *hi, static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); int ret = 0; if (1 == pk->pm->ifnum) { @@ -858,7 +858,7 @@ err_free_pk: static void pk_remove(struct hid_device *hdev) { - struct pk_device *pk = (struct pk_device *)hid_get_drvdata(hdev); + struct pk_device *pk = hid_get_drvdata(hdev); struct pcmidi_snd *pm; pm = pk->pm; diff --git a/drivers/hid/hid-ps3remote.c b/drivers/hid/hid-ps3remote.c new file mode 100644 index 00000000000..03811e539d7 --- /dev/null +++ b/drivers/hid/hid-ps3remote.c @@ -0,0 +1,215 @@ +/* + * HID driver for Sony PS3 BD Remote Control + * + * Copyright (c) 2012 David Dillow <dave@thedillows.org> + * Based on a blend of the bluez fakehid user-space code by Marcel Holtmann + * and other kernel HID drivers. + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* NOTE: in order for the Sony PS3 BD Remote Control to be found by + * a Bluetooth host, the key combination Start+Enter has to be kept pressed + * for about 7 seconds with the Bluetooth Host Controller in discovering mode. + * + * There will be no PIN request from the device. + */ + +#include <linux/device.h> +#include <linux/hid.h> +#include <linux/module.h> + +#include "hid-ids.h" + +static __u8 ps3remote_rdesc[] = { + 0x05, 0x01, /* GUsagePage Generic Desktop */ + 0x09, 0x05, /* LUsage 0x05 [Game Pad] */ + 0xA1, 0x01, /* MCollection Application (mouse, keyboard) */ + + /* Use collection 1 for joypad buttons */ + 0xA1, 0x02, /* MCollection Logical (interrelated data) */ + + /* Ignore the 1st byte, maybe it is used for a controller + * number but it's not needed for correct operation */ + 0x75, 0x08, /* GReportSize 0x08 [8] */ + 0x95, 0x01, /* GReportCount 0x01 [1] */ + 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ + + /* Bytes from 2nd to 4th are a bitmap for joypad buttons, for these + * buttons multiple keypresses are allowed */ + 0x05, 0x09, /* GUsagePage Button */ + 0x19, 0x01, /* LUsageMinimum 0x01 [Button 1 (primary/trigger)] */ + 0x29, 0x18, /* LUsageMaximum 0x18 [Button 24] */ + 0x14, /* GLogicalMinimum [0] */ + 0x25, 0x01, /* GLogicalMaximum 0x01 [1] */ + 0x75, 0x01, /* GReportSize 0x01 [1] */ + 0x95, 0x18, /* GReportCount 0x18 [24] */ + 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ + + 0xC0, /* MEndCollection */ + + /* Use collection 2 for remote control buttons */ + 0xA1, 0x02, /* MCollection Logical (interrelated data) */ + + /* 5th byte is used for remote control buttons */ + 0x05, 0x09, /* GUsagePage Button */ + 0x18, /* LUsageMinimum [No button pressed] */ + 0x29, 0xFE, /* LUsageMaximum 0xFE [Button 254] */ + 0x14, /* GLogicalMinimum [0] */ + 0x26, 0xFE, 0x00, /* GLogicalMaximum 0x00FE [254] */ + 0x75, 0x08, /* GReportSize 0x08 [8] */ + 0x95, 0x01, /* GReportCount 0x01 [1] */ + 0x80, /* MInput */ + + /* Ignore bytes from 6th to 11th, 6th to 10th are always constant at + * 0xff and 11th is for press indication */ + 0x75, 0x08, /* GReportSize 0x08 [8] */ + 0x95, 0x06, /* GReportCount 0x06 [6] */ + 0x81, 0x01, /* MInput 0x01 (Const[0] Arr[1] Abs[2]) */ + + /* 12th byte is for battery strength */ + 0x05, 0x06, /* GUsagePage Generic Device Controls */ + 0x09, 0x20, /* LUsage 0x20 [Battery Strength] */ + 0x14, /* GLogicalMinimum [0] */ + 0x25, 0x05, /* GLogicalMaximum 0x05 [5] */ + 0x75, 0x08, /* GReportSize 0x08 [8] */ + 0x95, 0x01, /* GReportCount 0x01 [1] */ + 0x81, 0x02, /* MInput 0x02 (Data[0] Var[1] Abs[2]) */ + + 0xC0, /* MEndCollection */ + + 0xC0 /* MEndCollection [Game Pad] */ +}; + +static const unsigned int ps3remote_keymap_joypad_buttons[] = { + [0x01] = KEY_SELECT, + [0x02] = BTN_THUMBL, /* L3 */ + [0x03] = BTN_THUMBR, /* R3 */ + [0x04] = BTN_START, + [0x05] = KEY_UP, + [0x06] = KEY_RIGHT, + [0x07] = KEY_DOWN, + [0x08] = KEY_LEFT, + [0x09] = BTN_TL2, /* L2 */ + [0x0a] = BTN_TR2, /* R2 */ + [0x0b] = BTN_TL, /* L1 */ + [0x0c] = BTN_TR, /* R1 */ + [0x0d] = KEY_OPTION, /* options/triangle */ + [0x0e] = KEY_BACK, /* back/circle */ + [0x0f] = BTN_0, /* cross */ + [0x10] = KEY_SCREEN, /* view/square */ + [0x11] = KEY_HOMEPAGE, /* PS button */ + [0x14] = KEY_ENTER, +}; +static const unsigned int ps3remote_keymap_remote_buttons[] = { + [0x00] = KEY_1, + [0x01] = KEY_2, + [0x02] = KEY_3, + [0x03] = KEY_4, + [0x04] = KEY_5, + [0x05] = KEY_6, + [0x06] = KEY_7, + [0x07] = KEY_8, + [0x08] = KEY_9, + [0x09] = KEY_0, + [0x0e] = KEY_ESC, /* return */ + [0x0f] = KEY_CLEAR, + [0x16] = KEY_EJECTCD, + [0x1a] = KEY_MENU, /* top menu */ + [0x28] = KEY_TIME, + [0x30] = KEY_PREVIOUS, + [0x31] = KEY_NEXT, + [0x32] = KEY_PLAY, + [0x33] = KEY_REWIND, /* scan back */ + [0x34] = KEY_FORWARD, /* scan forward */ + [0x38] = KEY_STOP, + [0x39] = KEY_PAUSE, + [0x40] = KEY_CONTEXT_MENU, /* pop up/menu */ + [0x60] = KEY_FRAMEBACK, /* slow/step back */ + [0x61] = KEY_FRAMEFORWARD, /* slow/step forward */ + [0x63] = KEY_SUBTITLE, + [0x64] = KEY_AUDIO, + [0x65] = KEY_ANGLE, + [0x70] = KEY_INFO, /* display */ + [0x80] = KEY_BLUE, + [0x81] = KEY_RED, + [0x82] = KEY_GREEN, + [0x83] = KEY_YELLOW, +}; + +static __u8 *ps3remote_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + *rsize = sizeof(ps3remote_rdesc); + return ps3remote_rdesc; +} + +static int ps3remote_mapping(struct hid_device *hdev, struct hid_input *hi, + struct hid_field *field, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + unsigned int key = usage->hid & HID_USAGE; + + if ((usage->hid & HID_USAGE_PAGE) != HID_UP_BUTTON) + return -1; + + switch (usage->collection_index) { + case 1: + if (key >= ARRAY_SIZE(ps3remote_keymap_joypad_buttons)) + return -1; + + key = ps3remote_keymap_joypad_buttons[key]; + if (!key) + return -1; + break; + case 2: + if (key >= ARRAY_SIZE(ps3remote_keymap_remote_buttons)) + return -1; + + key = ps3remote_keymap_remote_buttons[key]; + if (!key) + return -1; + break; + default: + return -1; + } + + hid_map_usage_clear(hi, usage, bit, max, EV_KEY, key); + return 1; +} + +static const struct hid_device_id ps3remote_devices[] = { + /* PS3 BD Remote Control */ + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_BDREMOTE) }, + /* Logitech Harmony Adapter for PS3 */ + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) }, + { } +}; +MODULE_DEVICE_TABLE(hid, ps3remote_devices); + +static struct hid_driver ps3remote_driver = { + .name = "ps3_remote", + .id_table = ps3remote_devices, + .report_fixup = ps3remote_fixup, + .input_mapping = ps3remote_mapping, +}; + +static int __init ps3remote_init(void) +{ + return hid_register_driver(&ps3remote_driver); +} + +static void __exit ps3remote_exit(void) +{ + hid_unregister_driver(&ps3remote_driver); +} + +module_init(ps3remote_init); +module_exit(ps3remote_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Dillow <dave@thedillows.org>, Antonio Ospite <ospite@studenti.unina.it>"); diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 3c1fd8af5e0..a5821d31722 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2010 Don Prince <dhprince.devel@yahoo.co.uk> * diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 5cd25bd907f..7f33ebf299c 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -4,7 +4,6 @@ * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby * Copyright (c) 2006-2008 Jiri Kosina */ diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c index d484a0043dd..45b4b066a26 100644 --- a/drivers/hid/hid-sunplus.c +++ b/drivers/hid/hid-sunplus.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby */ diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index 3aba02be1f2..2e56a1fd237 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -466,6 +466,86 @@ static __u8 twhl850_rdesc_fixed2[] = { 0xC0 /* End Collection */ }; +/* + * See TWHA60 description, device and HID report descriptors at + * http://sf.net/apps/mediawiki/digimend/?title=UC-Logic_Tablet_TWHA60 + */ + +/* Size of the original descriptors of TWHA60 tablet */ +#define TWHA60_RDESC_ORIG_SIZE0 254 +#define TWHA60_RDESC_ORIG_SIZE1 139 + +/* Fixed TWHA60 report descriptor, interface 0 (stylus) */ +static __u8 twha60_rdesc_fixed0[] = { + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x02, /* Usage (Pen), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x09, /* Report ID (9), */ + 0x09, 0x20, /* Usage (Stylus), */ + 0xA0, /* Collection (Physical), */ + 0x75, 0x01, /* Report Size (1), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x09, 0x44, /* Usage (Barrel Switch), */ + 0x09, 0x46, /* Usage (Tablet Pick), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x04, /* Report Count (4), */ + 0x81, 0x01, /* Input (Constant), */ + 0x09, 0x32, /* Usage (In Range), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x10, /* Report Size (16), */ + 0x95, 0x01, /* Report Count (1), */ + 0x14, /* Logical Minimum (0), */ + 0xA4, /* Push, */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x55, 0xFD, /* Unit Exponent (-3), */ + 0x65, 0x13, /* Unit (Inch), */ + 0x34, /* Physical Minimum (0), */ + 0x09, 0x30, /* Usage (X), */ + 0x46, 0x10, 0x27, /* Physical Maximum (10000), */ + 0x27, 0x3F, 0x9C, + 0x00, 0x00, /* Logical Maximum (39999), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x31, /* Usage (Y), */ + 0x46, 0x6A, 0x18, /* Physical Maximum (6250), */ + 0x26, 0xA7, 0x61, /* Logical Maximum (24999), */ + 0x81, 0x02, /* Input (Variable), */ + 0xB4, /* Pop, */ + 0x09, 0x30, /* Usage (Tip Pressure), */ + 0x26, 0xFF, 0x03, /* Logical Maximum (1023), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0xC0 /* End Collection */ +}; + +/* Fixed TWHA60 report descriptor, interface 1 (frame buttons) */ +static __u8 twha60_rdesc_fixed1[] = { + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x09, 0x06, /* Usage (Keyboard), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x05, /* Report ID (5), */ + 0x05, 0x07, /* Usage Page (Keyboard), */ + 0x14, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x08, /* Report Count (8), */ + 0x81, 0x01, /* Input (Constant), */ + 0x95, 0x0C, /* Report Count (12), */ + 0x19, 0x3A, /* Usage Minimum (KB F1), */ + 0x29, 0x45, /* Usage Maximum (KB F12), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x0C, /* Report Count (12), */ + 0x19, 0x68, /* Usage Minimum (KB F13), */ + 0x29, 0x73, /* Usage Maximum (KB F24), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x08, /* Report Count (8), */ + 0x81, 0x01, /* Input (Constant), */ + 0xC0 /* End Collection */ +}; + static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { @@ -525,6 +605,22 @@ static __u8 *uclogic_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; } break; + case USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60: + switch (iface_num) { + case 0: + if (*rsize == TWHA60_RDESC_ORIG_SIZE0) { + rdesc = twha60_rdesc_fixed0; + *rsize = sizeof(twha60_rdesc_fixed0); + } + break; + case 1: + if (*rsize == TWHA60_RDESC_ORIG_SIZE1) { + rdesc = twha60_rdesc_fixed1; + *rsize = sizeof(twha60_rdesc_fixed1); + } + break; + } + break; } return rdesc; @@ -543,6 +639,8 @@ static const struct hid_device_id uclogic_devices[] = { USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, + USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c index fe23a1eb586..2f60da9ed06 100644 --- a/drivers/hid/hid-wacom.c +++ b/drivers/hid/hid-wacom.c @@ -5,7 +5,6 @@ * Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina - * Copyright (c) 2007 Paul Walmsley * Copyright (c) 2008 Jiri Slaby <jirislaby@gmail.com> * Copyright (c) 2006 Andrew Zabolotny <zap@homelink.ru> * Copyright (c) 2009 Bastien Nocera <hadess@hadess.net> @@ -33,6 +32,8 @@ #define PAD_DEVICE_ID 0x0F #define WAC_CMD_LED_CONTROL 0x20 +#define WAC_CMD_ICON_START_STOP 0x21 +#define WAC_CMD_ICON_TRANSFER 0x26 struct wacom_data { __u16 tool; @@ -69,6 +70,91 @@ static enum power_supply_property wacom_ac_props[] = { POWER_SUPPLY_PROP_SCOPE, }; +static void wacom_scramble(__u8 *image) +{ + __u16 mask; + __u16 s1; + __u16 s2; + __u16 r1 ; + __u16 r2 ; + __u16 r; + __u8 buf[256]; + int i, w, x, y, z; + + for (x = 0; x < 32; x++) { + for (y = 0; y < 8; y++) + buf[(8 * x) + (7 - y)] = image[(8 * x) + y]; + } + + /* Change 76543210 into GECA6420 as required by Intuos4 WL + * HGFEDCBA HFDB7531 + */ + for (x = 0; x < 4; x++) { + for (y = 0; y < 4; y++) { + for (z = 0; z < 8; z++) { + mask = 0x0001; + r1 = 0; + r2 = 0; + i = (x << 6) + (y << 4) + z; + s1 = buf[i]; + s2 = buf[i+8]; + for (w = 0; w < 8; w++) { + r1 |= (s1 & mask); + r2 |= (s2 & mask); + s1 <<= 1; + s2 <<= 1; + mask <<= 2; + } + r = r1 | (r2 << 1); + i = (x << 6) + (y << 4) + (z << 1); + image[i] = 0xFF & r; + image[i+1] = (0xFF00 & r) >> 8; + } + } + } +} + +static void wacom_set_image(struct hid_device *hdev, const char *image, + __u8 icon_no) +{ + __u8 rep_data[68]; + __u8 p[256]; + int ret, i, j; + + for (i = 0; i < 256; i++) + p[i] = image[i]; + + rep_data[0] = WAC_CMD_ICON_START_STOP; + rep_data[1] = 0; + ret = hdev->hid_output_raw_report(hdev, rep_data, 2, + HID_FEATURE_REPORT); + if (ret < 0) + goto err; + + rep_data[0] = WAC_CMD_ICON_TRANSFER; + rep_data[1] = icon_no & 0x07; + + wacom_scramble(p); + + for (i = 0; i < 4; i++) { + for (j = 0; j < 64; j++) + rep_data[j + 3] = p[(i << 6) + j]; + + rep_data[2] = i; + ret = hdev->hid_output_raw_report(hdev, rep_data, 67, + HID_FEATURE_REPORT); + } + + rep_data[0] = WAC_CMD_ICON_START_STOP; + rep_data[1] = 0; + + ret = hdev->hid_output_raw_report(hdev, rep_data, 2, + HID_FEATURE_REPORT); + +err: + return; +} + static void wacom_leds_set_brightness(struct led_classdev *led_dev, enum led_brightness value) { @@ -91,7 +177,10 @@ static void wacom_leds_set_brightness(struct led_classdev *led_dev, if (buf) { buf[0] = WAC_CMD_LED_CONTROL; buf[1] = led; - buf[2] = value; + buf[2] = value >> 2; + buf[3] = value; + /* use fixed brightness for OLEDs */ + buf[4] = 0x08; hdev->hid_output_raw_report(hdev, buf, 9, HID_FEATURE_REPORT); kfree(buf); } @@ -317,6 +406,34 @@ static ssize_t wacom_store_speed(struct device *dev, static DEVICE_ATTR(speed, S_IRUGO | S_IWUSR | S_IWGRP, wacom_show_speed, wacom_store_speed); +#define WACOM_STORE(OLED_ID) \ +static ssize_t wacom_oled##OLED_ID##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct hid_device *hdev = container_of(dev, struct hid_device, \ + dev); \ + \ + if (count != 256) \ + return -EINVAL; \ + \ + wacom_set_image(hdev, buf, OLED_ID); \ + \ + return count; \ +} \ + \ +static DEVICE_ATTR(oled##OLED_ID##_img, S_IWUSR | S_IWGRP, NULL, \ + wacom_oled##OLED_ID##_store) + +WACOM_STORE(0); +WACOM_STORE(1); +WACOM_STORE(2); +WACOM_STORE(3); +WACOM_STORE(4); +WACOM_STORE(5); +WACOM_STORE(6); +WACOM_STORE(7); + static int wacom_gr_parse_report(struct hid_device *hdev, struct wacom_data *wdata, struct input_dev *input, unsigned char *data) @@ -717,17 +834,33 @@ static int wacom_probe(struct hid_device *hdev, hid_warn(hdev, "can't create sysfs speed attribute err: %d\n", ret); +#define OLED_INIT(OLED_ID) \ + do { \ + ret = device_create_file(&hdev->dev, \ + &dev_attr_oled##OLED_ID##_img); \ + if (ret) \ + hid_warn(hdev, \ + "can't create sysfs oled attribute, err: %d\n", ret);\ + } while (0) + +OLED_INIT(0); +OLED_INIT(1); +OLED_INIT(2); +OLED_INIT(3); +OLED_INIT(4); +OLED_INIT(5); +OLED_INIT(6); +OLED_INIT(7); + wdata->features = 0; wacom_set_features(hdev, 1); if (hdev->product == USB_DEVICE_ID_WACOM_INTUOS4_BLUETOOTH) { sprintf(hdev->name, "%s", "Wacom Intuos4 WL"); ret = wacom_initialize_leds(hdev); - if (ret) { + if (ret) hid_warn(hdev, "can't create led attribute, err: %d\n", ret); - goto destroy_leds; - } } wdata->battery.properties = wacom_battery_props; @@ -740,8 +873,8 @@ static int wacom_probe(struct hid_device *hdev, ret = power_supply_register(&hdev->dev, &wdata->battery); if (ret) { - hid_warn(hdev, "can't create sysfs battery attribute, err: %d\n", - ret); + hid_err(hdev, "can't create sysfs battery attribute, err: %d\n", + ret); goto err_battery; } @@ -756,8 +889,8 @@ static int wacom_probe(struct hid_device *hdev, ret = power_supply_register(&hdev->dev, &wdata->ac); if (ret) { - hid_warn(hdev, - "can't create ac battery attribute, err: %d\n", ret); + hid_err(hdev, + "can't create ac battery attribute, err: %d\n", ret); goto err_ac; } @@ -767,10 +900,17 @@ static int wacom_probe(struct hid_device *hdev, err_ac: power_supply_unregister(&wdata->battery); err_battery: + wacom_destroy_leds(hdev); + device_remove_file(&hdev->dev, &dev_attr_oled0_img); + device_remove_file(&hdev->dev, &dev_attr_oled1_img); + device_remove_file(&hdev->dev, &dev_attr_oled2_img); + device_remove_file(&hdev->dev, &dev_attr_oled3_img); + device_remove_file(&hdev->dev, &dev_attr_oled4_img); + device_remove_file(&hdev->dev, &dev_attr_oled5_img); + device_remove_file(&hdev->dev, &dev_attr_oled6_img); + device_remove_file(&hdev->dev, &dev_attr_oled7_img); device_remove_file(&hdev->dev, &dev_attr_speed); hid_hw_stop(hdev); -destroy_leds: - wacom_destroy_leds(hdev); err_free: kfree(wdata); return ret; @@ -781,6 +921,14 @@ static void wacom_remove(struct hid_device *hdev) struct wacom_data *wdata = hid_get_drvdata(hdev); wacom_destroy_leds(hdev); + device_remove_file(&hdev->dev, &dev_attr_oled0_img); + device_remove_file(&hdev->dev, &dev_attr_oled1_img); + device_remove_file(&hdev->dev, &dev_attr_oled2_img); + device_remove_file(&hdev->dev, &dev_attr_oled3_img); + device_remove_file(&hdev->dev, &dev_attr_oled4_img); + device_remove_file(&hdev->dev, &dev_attr_oled5_img); + device_remove_file(&hdev->dev, &dev_attr_oled6_img); + device_remove_file(&hdev->dev, &dev_attr_oled7_img); device_remove_file(&hdev->dev, &dev_attr_speed); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-waltop.c b/drivers/hid/hid-waltop.c index 745e4e9a8cf..bb536ab5941 100644 --- a/drivers/hid/hid-waltop.c +++ b/drivers/hid/hid-waltop.c @@ -638,28 +638,6 @@ static __u8 sirius_battery_free_tablet_rdesc_fixed[] = { 0xC0 /* End Collection */ }; -static int waltop_probe(struct hid_device *hdev, - const struct hid_device_id *id) -{ - int ret; - - ret = hid_parse(hdev); - if (ret) { - hid_err(hdev, "parse failed\n"); - goto err; - } - - ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); - if (ret) { - hid_err(hdev, "hw start failed\n"); - goto err; - } - - return 0; -err: - return ret; -} - static __u8 *waltop_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { @@ -776,11 +754,6 @@ static int waltop_raw_event(struct hid_device *hdev, struct hid_report *report, return 0; } -static void waltop_remove(struct hid_device *hdev) -{ - hid_hw_stop(hdev); -} - static const struct hid_device_id waltop_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) }, @@ -803,10 +776,8 @@ MODULE_DEVICE_TABLE(hid, waltop_devices); static struct hid_driver waltop_driver = { .name = "waltop", .id_table = waltop_devices, - .probe = waltop_probe, .report_fixup = waltop_report_fixup, .raw_event = waltop_raw_event, - .remove = waltop_remove, }; static int __init waltop_init(void) diff --git a/drivers/hid/hid-wiimote-ext.c b/drivers/hid/hid-wiimote-ext.c index 0a1805c9b0e..bc85bf29062 100644 --- a/drivers/hid/hid-wiimote-ext.c +++ b/drivers/hid/hid-wiimote-ext.c @@ -28,12 +28,14 @@ struct wiimote_ext { bool mp_plugged; bool motionp; __u8 ext_type; + __u16 calib[4][3]; }; enum wiiext_type { WIIEXT_NONE, /* placeholder */ WIIEXT_CLASSIC, /* Nintendo classic controller */ WIIEXT_NUNCHUCK, /* Nintendo nunchuck controller */ + WIIEXT_BALANCE_BOARD, /* Nintendo balance board controller */ }; enum wiiext_keys { @@ -126,6 +128,7 @@ error: static __u8 ext_read(struct wiimote_ext *ext) { ssize_t ret; + __u8 buf[24], i, j, offs = 0; __u8 rmem[2], wmem; __u8 type = WIIEXT_NONE; @@ -151,6 +154,28 @@ static __u8 ext_read(struct wiimote_ext *ext) type = WIIEXT_NUNCHUCK; else if (rmem[0] == 0x01 && rmem[1] == 0x01) type = WIIEXT_CLASSIC; + else if (rmem[0] == 0x04 && rmem[1] == 0x02) + type = WIIEXT_BALANCE_BOARD; + } + + /* get balance board calibration data */ + if (type == WIIEXT_BALANCE_BOARD) { + ret = wiimote_cmd_read(ext->wdata, 0xa40024, buf, 12); + ret += wiimote_cmd_read(ext->wdata, 0xa40024 + 12, + buf + 12, 12); + + if (ret != 24) { + type = WIIEXT_NONE; + } else { + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) { + ext->calib[j][i] = buf[offs]; + ext->calib[j][i] <<= 8; + ext->calib[j][i] |= buf[offs + 1]; + offs += 2; + } + } + } } wiimote_cmd_release(ext->wdata); @@ -509,6 +534,71 @@ static void handler_classic(struct wiimote_ext *ext, const __u8 *payload) input_sync(ext->input); } +static void handler_balance_board(struct wiimote_ext *ext, const __u8 *payload) +{ + __s32 val[4], tmp; + unsigned int i; + + /* Byte | 8 7 6 5 4 3 2 1 | + * -----+--------------------------+ + * 1 | Top Right <15:8> | + * 2 | Top Right <7:0> | + * -----+--------------------------+ + * 3 | Bottom Right <15:8> | + * 4 | Bottom Right <7:0> | + * -----+--------------------------+ + * 5 | Top Left <15:8> | + * 6 | Top Left <7:0> | + * -----+--------------------------+ + * 7 | Bottom Left <15:8> | + * 8 | Bottom Left <7:0> | + * -----+--------------------------+ + * + * These values represent the weight-measurements of the Wii-balance + * board with 16bit precision. + * + * The balance-board is never reported interleaved with motionp. + */ + + val[0] = payload[0]; + val[0] <<= 8; + val[0] |= payload[1]; + + val[1] = payload[2]; + val[1] <<= 8; + val[1] |= payload[3]; + + val[2] = payload[4]; + val[2] <<= 8; + val[2] |= payload[5]; + + val[3] = payload[6]; + val[3] <<= 8; + val[3] |= payload[7]; + + /* apply calibration data */ + for (i = 0; i < 4; i++) { + if (val[i] < ext->calib[i][1]) { + tmp = val[i] - ext->calib[i][0]; + tmp *= 1700; + tmp /= ext->calib[i][1] - ext->calib[i][0]; + } else { + tmp = val[i] - ext->calib[i][1]; + tmp *= 1700; + tmp /= ext->calib[i][2] - ext->calib[i][1]; + tmp += 1700; + } + val[i] = tmp; + } + + input_report_abs(ext->input, ABS_HAT0X, val[0]); + input_report_abs(ext->input, ABS_HAT0Y, val[1]); + input_report_abs(ext->input, ABS_HAT1X, val[2]); + input_report_abs(ext->input, ABS_HAT1Y, val[3]); + + input_sync(ext->input); +} + /* call this with state.lock spinlock held */ void wiiext_handle(struct wiimote_data *wdata, const __u8 *payload) { @@ -523,6 +613,8 @@ void wiiext_handle(struct wiimote_data *wdata, const __u8 *payload) handler_nunchuck(ext, payload); } else if (ext->ext_type == WIIEXT_CLASSIC) { handler_classic(ext, payload); + } else if (ext->ext_type == WIIEXT_BALANCE_BOARD) { + handler_balance_board(ext, payload); } } @@ -551,6 +643,11 @@ static ssize_t wiiext_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "motionp+classic\n"); else return sprintf(buf, "classic\n"); + } else if (type == WIIEXT_BALANCE_BOARD) { + if (motionp) + return sprintf(buf, "motionp+balanceboard\n"); + else + return sprintf(buf, "balanceboard\n"); } else { if (motionp) return sprintf(buf, "motionp\n"); diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 3b6f7bf5a77..17d15bb610d 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -42,6 +42,7 @@ static struct cdev hidraw_cdev; static struct class *hidraw_class; static struct hidraw *hidraw_table[HIDRAW_MAX_DEVICES]; static DEFINE_MUTEX(minors_lock); +static void drop_ref(struct hidraw *hid, int exists_bit); static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { @@ -113,7 +114,7 @@ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, __u8 *buf; int ret = 0; - if (!hidraw_table[minor]) { + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { ret = -ENODEV; goto out; } @@ -261,7 +262,7 @@ static int hidraw_open(struct inode *inode, struct file *file) } mutex_lock(&minors_lock); - if (!hidraw_table[minor]) { + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { err = -ENODEV; goto out_unlock; } @@ -298,36 +299,12 @@ out: static int hidraw_release(struct inode * inode, struct file * file) { unsigned int minor = iminor(inode); - struct hidraw *dev; struct hidraw_list *list = file->private_data; - int ret; - int i; - - mutex_lock(&minors_lock); - if (!hidraw_table[minor]) { - ret = -ENODEV; - goto unlock; - } + drop_ref(hidraw_table[minor], 0); list_del(&list->node); - dev = hidraw_table[minor]; - if (!--dev->open) { - if (list->hidraw->exist) { - hid_hw_power(dev->hid, PM_HINT_NORMAL); - hid_hw_close(dev->hid); - } else { - kfree(list->hidraw); - } - } - - for (i = 0; i < HIDRAW_BUFFER_SIZE; ++i) - kfree(list->buffer[i].value); kfree(list); - ret = 0; -unlock: - mutex_unlock(&minors_lock); - - return ret; + return 0; } static long hidraw_ioctl(struct file *file, unsigned int cmd, @@ -529,21 +506,7 @@ EXPORT_SYMBOL_GPL(hidraw_connect); void hidraw_disconnect(struct hid_device *hid) { struct hidraw *hidraw = hid->hidraw; - - mutex_lock(&minors_lock); - hidraw->exist = 0; - - device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); - - hidraw_table[hidraw->minor] = NULL; - - if (hidraw->open) { - hid_hw_close(hid); - wake_up_interruptible(&hidraw->wait); - } else { - kfree(hidraw); - } - mutex_unlock(&minors_lock); + drop_ref(hidraw, 1); } EXPORT_SYMBOL_GPL(hidraw_disconnect); @@ -559,21 +522,28 @@ int __init hidraw_init(void) if (result < 0) { pr_warn("can't get major number\n"); - result = 0; goto out; } hidraw_class = class_create(THIS_MODULE, "hidraw"); if (IS_ERR(hidraw_class)) { result = PTR_ERR(hidraw_class); - unregister_chrdev(hidraw_major, "hidraw"); - goto out; + goto error_cdev; } cdev_init(&hidraw_cdev, &hidraw_ops); - cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES); + result = cdev_add(&hidraw_cdev, dev_id, HIDRAW_MAX_DEVICES); + if (result < 0) + goto error_class; + out: return result; + +error_class: + class_destroy(hidraw_class); +error_cdev: + unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); + goto out; } void hidraw_exit(void) @@ -585,3 +555,23 @@ void hidraw_exit(void) unregister_chrdev_region(dev_id, HIDRAW_MAX_DEVICES); } + +static void drop_ref(struct hidraw *hidraw, int exists_bit) +{ + mutex_lock(&minors_lock); + if (exists_bit) { + hid_hw_close(hidraw->hid); + hidraw->exist = 0; + if (hidraw->open) + wake_up_interruptible(&hidraw->wait); + } else { + --hidraw->open; + } + + if (!hidraw->open && !hidraw->exist) { + device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor)); + hidraw_table[hidraw->minor] = NULL; + kfree(hidraw); + } + mutex_unlock(&minors_lock); +} diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index dedd8e4e5c6..8e0c4bf94eb 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -1415,20 +1415,20 @@ static int hid_post_reset(struct usb_interface *intf) * configuration descriptors passed, we already know that * the size of the HID report descriptor has not changed. */ - rdesc = kmalloc(hid->rsize, GFP_KERNEL); + rdesc = kmalloc(hid->dev_rsize, GFP_KERNEL); if (!rdesc) { dbg_hid("couldn't allocate rdesc memory (post_reset)\n"); return 1; } status = hid_get_class_descriptor(dev, interface->desc.bInterfaceNumber, - HID_DT_REPORT, rdesc, hid->rsize); + HID_DT_REPORT, rdesc, hid->dev_rsize); if (status < 0) { dbg_hid("reading report descriptor failed (post_reset)\n"); kfree(rdesc); return 1; } - status = memcmp(rdesc, hid->rdesc, hid->rsize); + status = memcmp(rdesc, hid->dev_rdesc, hid->dev_rsize); kfree(rdesc); if (status != 0) { dbg_hid("report descriptor changed\n"); diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 991e85c7325..11c7932dc7e 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -70,12 +70,13 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, - { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index cfec802cf9c..f915eb1c29f 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -87,10 +87,18 @@ static ssize_t ad7314_show_temperature(struct device *dev, } } +static ssize_t ad7314_show_name(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); +} + +static DEVICE_ATTR(name, S_IRUGO, ad7314_show_name, NULL); static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, ad7314_show_temperature, NULL, 0); static struct attribute *ad7314_attributes[] = { + &dev_attr_name.attr, &sensor_dev_attr_temp1_input.dev_attr.attr, NULL, }; diff --git a/drivers/hwmon/ads7871.c b/drivers/hwmon/ads7871.c index e65c6e45d36..7bf4ce3d405 100644 --- a/drivers/hwmon/ads7871.c +++ b/drivers/hwmon/ads7871.c @@ -139,6 +139,12 @@ static ssize_t show_voltage(struct device *dev, } } +static ssize_t ads7871_show_name(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + return sprintf(buf, "%s\n", to_spi_device(dev)->modalias); +} + static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, show_voltage, NULL, 0); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 1); static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 2); @@ -148,6 +154,8 @@ static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 5); static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 6); static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 7); +static DEVICE_ATTR(name, S_IRUGO, ads7871_show_name, NULL); + static struct attribute *ads7871_attributes[] = { &sensor_dev_attr_in0_input.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, @@ -157,6 +165,7 @@ static struct attribute *ads7871_attributes[] = { &sensor_dev_attr_in5_input.dev_attr.attr, &sensor_dev_attr_in6_input.dev_attr.attr, &sensor_dev_attr_in7_input.dev_attr.attr, + &dev_attr_name.attr, NULL }; diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index 28270886051..8f3f6f2c45f 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -53,10 +53,10 @@ #define APPLESMC_MAX_DATA_LENGTH 32 -/* wait up to 32 ms for a status change. */ +/* wait up to 128 ms for a status change. */ #define APPLESMC_MIN_WAIT 0x0010 #define APPLESMC_RETRY_WAIT 0x0100 -#define APPLESMC_MAX_WAIT 0x8000 +#define APPLESMC_MAX_WAIT 0x20000 #define APPLESMC_READ_CMD 0x10 #define APPLESMC_WRITE_CMD 0x11 diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 0fa356fe82c..984a3f13923 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -815,17 +815,20 @@ static int __init coretemp_init(void) if (err) goto exit; + get_online_cpus(); for_each_online_cpu(i) get_core_online(i); #ifndef CONFIG_HOTPLUG_CPU if (list_empty(&pdev_list)) { + put_online_cpus(); err = -ENODEV; goto exit_driver_unreg; } #endif register_hotcpu_notifier(&coretemp_cpu_notifier); + put_online_cpus(); return 0; #ifndef CONFIG_HOTPLUG_CPU @@ -840,6 +843,7 @@ static void __exit coretemp_exit(void) { struct pdev_entry *p, *n; + get_online_cpus(); unregister_hotcpu_notifier(&coretemp_cpu_notifier); mutex_lock(&pdev_list_mutex); list_for_each_entry_safe(p, n, &pdev_list, list) { @@ -848,6 +852,7 @@ static void __exit coretemp_exit(void) kfree(p); } mutex_unlock(&pdev_list_mutex); + put_online_cpus(); platform_driver_unregister(&coretemp_driver); } diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 2764b78a784..af69073b3fe 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -129,12 +129,12 @@ static bool __devinit fam15h_power_is_internal_node0(struct pci_dev *f4) * counter saturations resulting in bogus power readings. * We correct this value ourselves to cope with older BIOSes. */ -static DEFINE_PCI_DEVICE_TABLE(affected_device) = { +static const struct pci_device_id affected_device[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, { 0 } }; -static void __devinit tweak_runavg_range(struct pci_dev *pdev) +static void tweak_runavg_range(struct pci_dev *pdev) { u32 val; @@ -158,6 +158,16 @@ static void __devinit tweak_runavg_range(struct pci_dev *pdev) REG_TDP_RUNNING_AVERAGE, val); } +#ifdef CONFIG_PM +static int fam15h_power_resume(struct pci_dev *pdev) +{ + tweak_runavg_range(pdev); + return 0; +} +#else +#define fam15h_power_resume NULL +#endif + static void __devinit fam15h_power_init_data(struct pci_dev *f4, struct fam15h_power_data *data) { @@ -256,6 +266,7 @@ static struct pci_driver fam15h_power_driver = { .id_table = fam15h_power_id_table, .probe = fam15h_power_probe, .remove = __devexit_p(fam15h_power_remove), + .resume = fam15h_power_resume, }; module_pci_driver(fam15h_power_driver); diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c index ee4ebc198a9..2e56c6ce9fb 100644 --- a/drivers/hwmon/via-cputemp.c +++ b/drivers/hwmon/via-cputemp.c @@ -328,6 +328,7 @@ static int __init via_cputemp_init(void) if (err) goto exit; + get_online_cpus(); for_each_online_cpu(i) { struct cpuinfo_x86 *c = &cpu_data(i); @@ -347,12 +348,14 @@ static int __init via_cputemp_init(void) #ifndef CONFIG_HOTPLUG_CPU if (list_empty(&pdev_list)) { + put_online_cpus(); err = -ENODEV; goto exit_driver_unreg; } #endif register_hotcpu_notifier(&via_cputemp_cpu_notifier); + put_online_cpus(); return 0; #ifndef CONFIG_HOTPLUG_CPU @@ -367,6 +370,7 @@ static void __exit via_cputemp_exit(void) { struct pdev_entry *p, *n; + get_online_cpus(); unregister_hotcpu_notifier(&via_cputemp_cpu_notifier); mutex_lock(&pdev_list_mutex); list_for_each_entry_safe(p, n, &pdev_list, list) { @@ -375,6 +379,7 @@ static void __exit via_cputemp_exit(void) kfree(p); } mutex_unlock(&pdev_list_mutex); + put_online_cpus(); platform_driver_unregister(&via_cputemp_driver); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 53589000fd0..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -42,6 +42,7 @@ */ #include <linux/slab.h> +#include <linux/smpboot.h> #include "ehca_classes.h" #include "ehca_irq.h" @@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; @@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; -} - -static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), - "ehca_comp/%d", cpu); - - return cct->task; + spin_unlock_irq(&target->task_lock); } -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks)); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -static int __cpuinit comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpumask_any(cpu_online_mask)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -static struct notifier_block comp_pool_callback_nb __cpuinitdata = { - .notifier_call = comp_pool_callback, - .priority = 0, +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, }; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -905,38 +825,46 @@ int ehca_create_comp_pool(void) pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } - register_hotcpu_notifier(&comp_pool_callback_nb); + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; - unregister_hotcpu_notifier(&comp_pool_callback_nb); - - for_each_online_cpu(i) - destroy_comp_task(pool, i); + smpboot_unregister_percpu_thread(&comp_pool_threads); + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea..5370199f08c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; int last_cpu; spinlock_t last_cpu_lock; }; diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 6c58bfff01a..118d0300f1f 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -54,16 +54,9 @@ struct evdev_client { static struct evdev *evdev_table[EVDEV_MINORS]; static DEFINE_MUTEX(evdev_table_mutex); -static void evdev_pass_event(struct evdev_client *client, - struct input_event *event, - ktime_t mono, ktime_t real) +static void __pass_event(struct evdev_client *client, + const struct input_event *event) { - event->time = ktime_to_timeval(client->clkid == CLOCK_MONOTONIC ? - mono : real); - - /* Interrupts are disabled, just acquire the lock. */ - spin_lock(&client->buffer_lock); - client->buffer[client->head++] = *event; client->head &= client->bufsize - 1; @@ -86,42 +79,74 @@ static void evdev_pass_event(struct evdev_client *client, client->packet_head = client->head; kill_fasync(&client->fasync, SIGIO, POLL_IN); } +} + +static void evdev_pass_values(struct evdev_client *client, + const struct input_value *vals, unsigned int count, + ktime_t mono, ktime_t real) +{ + struct evdev *evdev = client->evdev; + const struct input_value *v; + struct input_event event; + bool wakeup = false; + + event.time = ktime_to_timeval(client->clkid == CLOCK_MONOTONIC ? + mono : real); + + /* Interrupts are disabled, just acquire the lock. */ + spin_lock(&client->buffer_lock); + + for (v = vals; v != vals + count; v++) { + event.type = v->type; + event.code = v->code; + event.value = v->value; + __pass_event(client, &event); + if (v->type == EV_SYN && v->code == SYN_REPORT) + wakeup = true; + } spin_unlock(&client->buffer_lock); + + if (wakeup) + wake_up_interruptible(&evdev->wait); } /* - * Pass incoming event to all connected clients. + * Pass incoming events to all connected clients. */ -static void evdev_event(struct input_handle *handle, - unsigned int type, unsigned int code, int value) +static void evdev_events(struct input_handle *handle, + const struct input_value *vals, unsigned int count) { struct evdev *evdev = handle->private; struct evdev_client *client; - struct input_event event; ktime_t time_mono, time_real; time_mono = ktime_get(); time_real = ktime_sub(time_mono, ktime_get_monotonic_offset()); - event.type = type; - event.code = code; - event.value = value; - rcu_read_lock(); client = rcu_dereference(evdev->grab); if (client) - evdev_pass_event(client, &event, time_mono, time_real); + evdev_pass_values(client, vals, count, time_mono, time_real); else list_for_each_entry_rcu(client, &evdev->client_list, node) - evdev_pass_event(client, &event, time_mono, time_real); + evdev_pass_values(client, vals, count, + time_mono, time_real); rcu_read_unlock(); +} - if (type == EV_SYN && code == SYN_REPORT) - wake_up_interruptible(&evdev->wait); +/* + * Pass incoming event to all connected clients. + */ +static void evdev_event(struct input_handle *handle, + unsigned int type, unsigned int code, int value) +{ + struct input_value vals[] = { { type, code, value } }; + + evdev_events(handle, vals, 1); } static int evdev_fasync(int fd, struct file *file, int on) @@ -653,20 +678,22 @@ static int evdev_handle_mt_request(struct input_dev *dev, unsigned int size, int __user *ip) { - const struct input_mt_slot *mt = dev->mt; + const struct input_mt *mt = dev->mt; unsigned int code; int max_slots; int i; if (get_user(code, &ip[0])) return -EFAULT; - if (!input_is_mt_value(code)) + if (!mt || !input_is_mt_value(code)) return -EINVAL; max_slots = (size - sizeof(__u32)) / sizeof(__s32); - for (i = 0; i < dev->mtsize && i < max_slots; i++) - if (put_user(input_mt_get_value(&mt[i], code), &ip[1 + i])) + for (i = 0; i < mt->num_slots && i < max_slots; i++) { + int value = input_mt_get_value(&mt->slots[i], code); + if (put_user(value, &ip[1 + i])) return -EFAULT; + } return 0; } @@ -1048,6 +1075,7 @@ MODULE_DEVICE_TABLE(input, evdev_ids); static struct input_handler evdev_handler = { .event = evdev_event, + .events = evdev_events, .connect = evdev_connect, .disconnect = evdev_disconnect, .fops = &evdev_fops, diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 70a16c7da8c..c0ec7d42c3b 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -14,6 +14,14 @@ #define TRKID_SGN ((TRKID_MAX + 1) >> 1) +static void copy_abs(struct input_dev *dev, unsigned int dst, unsigned int src) +{ + if (dev->absinfo && test_bit(src, dev->absbit)) { + dev->absinfo[dst] = dev->absinfo[src]; + dev->absbit[BIT_WORD(dst)] |= BIT_MASK(dst); + } +} + /** * input_mt_init_slots() - initialize MT input slots * @dev: input device supporting MT events and finger tracking @@ -25,29 +33,63 @@ * May be called repeatedly. Returns -EINVAL if attempting to * reinitialize with a different number of slots. */ -int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots) +int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, + unsigned int flags) { + struct input_mt *mt = dev->mt; int i; if (!num_slots) return 0; - if (dev->mt) - return dev->mtsize != num_slots ? -EINVAL : 0; + if (mt) + return mt->num_slots != num_slots ? -EINVAL : 0; - dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL); - if (!dev->mt) - return -ENOMEM; + mt = kzalloc(sizeof(*mt) + num_slots * sizeof(*mt->slots), GFP_KERNEL); + if (!mt) + goto err_mem; - dev->mtsize = num_slots; + mt->num_slots = num_slots; + mt->flags = flags; input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0); input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0); - input_set_events_per_packet(dev, 6 * num_slots); + + if (flags & (INPUT_MT_POINTER | INPUT_MT_DIRECT)) { + __set_bit(EV_KEY, dev->evbit); + __set_bit(BTN_TOUCH, dev->keybit); + + copy_abs(dev, ABS_X, ABS_MT_POSITION_X); + copy_abs(dev, ABS_Y, ABS_MT_POSITION_Y); + copy_abs(dev, ABS_PRESSURE, ABS_MT_PRESSURE); + } + if (flags & INPUT_MT_POINTER) { + __set_bit(BTN_TOOL_FINGER, dev->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, dev->keybit); + if (num_slots >= 3) + __set_bit(BTN_TOOL_TRIPLETAP, dev->keybit); + if (num_slots >= 4) + __set_bit(BTN_TOOL_QUADTAP, dev->keybit); + if (num_slots >= 5) + __set_bit(BTN_TOOL_QUINTTAP, dev->keybit); + __set_bit(INPUT_PROP_POINTER, dev->propbit); + } + if (flags & INPUT_MT_DIRECT) + __set_bit(INPUT_PROP_DIRECT, dev->propbit); + if (flags & INPUT_MT_TRACK) { + unsigned int n2 = num_slots * num_slots; + mt->red = kcalloc(n2, sizeof(*mt->red), GFP_KERNEL); + if (!mt->red) + goto err_mem; + } /* Mark slots as 'unused' */ for (i = 0; i < num_slots; i++) - input_mt_set_value(&dev->mt[i], ABS_MT_TRACKING_ID, -1); + input_mt_set_value(&mt->slots[i], ABS_MT_TRACKING_ID, -1); + dev->mt = mt; return 0; +err_mem: + kfree(mt); + return -ENOMEM; } EXPORT_SYMBOL(input_mt_init_slots); @@ -60,11 +102,11 @@ EXPORT_SYMBOL(input_mt_init_slots); */ void input_mt_destroy_slots(struct input_dev *dev) { - kfree(dev->mt); + if (dev->mt) { + kfree(dev->mt->red); + kfree(dev->mt); + } dev->mt = NULL; - dev->mtsize = 0; - dev->slot = 0; - dev->trkid = 0; } EXPORT_SYMBOL(input_mt_destroy_slots); @@ -83,18 +125,24 @@ EXPORT_SYMBOL(input_mt_destroy_slots); void input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active) { - struct input_mt_slot *mt; + struct input_mt *mt = dev->mt; + struct input_mt_slot *slot; int id; - if (!dev->mt || !active) { + if (!mt) + return; + + slot = &mt->slots[mt->slot]; + slot->frame = mt->frame; + + if (!active) { input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); return; } - mt = &dev->mt[dev->slot]; - id = input_mt_get_value(mt, ABS_MT_TRACKING_ID); - if (id < 0 || input_mt_get_value(mt, ABS_MT_TOOL_TYPE) != tool_type) - id = input_mt_new_trkid(dev); + id = input_mt_get_value(slot, ABS_MT_TRACKING_ID); + if (id < 0 || input_mt_get_value(slot, ABS_MT_TOOL_TYPE) != tool_type) + id = input_mt_new_trkid(mt); input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id); input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type); @@ -135,13 +183,19 @@ EXPORT_SYMBOL(input_mt_report_finger_count); */ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) { - struct input_mt_slot *oldest = NULL; - int oldid = dev->trkid; - int count = 0; - int i; + struct input_mt *mt = dev->mt; + struct input_mt_slot *oldest; + int oldid, count, i; + + if (!mt) + return; + + oldest = 0; + oldid = mt->trkid; + count = 0; - for (i = 0; i < dev->mtsize; ++i) { - struct input_mt_slot *ps = &dev->mt[i]; + for (i = 0; i < mt->num_slots; ++i) { + struct input_mt_slot *ps = &mt->slots[i]; int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID); if (id < 0) @@ -160,13 +214,208 @@ void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count) if (oldest) { int x = input_mt_get_value(oldest, ABS_MT_POSITION_X); int y = input_mt_get_value(oldest, ABS_MT_POSITION_Y); - int p = input_mt_get_value(oldest, ABS_MT_PRESSURE); input_event(dev, EV_ABS, ABS_X, x); input_event(dev, EV_ABS, ABS_Y, y); - input_event(dev, EV_ABS, ABS_PRESSURE, p); + + if (test_bit(ABS_MT_PRESSURE, dev->absbit)) { + int p = input_mt_get_value(oldest, ABS_MT_PRESSURE); + input_event(dev, EV_ABS, ABS_PRESSURE, p); + } } else { - input_event(dev, EV_ABS, ABS_PRESSURE, 0); + if (test_bit(ABS_MT_PRESSURE, dev->absbit)) + input_event(dev, EV_ABS, ABS_PRESSURE, 0); } } EXPORT_SYMBOL(input_mt_report_pointer_emulation); + +/** + * input_mt_sync_frame() - synchronize mt frame + * @dev: input device with allocated MT slots + * + * Close the frame and prepare the internal state for a new one. + * Depending on the flags, marks unused slots as inactive and performs + * pointer emulation. + */ +void input_mt_sync_frame(struct input_dev *dev) +{ + struct input_mt *mt = dev->mt; + struct input_mt_slot *s; + + if (!mt) + return; + + if (mt->flags & INPUT_MT_DROP_UNUSED) { + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (s->frame == mt->frame) + continue; + input_mt_slot(dev, s - mt->slots); + input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); + } + } + + input_mt_report_pointer_emulation(dev, (mt->flags & INPUT_MT_POINTER)); + + mt->frame++; +} +EXPORT_SYMBOL(input_mt_sync_frame); + +static int adjust_dual(int *begin, int step, int *end, int eq) +{ + int f, *p, s, c; + + if (begin == end) + return 0; + + f = *begin; + p = begin + step; + s = p == end ? f + 1 : *p; + + for (; p != end; p += step) + if (*p < f) + s = f, f = *p; + else if (*p < s) + s = *p; + + c = (f + s + 1) / 2; + if (c == 0 || (c > 0 && !eq)) + return 0; + if (s < 0) + c *= 2; + + for (p = begin; p != end; p += step) + *p -= c; + + return (c < s && s <= 0) || (f >= 0 && f < c); +} + +static void find_reduced_matrix(int *w, int nr, int nc, int nrc) +{ + int i, k, sum; + + for (k = 0; k < nrc; k++) { + for (i = 0; i < nr; i++) + adjust_dual(w + i, nr, w + i + nrc, nr <= nc); + sum = 0; + for (i = 0; i < nrc; i += nr) + sum += adjust_dual(w + i, 1, w + i + nr, nc <= nr); + if (!sum) + break; + } +} + +static int input_mt_set_matrix(struct input_mt *mt, + const struct input_mt_pos *pos, int num_pos) +{ + const struct input_mt_pos *p; + struct input_mt_slot *s; + int *w = mt->red; + int x, y; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (!input_mt_is_active(s)) + continue; + x = input_mt_get_value(s, ABS_MT_POSITION_X); + y = input_mt_get_value(s, ABS_MT_POSITION_Y); + for (p = pos; p != pos + num_pos; p++) { + int dx = x - p->x, dy = y - p->y; + *w++ = dx * dx + dy * dy; + } + } + + return w - mt->red; +} + +static void input_mt_set_slots(struct input_mt *mt, + int *slots, int num_pos) +{ + struct input_mt_slot *s; + int *w = mt->red, *p; + + for (p = slots; p != slots + num_pos; p++) + *p = -1; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (!input_mt_is_active(s)) + continue; + for (p = slots; p != slots + num_pos; p++) + if (*w++ < 0) + *p = s - mt->slots; + } + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (input_mt_is_active(s)) + continue; + for (p = slots; p != slots + num_pos; p++) + if (*p < 0) { + *p = s - mt->slots; + break; + } + } +} + +/** + * input_mt_assign_slots() - perform a best-match assignment + * @dev: input device with allocated MT slots + * @slots: the slot assignment to be filled + * @pos: the position array to match + * @num_pos: number of positions + * + * Performs a best match against the current contacts and returns + * the slot assignment list. New contacts are assigned to unused + * slots. + * + * Returns zero on success, or negative error in case of failure. + */ +int input_mt_assign_slots(struct input_dev *dev, int *slots, + const struct input_mt_pos *pos, int num_pos) +{ + struct input_mt *mt = dev->mt; + int nrc; + + if (!mt || !mt->red) + return -ENXIO; + if (num_pos > mt->num_slots) + return -EINVAL; + if (num_pos < 1) + return 0; + + nrc = input_mt_set_matrix(mt, pos, num_pos); + find_reduced_matrix(mt->red, num_pos, nrc / num_pos, nrc); + input_mt_set_slots(mt, slots, num_pos); + + return 0; +} +EXPORT_SYMBOL(input_mt_assign_slots); + +/** + * input_mt_get_slot_by_key() - return slot matching key + * @dev: input device with allocated MT slots + * @key: the key of the sought slot + * + * Returns the slot of the given key, if it exists, otherwise + * set the key on the first unused slot and return. + * + * If no available slot can be found, -1 is returned. + */ +int input_mt_get_slot_by_key(struct input_dev *dev, int key) +{ + struct input_mt *mt = dev->mt; + struct input_mt_slot *s; + + if (!mt) + return -1; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) + if (input_mt_is_active(s) && s->key == key) + return s - mt->slots; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) + if (!input_mt_is_active(s)) { + s->key = key; + return s - mt->slots; + } + + return -1; +} +EXPORT_SYMBOL(input_mt_get_slot_by_key); diff --git a/drivers/input/input.c b/drivers/input/input.c index 8921c6180c5..5244f3d05b1 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -47,6 +47,8 @@ static DEFINE_MUTEX(input_mutex); static struct input_handler *input_table[8]; +static const struct input_value input_value_sync = { EV_SYN, SYN_REPORT, 1 }; + static inline int is_event_supported(unsigned int code, unsigned long *bm, unsigned int max) { @@ -69,42 +71,102 @@ static int input_defuzz_abs_event(int value, int old_val, int fuzz) return value; } +static void input_start_autorepeat(struct input_dev *dev, int code) +{ + if (test_bit(EV_REP, dev->evbit) && + dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && + dev->timer.data) { + dev->repeat_key = code; + mod_timer(&dev->timer, + jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); + } +} + +static void input_stop_autorepeat(struct input_dev *dev) +{ + del_timer(&dev->timer); +} + /* * Pass event first through all filters and then, if event has not been * filtered out, through all open handles. This function is called with * dev->event_lock held and interrupts disabled. */ -static void input_pass_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static unsigned int input_to_handler(struct input_handle *handle, + struct input_value *vals, unsigned int count) +{ + struct input_handler *handler = handle->handler; + struct input_value *end = vals; + struct input_value *v; + + for (v = vals; v != vals + count; v++) { + if (handler->filter && + handler->filter(handle, v->type, v->code, v->value)) + continue; + if (end != v) + *end = *v; + end++; + } + + count = end - vals; + if (!count) + return 0; + + if (handler->events) + handler->events(handle, vals, count); + else if (handler->event) + for (v = vals; v != end; v++) + handler->event(handle, v->type, v->code, v->value); + + return count; +} + +/* + * Pass values first through all filters and then, if event has not been + * filtered out, through all open handles. This function is called with + * dev->event_lock held and interrupts disabled. + */ +static void input_pass_values(struct input_dev *dev, + struct input_value *vals, unsigned int count) { - struct input_handler *handler; struct input_handle *handle; + struct input_value *v; + + if (!count) + return; rcu_read_lock(); handle = rcu_dereference(dev->grab); - if (handle) - handle->handler->event(handle, type, code, value); - else { - bool filtered = false; - - list_for_each_entry_rcu(handle, &dev->h_list, d_node) { - if (!handle->open) - continue; + if (handle) { + count = input_to_handler(handle, vals, count); + } else { + list_for_each_entry_rcu(handle, &dev->h_list, d_node) + if (handle->open) + count = input_to_handler(handle, vals, count); + } - handler = handle->handler; - if (!handler->filter) { - if (filtered) - break; + rcu_read_unlock(); - handler->event(handle, type, code, value); + add_input_randomness(vals->type, vals->code, vals->value); - } else if (handler->filter(handle, type, code, value)) - filtered = true; + /* trigger auto repeat for key events */ + for (v = vals; v != vals + count; v++) { + if (v->type == EV_KEY && v->value != 2) { + if (v->value) + input_start_autorepeat(dev, v->code); + else + input_stop_autorepeat(dev); } } +} - rcu_read_unlock(); +static void input_pass_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + struct input_value vals[] = { { type, code, value } }; + + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } /* @@ -121,18 +183,12 @@ static void input_repeat_key(unsigned long data) if (test_bit(dev->repeat_key, dev->key) && is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { + struct input_value vals[] = { + { EV_KEY, dev->repeat_key, 2 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, dev->repeat_key, 2); - - if (dev->sync) { - /* - * Only send SYN_REPORT if we are not in a middle - * of driver parsing a new hardware packet. - * Otherwise assume that the driver will send - * SYN_REPORT once it's done. - */ - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); - } + input_pass_values(dev, vals, ARRAY_SIZE(vals)); if (dev->rep[REP_PERIOD]) mod_timer(&dev->timer, jiffies + @@ -142,30 +198,17 @@ static void input_repeat_key(unsigned long data) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void input_start_autorepeat(struct input_dev *dev, int code) -{ - if (test_bit(EV_REP, dev->evbit) && - dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && - dev->timer.data) { - dev->repeat_key = code; - mod_timer(&dev->timer, - jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); - } -} - -static void input_stop_autorepeat(struct input_dev *dev) -{ - del_timer(&dev->timer); -} - #define INPUT_IGNORE_EVENT 0 #define INPUT_PASS_TO_HANDLERS 1 #define INPUT_PASS_TO_DEVICE 2 +#define INPUT_SLOT 4 +#define INPUT_FLUSH 8 #define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) static int input_handle_abs_event(struct input_dev *dev, unsigned int code, int *pval) { + struct input_mt *mt = dev->mt; bool is_mt_event; int *pold; @@ -174,8 +217,8 @@ static int input_handle_abs_event(struct input_dev *dev, * "Stage" the event; we'll flush it later, when we * get actual touch data. */ - if (*pval >= 0 && *pval < dev->mtsize) - dev->slot = *pval; + if (mt && *pval >= 0 && *pval < mt->num_slots) + mt->slot = *pval; return INPUT_IGNORE_EVENT; } @@ -184,9 +227,8 @@ static int input_handle_abs_event(struct input_dev *dev, if (!is_mt_event) { pold = &dev->absinfo[code].value; - } else if (dev->mt) { - struct input_mt_slot *mtslot = &dev->mt[dev->slot]; - pold = &mtslot->abs[code - ABS_MT_FIRST]; + } else if (mt) { + pold = &mt->slots[mt->slot].abs[code - ABS_MT_FIRST]; } else { /* * Bypass filtering for multi-touch events when @@ -205,16 +247,16 @@ static int input_handle_abs_event(struct input_dev *dev, } /* Flush pending "slot" event */ - if (is_mt_event && dev->slot != input_abs_get_val(dev, ABS_MT_SLOT)) { - input_abs_set_val(dev, ABS_MT_SLOT, dev->slot); - input_pass_event(dev, EV_ABS, ABS_MT_SLOT, dev->slot); + if (is_mt_event && mt && mt->slot != input_abs_get_val(dev, ABS_MT_SLOT)) { + input_abs_set_val(dev, ABS_MT_SLOT, mt->slot); + return INPUT_PASS_TO_HANDLERS | INPUT_SLOT; } return INPUT_PASS_TO_HANDLERS; } -static void input_handle_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) +static int input_get_disposition(struct input_dev *dev, + unsigned int type, unsigned int code, int value) { int disposition = INPUT_IGNORE_EVENT; @@ -227,37 +269,34 @@ static void input_handle_event(struct input_dev *dev, break; case SYN_REPORT: - if (!dev->sync) { - dev->sync = true; - disposition = INPUT_PASS_TO_HANDLERS; - } + disposition = INPUT_PASS_TO_HANDLERS | INPUT_FLUSH; break; case SYN_MT_REPORT: - dev->sync = false; disposition = INPUT_PASS_TO_HANDLERS; break; } break; case EV_KEY: - if (is_event_supported(code, dev->keybit, KEY_MAX) && - !!test_bit(code, dev->key) != value) { + if (is_event_supported(code, dev->keybit, KEY_MAX)) { - if (value != 2) { - __change_bit(code, dev->key); - if (value) - input_start_autorepeat(dev, code); - else - input_stop_autorepeat(dev); + /* auto-repeat bypasses state updates */ + if (value == 2) { + disposition = INPUT_PASS_TO_HANDLERS; + break; } - disposition = INPUT_PASS_TO_HANDLERS; + if (!!test_bit(code, dev->key) != !!value) { + + __change_bit(code, dev->key); + disposition = INPUT_PASS_TO_HANDLERS; + } } break; case EV_SW: if (is_event_supported(code, dev->swbit, SW_MAX) && - !!test_bit(code, dev->sw) != value) { + !!test_bit(code, dev->sw) != !!value) { __change_bit(code, dev->sw); disposition = INPUT_PASS_TO_HANDLERS; @@ -284,7 +323,7 @@ static void input_handle_event(struct input_dev *dev, case EV_LED: if (is_event_supported(code, dev->ledbit, LED_MAX) && - !!test_bit(code, dev->led) != value) { + !!test_bit(code, dev->led) != !!value) { __change_bit(code, dev->led); disposition = INPUT_PASS_TO_ALL; @@ -317,14 +356,48 @@ static void input_handle_event(struct input_dev *dev, break; } - if (disposition != INPUT_IGNORE_EVENT && type != EV_SYN) - dev->sync = false; + return disposition; +} + +static void input_handle_event(struct input_dev *dev, + unsigned int type, unsigned int code, int value) +{ + int disposition; + + disposition = input_get_disposition(dev, type, code, value); if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); - if (disposition & INPUT_PASS_TO_HANDLERS) - input_pass_event(dev, type, code, value); + if (!dev->vals) + return; + + if (disposition & INPUT_PASS_TO_HANDLERS) { + struct input_value *v; + + if (disposition & INPUT_SLOT) { + v = &dev->vals[dev->num_vals++]; + v->type = EV_ABS; + v->code = ABS_MT_SLOT; + v->value = dev->mt->slot; + } + + v = &dev->vals[dev->num_vals++]; + v->type = type; + v->code = code; + v->value = value; + } + + if (disposition & INPUT_FLUSH) { + if (dev->num_vals >= 2) + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } else if (dev->num_vals >= dev->max_vals - 2) { + dev->vals[dev->num_vals++] = input_value_sync; + input_pass_values(dev, dev->vals, dev->num_vals); + dev->num_vals = 0; + } + } /** @@ -352,7 +425,6 @@ void input_event(struct input_dev *dev, if (is_event_supported(type, dev->evbit, EV_MAX)) { spin_lock_irqsave(&dev->event_lock, flags); - add_input_randomness(type, code, value); input_handle_event(dev, type, code, value); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -831,10 +903,12 @@ int input_set_keycode(struct input_dev *dev, if (test_bit(EV_KEY, dev->evbit) && !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && __test_and_clear_bit(old_keycode, dev->key)) { + struct input_value vals[] = { + { EV_KEY, old_keycode, 0 }, + input_value_sync + }; - input_pass_event(dev, EV_KEY, old_keycode, 0); - if (dev->sync) - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); + input_pass_values(dev, vals, ARRAY_SIZE(vals)); } out: @@ -1416,6 +1490,7 @@ static void input_dev_release(struct device *device) input_ff_destroy(dev); input_mt_destroy_slots(dev); kfree(dev->absinfo); + kfree(dev->vals); kfree(dev); module_put(THIS_MODULE); @@ -1751,8 +1826,8 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) int i; unsigned int events; - if (dev->mtsize) { - mt_slots = dev->mtsize; + if (dev->mt) { + mt_slots = dev->mt->num_slots; } else if (test_bit(ABS_MT_TRACKING_ID, dev->absbit)) { mt_slots = dev->absinfo[ABS_MT_TRACKING_ID].maximum - dev->absinfo[ABS_MT_TRACKING_ID].minimum + 1, @@ -1778,6 +1853,9 @@ static unsigned int input_estimate_events_per_packet(struct input_dev *dev) if (test_bit(i, dev->relbit)) events++; + /* Make room for KEY and MSC events */ + events += 7; + return events; } @@ -1816,6 +1894,7 @@ int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); struct input_handler *handler; + unsigned int packet_size; const char *path; int error; @@ -1828,9 +1907,14 @@ int input_register_device(struct input_dev *dev) /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); - if (!dev->hint_events_per_packet) - dev->hint_events_per_packet = - input_estimate_events_per_packet(dev); + packet_size = input_estimate_events_per_packet(dev); + if (dev->hint_events_per_packet < packet_size) + dev->hint_events_per_packet = packet_size; + + dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2; + dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL); + if (!dev->vals) + return -ENOMEM; /* * If delay and period are pre-set by the driver, then autorepeating diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c index ce68e361558..cdc252612c0 100644 --- a/drivers/input/keyboard/imx_keypad.c +++ b/drivers/input/keyboard/imx_keypad.c @@ -516,9 +516,9 @@ static int __devinit imx_keypad_probe(struct platform_device *pdev) input_set_drvdata(input_dev, keypad); /* Ensure that the keypad will stay dormant until opened */ - clk_enable(keypad->clk); + clk_prepare_enable(keypad->clk); imx_keypad_inhibit(keypad); - clk_disable(keypad->clk); + clk_disable_unprepare(keypad->clk); error = request_irq(irq, imx_keypad_irq_handler, 0, pdev->name, keypad); diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c index f06231b7cab..84ec691c05a 100644 --- a/drivers/input/misc/ab8500-ponkey.c +++ b/drivers/input/misc/ab8500-ponkey.c @@ -74,8 +74,8 @@ static int __devinit ab8500_ponkey_probe(struct platform_device *pdev) ponkey->idev = input; ponkey->ab8500 = ab8500; - ponkey->irq_dbf = ab8500_irq_get_virq(ab8500, irq_dbf); - ponkey->irq_dbr = ab8500_irq_get_virq(ab8500, irq_dbr); + ponkey->irq_dbf = irq_dbf; + ponkey->irq_dbr = irq_dbr; input->name = "AB8500 POn(PowerOn) Key"; input->dev.parent = &pdev->dev; diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 736056897e5..6b1797503e3 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -405,7 +405,7 @@ static int uinput_setup_device(struct uinput_device *udev, const char __user *bu goto exit; if (test_bit(ABS_MT_SLOT, dev->absbit)) { int nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1; - input_mt_init_slots(dev, nslot); + input_mt_init_slots(dev, nslot, 0); } else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) { input_set_events_per_packet(dev, 60); } diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 4a1347e91bd..cf5af1f495e 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1620,7 +1620,7 @@ int alps_init(struct psmouse *psmouse) case ALPS_PROTO_V3: case ALPS_PROTO_V4: set_bit(INPUT_PROP_SEMI_MT, dev1->propbit); - input_mt_init_slots(dev1, 2); + input_mt_init_slots(dev1, 2, 0); input_set_abs_params(dev1, ABS_MT_POSITION_X, 0, ALPS_V3_X_MAX, 0, 0); input_set_abs_params(dev1, ABS_MT_POSITION_Y, 0, ALPS_V3_Y_MAX, 0, 0); diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index d528c23e194..3a78f235fa3 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -40,6 +40,7 @@ #include <linux/usb/input.h> #include <linux/hid.h> #include <linux/mutex.h> +#include <linux/input/mt.h> #define USB_VENDOR_ID_APPLE 0x05ac @@ -183,26 +184,26 @@ struct tp_finger { __le16 abs_y; /* absolute y coodinate */ __le16 rel_x; /* relative x coodinate */ __le16 rel_y; /* relative y coodinate */ - __le16 size_major; /* finger size, major axis? */ - __le16 size_minor; /* finger size, minor axis? */ + __le16 tool_major; /* tool area, major axis */ + __le16 tool_minor; /* tool area, minor axis */ __le16 orientation; /* 16384 when point, else 15 bit angle */ - __le16 force_major; /* trackpad force, major axis? */ - __le16 force_minor; /* trackpad force, minor axis? */ + __le16 touch_major; /* touch area, major axis */ + __le16 touch_minor; /* touch area, minor axis */ __le16 unused[3]; /* zeros */ __le16 multi; /* one finger: varies, more fingers: constant */ } __attribute__((packed,aligned(2))); /* trackpad finger data size, empirically at least ten fingers */ +#define MAX_FINGERS 16 #define SIZEOF_FINGER sizeof(struct tp_finger) -#define SIZEOF_ALL_FINGERS (16 * SIZEOF_FINGER) +#define SIZEOF_ALL_FINGERS (MAX_FINGERS * SIZEOF_FINGER) #define MAX_FINGER_ORIENTATION 16384 /* device-specific parameters */ struct bcm5974_param { - int dim; /* logical dimension */ - int fuzz; /* logical noise value */ - int devmin; /* device minimum reading */ - int devmax; /* device maximum reading */ + int snratio; /* signal-to-noise ratio */ + int min; /* device minimum reading */ + int max; /* device maximum reading */ }; /* device-specific configuration */ @@ -219,6 +220,7 @@ struct bcm5974_config { struct bcm5974_param w; /* finger width limits */ struct bcm5974_param x; /* horizontal limits */ struct bcm5974_param y; /* vertical limits */ + struct bcm5974_param o; /* orientation limits */ }; /* logical device structure */ @@ -234,23 +236,16 @@ struct bcm5974 { struct bt_data *bt_data; /* button transferred data */ struct urb *tp_urb; /* trackpad usb request block */ u8 *tp_data; /* trackpad transferred data */ - int fingers; /* number of fingers on trackpad */ + const struct tp_finger *index[MAX_FINGERS]; /* finger index data */ + struct input_mt_pos pos[MAX_FINGERS]; /* position array */ + int slots[MAX_FINGERS]; /* slot assignments */ }; -/* logical dimensions */ -#define DIM_PRESSURE 256 /* maximum finger pressure */ -#define DIM_WIDTH 16 /* maximum finger width */ -#define DIM_X 1280 /* maximum trackpad x value */ -#define DIM_Y 800 /* maximum trackpad y value */ - /* logical signal quality */ #define SN_PRESSURE 45 /* pressure signal-to-noise ratio */ -#define SN_WIDTH 100 /* width signal-to-noise ratio */ +#define SN_WIDTH 25 /* width signal-to-noise ratio */ #define SN_COORD 250 /* coordinate signal-to-noise ratio */ - -/* pressure thresholds */ -#define PRESSURE_LOW (2 * DIM_PRESSURE / SN_PRESSURE) -#define PRESSURE_HIGH (3 * PRESSURE_LOW) +#define SN_ORIENT 10 /* orientation signal-to-noise ratio */ /* device constants */ static const struct bcm5974_config bcm5974_config_table[] = { @@ -261,10 +256,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { 0, 0x84, sizeof(struct bt_data), 0x81, TYPE1, FINGER_TYPE1, FINGER_TYPE1 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 256 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4824, 5342 }, - { DIM_Y, DIM_Y / SN_COORD, -172, 5820 } + { SN_PRESSURE, 0, 256 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4824, 5342 }, + { SN_COORD, -172, 5820 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI, @@ -273,10 +269,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { 0, 0x84, sizeof(struct bt_data), 0x81, TYPE1, FINGER_TYPE1, FINGER_TYPE1 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 256 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4824, 4824 }, - { DIM_Y, DIM_Y / SN_COORD, -172, 4290 } + { SN_PRESSURE, 0, 256 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4824, 4824 }, + { SN_COORD, -172, 4290 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI, @@ -285,10 +282,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4460, 5166 }, - { DIM_Y, DIM_Y / SN_COORD, -75, 6700 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4460, 5166 }, + { SN_COORD, -75, 6700 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI, @@ -297,10 +295,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, - { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4620, 5140 }, + { SN_COORD, -150, 6600 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI, @@ -309,10 +308,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4616, 5112 }, - { DIM_Y, DIM_Y / SN_COORD, -142, 5234 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4616, 5112 }, + { SN_COORD, -142, 5234 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI, @@ -321,10 +321,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4415, 5050 }, - { DIM_Y, DIM_Y / SN_COORD, -55, 6680 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4415, 5050 }, + { SN_COORD, -55, 6680 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI, @@ -333,10 +334,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, - { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4620, 5140 }, + { SN_COORD, -150, 6600 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI, @@ -345,10 +347,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, - { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4750, 5280 }, + { SN_COORD, -150, 6730 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI, @@ -357,10 +360,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, - { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4620, 5140 }, + { SN_COORD, -150, 6600 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, { USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI, @@ -369,10 +373,11 @@ static const struct bcm5974_config bcm5974_config_table[] = { HAS_INTEGRATED_BUTTON, 0x84, sizeof(struct bt_data), 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, - { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, - { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, - { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, - { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } + { SN_PRESSURE, 0, 300 }, + { SN_WIDTH, 0, 2048 }, + { SN_COORD, -4750, 5280 }, + { SN_COORD, -150, 6730 }, + { SN_ORIENT, -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION } }, {} }; @@ -396,18 +401,11 @@ static inline int raw2int(__le16 x) return (signed short)le16_to_cpu(x); } -/* scale device data to logical dimensions (asserts devmin < devmax) */ -static inline int int2scale(const struct bcm5974_param *p, int x) -{ - return x * p->dim / (p->devmax - p->devmin); -} - -/* all logical value ranges are [0,dim). */ -static inline int int2bound(const struct bcm5974_param *p, int x) +static void set_abs(struct input_dev *input, unsigned int code, + const struct bcm5974_param *p) { - int s = int2scale(p, x); - - return clamp_val(s, 0, p->dim - 1); + int fuzz = p->snratio ? (p->max - p->min) / p->snratio : 0; + input_set_abs_params(input, code, p->min, p->max, fuzz, 0); } /* setup which logical events to report */ @@ -416,48 +414,30 @@ static void setup_events_to_report(struct input_dev *input_dev, { __set_bit(EV_ABS, input_dev->evbit); - input_set_abs_params(input_dev, ABS_PRESSURE, - 0, cfg->p.dim, cfg->p.fuzz, 0); - input_set_abs_params(input_dev, ABS_TOOL_WIDTH, - 0, cfg->w.dim, cfg->w.fuzz, 0); - input_set_abs_params(input_dev, ABS_X, - 0, cfg->x.dim, cfg->x.fuzz, 0); - input_set_abs_params(input_dev, ABS_Y, - 0, cfg->y.dim, cfg->y.fuzz, 0); + /* for synaptics only */ + input_set_abs_params(input_dev, ABS_PRESSURE, 0, 256, 5, 0); + input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 16, 0, 0); /* finger touch area */ - input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, - cfg->w.devmin, cfg->w.devmax, 0, 0); - input_set_abs_params(input_dev, ABS_MT_TOUCH_MINOR, - cfg->w.devmin, cfg->w.devmax, 0, 0); + set_abs(input_dev, ABS_MT_TOUCH_MAJOR, &cfg->w); + set_abs(input_dev, ABS_MT_TOUCH_MINOR, &cfg->w); /* finger approach area */ - input_set_abs_params(input_dev, ABS_MT_WIDTH_MAJOR, - cfg->w.devmin, cfg->w.devmax, 0, 0); - input_set_abs_params(input_dev, ABS_MT_WIDTH_MINOR, - cfg->w.devmin, cfg->w.devmax, 0, 0); + set_abs(input_dev, ABS_MT_WIDTH_MAJOR, &cfg->w); + set_abs(input_dev, ABS_MT_WIDTH_MINOR, &cfg->w); /* finger orientation */ - input_set_abs_params(input_dev, ABS_MT_ORIENTATION, - -MAX_FINGER_ORIENTATION, - MAX_FINGER_ORIENTATION, 0, 0); + set_abs(input_dev, ABS_MT_ORIENTATION, &cfg->o); /* finger position */ - input_set_abs_params(input_dev, ABS_MT_POSITION_X, - cfg->x.devmin, cfg->x.devmax, 0, 0); - input_set_abs_params(input_dev, ABS_MT_POSITION_Y, - cfg->y.devmin, cfg->y.devmax, 0, 0); + set_abs(input_dev, ABS_MT_POSITION_X, &cfg->x); + set_abs(input_dev, ABS_MT_POSITION_Y, &cfg->y); __set_bit(EV_KEY, input_dev->evbit); - __set_bit(BTN_TOUCH, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); - __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); - __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); __set_bit(BTN_LEFT, input_dev->keybit); - __set_bit(INPUT_PROP_POINTER, input_dev->propbit); if (cfg->caps & HAS_INTEGRATED_BUTTON) __set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit); - input_set_events_per_packet(input_dev, 60); + input_mt_init_slots(input_dev, MAX_FINGERS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | INPUT_MT_TRACK); } /* report button data as logical button state */ @@ -477,24 +457,44 @@ static int report_bt_state(struct bcm5974 *dev, int size) return 0; } -static void report_finger_data(struct input_dev *input, - const struct bcm5974_config *cfg, +static void report_finger_data(struct input_dev *input, int slot, + const struct input_mt_pos *pos, const struct tp_finger *f) { + input_mt_slot(input, slot); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, - raw2int(f->force_major) << 1); + raw2int(f->touch_major) << 1); input_report_abs(input, ABS_MT_TOUCH_MINOR, - raw2int(f->force_minor) << 1); + raw2int(f->touch_minor) << 1); input_report_abs(input, ABS_MT_WIDTH_MAJOR, - raw2int(f->size_major) << 1); + raw2int(f->tool_major) << 1); input_report_abs(input, ABS_MT_WIDTH_MINOR, - raw2int(f->size_minor) << 1); + raw2int(f->tool_minor) << 1); input_report_abs(input, ABS_MT_ORIENTATION, MAX_FINGER_ORIENTATION - raw2int(f->orientation)); - input_report_abs(input, ABS_MT_POSITION_X, raw2int(f->abs_x)); - input_report_abs(input, ABS_MT_POSITION_Y, - cfg->y.devmin + cfg->y.devmax - raw2int(f->abs_y)); - input_mt_sync(input); + input_report_abs(input, ABS_MT_POSITION_X, pos->x); + input_report_abs(input, ABS_MT_POSITION_Y, pos->y); +} + +static void report_synaptics_data(struct input_dev *input, + const struct bcm5974_config *cfg, + const struct tp_finger *f, int raw_n) +{ + int abs_p = 0, abs_w = 0; + + if (raw_n) { + int p = raw2int(f->touch_major); + int w = raw2int(f->tool_major); + if (p > 0 && raw2int(f->origin)) { + abs_p = clamp_val(256 * p / cfg->p.max, 0, 255); + abs_w = clamp_val(16 * w / cfg->w.max, 0, 15); + } + } + + input_report_abs(input, ABS_PRESSURE, abs_p); + input_report_abs(input, ABS_TOOL_WIDTH, abs_w); } /* report trackpad data as logical trackpad state */ @@ -503,9 +503,7 @@ static int report_tp_state(struct bcm5974 *dev, int size) const struct bcm5974_config *c = &dev->cfg; const struct tp_finger *f; struct input_dev *input = dev->input; - int raw_p, raw_w, raw_x, raw_y, raw_n, i; - int ptest, origin, ibt = 0, nmin = 0, nmax = 0; - int abs_p = 0, abs_w = 0, abs_x = 0, abs_y = 0; + int raw_n, i, n = 0; if (size < c->tp_offset || (size - c->tp_offset) % SIZEOF_FINGER != 0) return -EIO; @@ -514,76 +512,29 @@ static int report_tp_state(struct bcm5974 *dev, int size) f = (const struct tp_finger *)(dev->tp_data + c->tp_offset); raw_n = (size - c->tp_offset) / SIZEOF_FINGER; - /* always track the first finger; when detached, start over */ - if (raw_n) { - - /* report raw trackpad data */ - for (i = 0; i < raw_n; i++) - report_finger_data(input, c, &f[i]); - - raw_p = raw2int(f->force_major); - raw_w = raw2int(f->size_major); - raw_x = raw2int(f->abs_x); - raw_y = raw2int(f->abs_y); - - dprintk(9, - "bcm5974: " - "raw: p: %+05d w: %+05d x: %+05d y: %+05d n: %d\n", - raw_p, raw_w, raw_x, raw_y, raw_n); - - ptest = int2bound(&c->p, raw_p); - origin = raw2int(f->origin); - - /* while tracking finger still valid, count all fingers */ - if (ptest > PRESSURE_LOW && origin) { - abs_p = ptest; - abs_w = int2bound(&c->w, raw_w); - abs_x = int2bound(&c->x, raw_x - c->x.devmin); - abs_y = int2bound(&c->y, c->y.devmax - raw_y); - while (raw_n--) { - ptest = int2bound(&c->p, - raw2int(f->force_major)); - if (ptest > PRESSURE_LOW) - nmax++; - if (ptest > PRESSURE_HIGH) - nmin++; - f++; - } - } + for (i = 0; i < raw_n; i++) { + if (raw2int(f[i].touch_major) == 0) + continue; + dev->pos[n].x = raw2int(f[i].abs_x); + dev->pos[n].y = c->y.min + c->y.max - raw2int(f[i].abs_y); + dev->index[n++] = &f[i]; } - /* set the integrated button if applicable */ - if (c->tp_type == TYPE2) - ibt = raw2int(dev->tp_data[BUTTON_TYPE2]); - - if (dev->fingers < nmin) - dev->fingers = nmin; - if (dev->fingers > nmax) - dev->fingers = nmax; - - input_report_key(input, BTN_TOUCH, dev->fingers > 0); - input_report_key(input, BTN_TOOL_FINGER, dev->fingers == 1); - input_report_key(input, BTN_TOOL_DOUBLETAP, dev->fingers == 2); - input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers == 3); - input_report_key(input, BTN_TOOL_QUADTAP, dev->fingers > 3); - - input_report_abs(input, ABS_PRESSURE, abs_p); - input_report_abs(input, ABS_TOOL_WIDTH, abs_w); + input_mt_assign_slots(input, dev->slots, dev->pos, n); - if (abs_p) { - input_report_abs(input, ABS_X, abs_x); - input_report_abs(input, ABS_Y, abs_y); + for (i = 0; i < n; i++) + report_finger_data(input, dev->slots[i], + &dev->pos[i], dev->index[i]); - dprintk(8, - "bcm5974: abs: p: %+05d w: %+05d x: %+05d y: %+05d " - "nmin: %d nmax: %d n: %d ibt: %d\n", abs_p, abs_w, - abs_x, abs_y, nmin, nmax, dev->fingers, ibt); + input_mt_sync_frame(input); - } + report_synaptics_data(input, c, f, raw_n); /* type 2 reports button events via ibt only */ - if (c->tp_type == TYPE2) + if (c->tp_type == TYPE2) { + int ibt = raw2int(dev->tp_data[BUTTON_TYPE2]); input_report_key(input, BTN_LEFT, ibt); + } input_sync(input); @@ -742,9 +693,11 @@ static int bcm5974_start_traffic(struct bcm5974 *dev) goto err_out; } - error = usb_submit_urb(dev->bt_urb, GFP_KERNEL); - if (error) - goto err_reset_mode; + if (dev->bt_urb) { + error = usb_submit_urb(dev->bt_urb, GFP_KERNEL); + if (error) + goto err_reset_mode; + } error = usb_submit_urb(dev->tp_urb, GFP_KERNEL); if (error) @@ -868,19 +821,23 @@ static int bcm5974_probe(struct usb_interface *iface, mutex_init(&dev->pm_mutex); /* setup urbs */ - dev->bt_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!dev->bt_urb) - goto err_free_devs; + if (cfg->tp_type == TYPE1) { + dev->bt_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->bt_urb) + goto err_free_devs; + } dev->tp_urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->tp_urb) goto err_free_bt_urb; - dev->bt_data = usb_alloc_coherent(dev->udev, + if (dev->bt_urb) { + dev->bt_data = usb_alloc_coherent(dev->udev, dev->cfg.bt_datalen, GFP_KERNEL, &dev->bt_urb->transfer_dma); - if (!dev->bt_data) - goto err_free_urb; + if (!dev->bt_data) + goto err_free_urb; + } dev->tp_data = usb_alloc_coherent(dev->udev, dev->cfg.tp_datalen, GFP_KERNEL, @@ -888,10 +845,11 @@ static int bcm5974_probe(struct usb_interface *iface, if (!dev->tp_data) goto err_free_bt_buffer; - usb_fill_int_urb(dev->bt_urb, udev, - usb_rcvintpipe(udev, cfg->bt_ep), - dev->bt_data, dev->cfg.bt_datalen, - bcm5974_irq_button, dev, 1); + if (dev->bt_urb) + usb_fill_int_urb(dev->bt_urb, udev, + usb_rcvintpipe(udev, cfg->bt_ep), + dev->bt_data, dev->cfg.bt_datalen, + bcm5974_irq_button, dev, 1); usb_fill_int_urb(dev->tp_urb, udev, usb_rcvintpipe(udev, cfg->tp_ep), @@ -929,8 +887,9 @@ err_free_buffer: usb_free_coherent(dev->udev, dev->cfg.tp_datalen, dev->tp_data, dev->tp_urb->transfer_dma); err_free_bt_buffer: - usb_free_coherent(dev->udev, dev->cfg.bt_datalen, - dev->bt_data, dev->bt_urb->transfer_dma); + if (dev->bt_urb) + usb_free_coherent(dev->udev, dev->cfg.bt_datalen, + dev->bt_data, dev->bt_urb->transfer_dma); err_free_urb: usb_free_urb(dev->tp_urb); err_free_bt_urb: @@ -951,8 +910,9 @@ static void bcm5974_disconnect(struct usb_interface *iface) input_unregister_device(dev->input); usb_free_coherent(dev->udev, dev->cfg.tp_datalen, dev->tp_data, dev->tp_urb->transfer_dma); - usb_free_coherent(dev->udev, dev->cfg.bt_datalen, - dev->bt_data, dev->bt_urb->transfer_dma); + if (dev->bt_urb) + usb_free_coherent(dev->udev, dev->cfg.bt_datalen, + dev->bt_data, dev->bt_urb->transfer_dma); usb_free_urb(dev->tp_urb); usb_free_urb(dev->bt_urb); kfree(dev); diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 479011004a1..1e8e42fb03a 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1004,7 +1004,7 @@ static int elantech_set_input_params(struct psmouse *psmouse) input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2, ETP_WMAX_V2, 0, 0); } - input_mt_init_slots(dev, 2); + input_mt_init_slots(dev, 2, 0); input_set_abs_params(dev, ABS_MT_POSITION_X, x_min, x_max, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, y_min, y_max, 0, 0); break; @@ -1035,7 +1035,7 @@ static int elantech_set_input_params(struct psmouse *psmouse) input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2, ETP_WMAX_V2, 0, 0); /* Multitouch capable pad, up to 5 fingers. */ - input_mt_init_slots(dev, ETP_MAX_FINGERS); + input_mt_init_slots(dev, ETP_MAX_FINGERS, 0); input_set_abs_params(dev, ABS_MT_POSITION_X, x_min, x_max, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, y_min, y_max, 0, 0); input_abs_set_res(dev, ABS_MT_POSITION_X, x_res); diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c index 3f5649f1908..e582922bacf 100644 --- a/drivers/input/mouse/sentelic.c +++ b/drivers/input/mouse/sentelic.c @@ -721,6 +721,17 @@ static psmouse_ret_t fsp_process_byte(struct psmouse *psmouse) switch (psmouse->packet[0] >> FSP_PKT_TYPE_SHIFT) { case FSP_PKT_TYPE_ABS: + + if ((packet[0] == 0x48 || packet[0] == 0x49) && + packet[1] == 0 && packet[2] == 0) { + /* + * Ignore coordinate noise when finger leaving the + * surface, otherwise cursor may jump to upper-left + * corner. + */ + packet[3] &= 0xf0; + } + abs_x = GET_ABS_X(packet); abs_y = GET_ABS_Y(packet); @@ -960,7 +971,7 @@ static int fsp_set_input_params(struct psmouse *psmouse) input_set_abs_params(dev, ABS_X, 0, abs_x, 0, 0); input_set_abs_params(dev, ABS_Y, 0, abs_y, 0, 0); - input_mt_init_slots(dev, 2); + input_mt_init_slots(dev, 2, 0); input_set_abs_params(dev, ABS_MT_POSITION_X, 0, abs_x, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, 0, abs_y, 0, 0); } diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 14eaecea2b7..37033ade79d 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1232,7 +1232,7 @@ static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0); if (SYN_CAP_IMAGE_SENSOR(priv->ext_cap_0c)) { - input_mt_init_slots(dev, 2); + input_mt_init_slots(dev, 2, 0); set_abs_position_params(dev, priv, ABS_MT_POSITION_X, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ @@ -1244,7 +1244,7 @@ static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) } else if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) { /* Non-image sensors with AGM use semi-mt */ __set_bit(INPUT_PROP_SEMI_MT, dev->propbit); - input_mt_init_slots(dev, 2); + input_mt_init_slots(dev, 2, 0); set_abs_position_params(dev, priv, ABS_MT_POSITION_X, ABS_MT_POSITION_Y); } diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index 2ffd110bd5b..2e77246c2e5 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -72,7 +72,7 @@ static int amba_kmi_open(struct serio *io) unsigned int divisor; int ret; - ret = clk_enable(kmi->clk); + ret = clk_prepare_enable(kmi->clk); if (ret) goto out; @@ -92,7 +92,7 @@ static int amba_kmi_open(struct serio *io) return 0; clk_disable: - clk_disable(kmi->clk); + clk_disable_unprepare(kmi->clk); out: return ret; } @@ -104,7 +104,7 @@ static void amba_kmi_close(struct serio *io) writeb(0, KMICR); free_irq(kmi->irq, kmi); - clk_disable(kmi->clk); + clk_disable_unprepare(kmi->clk); } static int __devinit amba_kmi_probe(struct amba_device *dev, diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 6918773ce02..d6cc77a53c7 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -335,6 +335,12 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { }, { .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"), + }, + }, + { + .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ALIENWARE"), DMI_MATCH(DMI_PRODUCT_NAME, "Sentia"), }, diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 532d067a9e0..2a81ce375f7 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -1530,7 +1530,7 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); - input_mt_init_slots(input_dev, features->touch_max); + input_mt_init_slots(input_dev, features->touch_max, 0); input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); @@ -1575,7 +1575,7 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, case TABLETPC2FG: if (features->device_type == BTN_TOOL_FINGER) { - input_mt_init_slots(input_dev, features->touch_max); + input_mt_init_slots(input_dev, features->touch_max, 0); input_set_abs_params(input_dev, ABS_MT_TOOL_TYPE, 0, MT_TOOL_MAX, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, @@ -1631,7 +1631,7 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); - input_mt_init_slots(input_dev, features->touch_max); + input_mt_init_slots(input_dev, features->touch_max, 0); if (features->pktlen == WACOM_PKGLEN_BBTOUCH3) { __set_bit(BTN_TOOL_TRIPLETAP, diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 4623cc69fc6..e92615d0b1b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1152,7 +1152,7 @@ static int __devinit mxt_probe(struct i2c_client *client, /* For multi touch */ num_mt_slots = data->T9_reportid_max - data->T9_reportid_min + 1; - error = input_mt_init_slots(input_dev, num_mt_slots); + error = input_mt_init_slots(input_dev, num_mt_slots, 0); if (error) goto err_free_object; input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index f030d9ec795..8e60437ac85 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -571,7 +571,7 @@ struct cyttsp *cyttsp_probe(const struct cyttsp_bus_ops *bus_ops, input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, CY_MAXZ, 0, 0); - input_mt_init_slots(input_dev, CY_MAX_ID); + input_mt_init_slots(input_dev, CY_MAX_ID, 0); error = request_threaded_irq(ts->irq, NULL, cyttsp_irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index b06a5e3a665..099d144ab7c 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -566,9 +566,12 @@ static ssize_t edt_ft5x06_debugfs_raw_data_read(struct file *file, } read = min_t(size_t, count, tsdata->raw_bufsize - *off); - error = copy_to_user(buf, tsdata->raw_buffer + *off, read); - if (!error) - *off += read; + if (copy_to_user(buf, tsdata->raw_buffer + *off, read)) { + error = -EFAULT; + goto out; + } + + *off += read; out: mutex_unlock(&tsdata->mutex); return error ?: read; @@ -779,7 +782,7 @@ static int __devinit edt_ft5x06_ts_probe(struct i2c_client *client, 0, tsdata->num_x * 64 - 1, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, tsdata->num_y * 64 - 1, 0, 0); - error = input_mt_init_slots(input, MAX_SUPPORT_POINTS); + error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, 0); if (error) { dev_err(&client->dev, "Unable to init MT slots.\n"); goto err_free_mem; diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c index 70524dd34f4..c1e3460f119 100644 --- a/drivers/input/touchscreen/egalax_ts.c +++ b/drivers/input/touchscreen/egalax_ts.c @@ -204,7 +204,7 @@ static int __devinit egalax_ts_probe(struct i2c_client *client, ABS_MT_POSITION_X, 0, EGALAX_MAX_X, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0, EGALAX_MAX_Y, 0, 0); - input_mt_init_slots(input_dev, MAX_SUPPORT_POINTS); + input_mt_init_slots(input_dev, MAX_SUPPORT_POINTS, 0); input_set_drvdata(input_dev, ts); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index c0044175a92..4ac69760ec0 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -252,7 +252,7 @@ static int __devinit ili210x_i2c_probe(struct i2c_client *client, input_set_abs_params(input, ABS_Y, 0, ymax, 0, 0); /* Multi touch */ - input_mt_init_slots(input, MAX_TOUCHES); + input_mt_init_slots(input, MAX_TOUCHES, 0); input_set_abs_params(input, ABS_MT_POSITION_X, 0, xmax, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, ymax, 0, 0); diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 49c44bbf548..560cf09d1c5 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -404,7 +404,7 @@ static int __devinit mms114_probe(struct i2c_client *client, input_set_abs_params(input_dev, ABS_Y, 0, data->pdata->y_size, 0, 0); /* For multi touch */ - input_mt_init_slots(input_dev, MMS114_MAX_TOUCH); + input_mt_init_slots(input_dev, MMS114_MAX_TOUCH, 0); input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, MMS114_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, diff --git a/drivers/input/touchscreen/penmount.c b/drivers/input/touchscreen/penmount.c index 4ccde45b9da..b49f0b83692 100644 --- a/drivers/input/touchscreen/penmount.c +++ b/drivers/input/touchscreen/penmount.c @@ -264,7 +264,7 @@ static int pm_connect(struct serio *serio, struct serio_driver *drv) input_set_abs_params(pm->dev, ABS_Y, 0, max_y, 0, 0); if (pm->maxcontacts > 1) { - input_mt_init_slots(pm->dev, pm->maxcontacts); + input_mt_init_slots(pm->dev, pm->maxcontacts, 0); input_set_abs_params(pm->dev, ABS_MT_POSITION_X, 0, max_x, 0, 0); input_set_abs_params(pm->dev, diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index e32709e0dd6..721fdb3597c 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -304,6 +304,45 @@ static int e2i_read_data(struct usbtouch_usb *dev, unsigned char *pkt) #define EGALAX_PKT_TYPE_REPT 0x80 #define EGALAX_PKT_TYPE_DIAG 0x0A +static int egalax_init(struct usbtouch_usb *usbtouch) +{ + int ret, i; + unsigned char *buf; + struct usb_device *udev = interface_to_usbdev(usbtouch->interface); + + /* + * An eGalax diagnostic packet kicks the device into using the right + * protocol. We send a "check active" packet. The response will be + * read later and ignored. + */ + + buf = kmalloc(3, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = EGALAX_PKT_TYPE_DIAG; + buf[1] = 1; /* length */ + buf[2] = 'A'; /* command - check active */ + + for (i = 0; i < 3; i++) { + ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), + 0, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, 0, buf, 3, + USB_CTRL_SET_TIMEOUT); + if (ret >= 0) { + ret = 0; + break; + } + if (ret != -EPIPE) + break; + } + + kfree(buf); + + return ret; +} + static int egalax_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if ((pkt[0] & EGALAX_PKT_TYPE_MASK) != EGALAX_PKT_TYPE_REPT) @@ -1056,6 +1095,7 @@ static struct usbtouch_device_info usbtouch_dev_info[] = { .process_pkt = usbtouch_process_multi, .get_pkt_len = egalax_get_pkt_len, .read_data = egalax_read_data, + .init = egalax_init, }, #endif diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c index 8f9ad2f893b..9a83be6b658 100644 --- a/drivers/input/touchscreen/wacom_w8001.c +++ b/drivers/input/touchscreen/wacom_w8001.c @@ -471,7 +471,7 @@ static int w8001_setup(struct w8001 *w8001) case 5: w8001->pktlen = W8001_PKTLEN_TOUCH2FG; - input_mt_init_slots(dev, 2); + input_mt_init_slots(dev, 2, 0); input_set_abs_params(dev, ABS_MT_POSITION_X, 0, touch.x, 0, 0); input_set_abs_params(dev, ABS_MT_POSITION_Y, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b64502dfa9f..e89daf1b21b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -266,7 +266,7 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) static int iommu_init_device(struct device *dev) { - struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev); + struct pci_dev *dma_pdev = NULL, *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; struct iommu_group *group; u16 alias; @@ -293,7 +293,9 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - } else + } + + if (dma_pdev == NULL) dma_pdev = pci_dev_get(pdev); /* Account for quirked devices */ diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d8abb90a6c2..034233eefc8 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) { struct multipath *m = ti->private; + struct pgpath *pgpath; struct block_device *bdev; fmode_t mode; unsigned long flags; @@ -1570,12 +1571,14 @@ again: if (!m->current_pgpath) __choose_pgpath(m, 0); - if (m->current_pgpath) { - bdev = m->current_pgpath->path.dev->bdev; - mode = m->current_pgpath->path.dev->mode; + pgpath = m->current_pgpath; + + if (pgpath) { + bdev = pgpath->path.dev->bdev; + mode = pgpath->path.dev->mode; } - if (m->queue_io) + if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path)) r = -EAGAIN; else if (!bdev) r = -EIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f90069029aa..100368eb799 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +static int count_device(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned *num_devices = data; + + (*num_devices)++; + + return 0; +} + +/* + * Check whether a table has no data devices attached using each + * target's iterate_devices method. + * Returns false if the result is unknown because a target doesn't + * support iterate_devices. + */ +bool dm_table_has_no_data_devices(struct dm_table *table) +{ + struct dm_target *uninitialized_var(ti); + unsigned i = 0, num_devices = 0; + + while (i < dm_table_get_num_targets(table)) { + ti = dm_table_get_target(table, i++); + + if (!ti->type->iterate_devices) + return false; + + ti->type->iterate_devices(ti, count_device, &num_devices); + if (num_devices) + return false; + } + + return true; +} + /* * Establish the new table's queue_limits and validate them. */ @@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, return q && blk_queue_nonrot(q); } -static bool dm_table_is_nonrot(struct dm_table *t) +static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !blk_queue_add_random(q); +} + +static bool dm_table_all_devices_attribute(struct dm_table *t, + iterate_devices_callout_fn func) { struct dm_target *ti; unsigned i = 0; - /* Ensure that all underlying device are non-rotational. */ while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) + !ti->type->iterate_devices(ti, func, NULL)) return 0; } @@ -1396,7 +1439,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_discard_zeroes_data(t)) q->limits.discard_zeroes_data = 0; - if (dm_table_is_nonrot(t)) + /* Ensure that all underlying devices are non-rotational. */ + if (dm_table_all_devices_attribute(t, device_is_nonrot)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); @@ -1404,6 +1448,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_set_integrity(t); /* + * Determine whether or not this queue's I/O timings contribute + * to the entropy pool, Only request-based targets use this. + * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not + * have it set. + */ + if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + + /* * QUEUE_FLAG_STACKABLE must be set after all queue settings are * visible to other CPUs because, once the flag is set, incoming bios * are processed by request-based dm, which refers to the queue diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index af1fc3b2c2a..c29410af1e2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -509,9 +509,9 @@ enum pool_mode { struct pool_features { enum pool_mode mode; - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; + bool zero_new_blocks:1; + bool discard_enabled:1; + bool discard_passdown:1; }; struct thin_c; @@ -580,7 +580,8 @@ struct pool_c { struct dm_target_callbacks callbacks; dm_block_t low_water_blocks; - struct pool_features pf; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ }; /* @@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ +static bool data_dev_supports_discard(struct pool_c *pt) +{ + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the data device + * supports discards. Disable discard_passdown if not. + */ +static void disable_passdown_if_not_supported(struct pool_c *pt) +{ + struct pool *pool = pt->pool; + struct block_device *data_bdev = pt->data_dev->bdev; + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; + const char *reason = NULL; + char buf[BDEVNAME_SIZE]; + + if (!pt->adjusted_pf.discard_passdown) + return; + + if (!data_dev_supports_discard(pt)) + reason = "discard unsupported"; + + else if (data_limits->max_discard_sectors < pool->sectors_per_block) + reason = "max discard sectors smaller than a block"; + + else if (data_limits->discard_granularity > block_size) + reason = "discard granularity larger than a block"; + + else if (block_size & (data_limits->discard_granularity - 1)) + reason = "discard granularity not a factor of block size"; + + if (reason) { + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + pt->adjusted_pf.discard_passdown = false; + } +} + static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; @@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) * We want to make sure that degraded pools are never upgraded. */ enum pool_mode old_mode = pool->pf.mode; - enum pool_mode new_mode = pt->pf.mode; + enum pool_mode new_mode = pt->adjusted_pf.mode; if (old_mode > new_mode) new_mode = old_mode; pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; - set_pool_mode(pool, new_mode); + pool->pf = pt->adjusted_pf; - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - /* FIXME: pull this out into a sep fn. */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } + set_pool_mode(pool, new_mode); return 0; } @@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) static void pool_features_init(struct pool_features *pf) { pf->mode = PM_WRITE; - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; + pf->zero_new_blocks = true; + pf->discard_enabled = true; + pf->discard_passdown = true; } static void __pool_destroy(struct pool *pool) @@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, argc--; if (!strcasecmp(arg_name, "skip_block_zeroing")) - pf->zero_new_blocks = 0; + pf->zero_new_blocks = false; else if (!strcasecmp(arg_name, "ignore_discard")) - pf->discard_enabled = 0; + pf->discard_enabled = false; else if (!strcasecmp(arg_name, "no_discard_passdown")) - pf->discard_passdown = 0; + pf->discard_passdown = false; else if (!strcasecmp(arg_name, "read_only")) pf->mode = PM_READ_ONLY; @@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->metadata_dev = metadata_dev; pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; - pt->pf = pf; + pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_requests = 1; + /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) */ if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_requests = 1; + /* * Setting 'discards_supported' circumvents the normal * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - emit_flags(&pt->pf, result, sz, maxlen); + emit_flags(&pt->requested_pf, result, sz, maxlen); break; } @@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { - /* - * FIXME: these limits may be incompatible with the pool's data device - */ + struct pool *pool = pt->pool; + struct queue_limits *data_limits; + limits->max_discard_sectors = pool->sectors_per_block; /* - * This is just a hint, and not enforced. We have to cope with - * bios that cover a block partially. A discard that spans a block - * boundary is not sent to this target. + * discard_granularity is just a hint, and not enforced. */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; + limits->discard_granularity = data_limits->discard_granularity; + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); + + /* + * pt->adjusted_pf is a staging area for the actual features to use. + * They get transferred to the live pool in bind_control_target() + * called from pool_preresume(). + */ + if (!pt->adjusted_pf.discard_enabled) + return; + + disable_passdown_if_not_supported(pt); + + set_discard_limits(pt, limits); } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +/* + * A thin device always inherits its queue limits from its pool. + */ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; } static struct target_type thin_target = { .name = "thin", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 254d19268ad..892ae2766aa 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) v->hash_dev_block_bits = ffs(num) - 1; if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid data blocks"; r = -EINVAL; goto bad; @@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { + (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) + >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) { ti->error = "Invalid hash start"; r = -EINVAL; goto bad; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e09b6ff5b4..67ffa391edc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped) { int r = error; struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; + dm_request_endio_fn rq_end_io = NULL; - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); + if (tio->ti) { + rq_end_io = tio->ti->type->rq_end_io; + + if (mapped && rq_end_io) + r = rq_end_io(tio->ti, clone, error, &tio->info); + } if (r <= 0) /* The target wants to complete the I/O */ @@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone, int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); - tio->ti = ti; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { @@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone, return requeued; } +static struct request *dm_start_request(struct mapped_device *md, struct request *orig) +{ + struct request *clone; + + blk_start_request(orig); + clone = orig->special; + atomic_inc(&md->pending[rq_data_dir(clone)]); + + /* + * Hold the md reference here for the in-flight I/O. + * We can't rely on the reference count by device opener, + * because the device may be closed during the request completion + * when all bios are completed. + * See the comment in rq_completed() too. + */ + dm_get(md); + + return clone; +} + /* * q->request_fn for request-based dm. * Called with the queue lock held. @@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q) pos = blk_rq_pos(rq); ti = dm_table_find_target(map, pos); - BUG_ON(!dm_target_is_valid(ti)); + if (!dm_target_is_valid(ti)) { + /* + * Must perform setup, that dm_done() requires, + * before calling dm_kill_unmapped_request + */ + DMERR_LIMIT("request attempted access beyond the end of device"); + clone = dm_start_request(md, rq); + dm_kill_unmapped_request(clone, -EIO); + continue; + } if (ti->type->busy && ti->type->busy(ti)) goto delay_and_out; - blk_start_request(rq); - clone = rq->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); + clone = dm_start_request(md, rq); spin_unlock(q->queue_lock); if (map_request(ti, clone, md)) @@ -1684,8 +1706,6 @@ delay_and_out: blk_delay_queue(q, HZ / 10); out: dm_table_put(map); - - return; } int dm_underlying_device_busy(struct request_queue *q) @@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *map = ERR_PTR(-EINVAL); + struct dm_table *live_map, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) if (!dm_suspended_md(md)) goto out; + /* + * If the new table has no data devices, retain the existing limits. + * This helps multipath with queue_if_no_path if all paths disappear, + * then new I/O is queued based on these limits, and then some paths + * reappear. + */ + if (dm_table_has_no_data_devices(table)) { + live_map = dm_get_live_table(md); + if (live_map) + limits = md->queue->limits; + dm_table_put(live_map); + } + r = dm_calculate_queue_limits(table, &limits); if (r) { map = ERR_PTR(r); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 52eef493d26..6a99fefaa74 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); +bool dm_table_has_no_data_devices(struct dm_table *table); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, diff --git a/drivers/md/md.c b/drivers/md/md.c index 3f6203a4c7e..308e87b417e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7619,6 +7619,8 @@ static int remove_and_add_spares(struct mddev *mddev) } } } + if (removed) + set_bit(MD_CHANGE_DEVS, &mddev->flags); return spares; } @@ -7632,9 +7634,11 @@ static void reap_sync_thread(struct mddev *mddev) !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ /* activate any spares */ - if (mddev->pers->spare_active(mddev)) + if (mddev->pers->spare_active(mddev)) { sysfs_notify(&mddev->kobj, NULL, "degraded"); + set_bit(MD_CHANGE_DEVS, &mddev->flags); + } } if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && mddev->pers->finish_reshape) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1c2eb38f3c5..0138a727c1f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1512,14 +1512,16 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore) do { int n = conf->copies; int cnt = 0; + int this = first; while (n--) { - if (conf->mirrors[first].rdev && - first != ignore) + if (conf->mirrors[this].rdev && + this != ignore) cnt++; - first = (first+1) % geo->raid_disks; + this = (this+1) % geo->raid_disks; } if (cnt == 0) return 0; + first = (first + geo->near_copies) % geo->raid_disks; } while (first != 0); return 1; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index adda94df5eb..0689173fd9f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -393,6 +393,8 @@ static int calc_degraded(struct r5conf *conf) degraded = 0; for (i = 0; i < conf->previous_raid_disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(Faulty, &rdev->flags)) + rdev = rcu_dereference(conf->disks[i].replacement); if (!rdev || test_bit(Faulty, &rdev->flags)) degraded++; else if (test_bit(In_sync, &rdev->flags)) @@ -417,6 +419,8 @@ static int calc_degraded(struct r5conf *conf) degraded2 = 0; for (i = 0; i < conf->raid_disks; i++) { struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); + if (rdev && test_bit(Faulty, &rdev->flags)) + rdev = rcu_dereference(conf->disks[i].replacement); if (!rdev || test_bit(Faulty, &rdev->flags)) degraded2++; else if (test_bit(In_sync, &rdev->flags)) @@ -1587,6 +1591,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&nsh->ops.wait_for_ops); #endif + spin_lock_init(&nsh->stripe_lock); list_add(&nsh->lru, &newstripes); } @@ -4192,7 +4197,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); - if ((bi->bi_rw & REQ_NOIDLE) && + if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); release_stripe_plug(mddev, sh); diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index 866f95960b4..29d72a259c8 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -342,7 +342,7 @@ int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel) /* * Delay might be needed for ABB8500 cut 3.0, if not, remove - * when hardware will be availible + * when hardware will be available */ msleep(1); break; diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c index 3a8fa88567b..ff61efc76ce 100644 --- a/drivers/mfd/rc5t583.c +++ b/drivers/mfd/rc5t583.c @@ -281,7 +281,7 @@ static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c, if (i2c->irq) { ret = rc5t583_irq_init(rc5t583, i2c->irq, pdata->irq_base); - /* Still continue with waring if irq init fails */ + /* Still continue with warning, if irq init fails */ if (ret) dev_warn(&i2c->dev, "IRQ init failed: %d\n", ret); else diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c index 0f70dce6116..fbabc3cbe35 100644 --- a/drivers/mfd/rdc321x-southbridge.c +++ b/drivers/mfd/rdc321x-southbridge.c @@ -1,5 +1,5 @@ /* - * RDC321x MFD southbrige driver + * RDC321x MFD southbridge driver * * Copyright (C) 2007-2010 Florian Fainelli <florian@openwrt.org> * Copyright (C) 2010 Bernhard Loos <bernhardloos@googlemail.com> diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index 5f58370ccf5..345960ca2fd 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -25,6 +25,7 @@ #include <linux/i2c.h> #include <linux/regmap.h> #include <linux/regulator/of_regulator.h> +#include <linux/regulator/machine.h> #include <linux/mfd/core.h> #include <linux/mfd/tps6586x.h> @@ -346,6 +347,7 @@ failed: #ifdef CONFIG_OF static struct of_regulator_match tps6586x_matches[] = { + { .name = "sys", .driver_data = (void *)TPS6586X_ID_SYS }, { .name = "sm0", .driver_data = (void *)TPS6586X_ID_SM_0 }, { .name = "sm1", .driver_data = (void *)TPS6586X_ID_SM_1 }, { .name = "sm2", .driver_data = (void *)TPS6586X_ID_SM_2 }, @@ -369,6 +371,7 @@ static struct tps6586x_platform_data *tps6586x_parse_dt(struct i2c_client *clien struct tps6586x_platform_data *pdata; struct tps6586x_subdev_info *devs; struct device_node *regs; + const char *sys_rail_name = NULL; unsigned int count; unsigned int i, j; int err; @@ -391,12 +394,22 @@ static struct tps6586x_platform_data *tps6586x_parse_dt(struct i2c_client *clien return NULL; for (i = 0, j = 0; i < num && j < count; i++) { + struct regulator_init_data *reg_idata; + if (!tps6586x_matches[i].init_data) continue; + reg_idata = tps6586x_matches[i].init_data; devs[j].name = "tps6586x-regulator"; devs[j].platform_data = tps6586x_matches[i].init_data; devs[j].id = (int)tps6586x_matches[i].driver_data; + if (devs[j].id == TPS6586X_ID_SYS) + sys_rail_name = reg_idata->constraints.name; + + if ((devs[j].id == TPS6586X_ID_LDO_5) || + (devs[j].id == TPS6586X_ID_LDO_RTC)) + reg_idata->supply_regulator = sys_rail_name; + devs[j].of_node = tps6586x_matches[i].of_node; j++; } diff --git a/drivers/mfd/tps65911-comparator.c b/drivers/mfd/tps65911-comparator.c index 5a62e6bf89a..0b6e361432c 100644 --- a/drivers/mfd/tps65911-comparator.c +++ b/drivers/mfd/tps65911-comparator.c @@ -136,7 +136,7 @@ static __devinit int tps65911_comparator_probe(struct platform_device *pdev) ret = comp_threshold_set(tps65910, COMP2, pdata->vmbch2_threshold); if (ret < 0) { - dev_err(&pdev->dev, "cannot set COMP2 theshold\n"); + dev_err(&pdev->dev, "cannot set COMP2 threshold\n"); return ret; } diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c index 0aac4aff17a..a050e56a9bb 100644 --- a/drivers/mfd/wm8994-irq.c +++ b/drivers/mfd/wm8994-irq.c @@ -135,6 +135,7 @@ static struct regmap_irq_chip wm8994_irq_chip = { .status_base = WM8994_INTERRUPT_STATUS_1, .mask_base = WM8994_INTERRUPT_STATUS_1_MASK, .ack_base = WM8994_INTERRUPT_STATUS_1, + .runtime_pm = true, }; int wm8994_irq_init(struct wm8994 *wm8994) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index d4619e2ec03..2273ce6b6c1 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -641,7 +641,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, /* * If the host and card support UHS-I mode request the card * to switch to 1.8V signaling level. No 1.8v signalling if - * UHS mode is not enabled to maintain compatibilty and some + * UHS mode is not enabled to maintain compatibility and some * systems that claim 1.8v signalling in fact do not support * it. */ diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index efdb81d21c4..74bed0fc23e 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -356,7 +356,7 @@ static void at91_mci_handle_transmitted(struct at91mci_host *host) } /* - * Update bytes tranfered count during a write operation + * Update bytes transfered count during a write operation */ static void at91_mci_update_bytes_xfered(struct at91mci_host *host) { diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index a53c7c478e0..852d5fbda63 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1022,7 +1022,7 @@ static void atmci_stop_transfer(struct atmel_mci *host) } /* - * Stop data transfer because error(s) occured. + * Stop data transfer because error(s) occurred. */ static void atmci_stop_transfer_pdc(struct atmel_mci *host) { diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3a09f93cc3b..686e256764c 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -447,7 +447,7 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host) OMAP_HSMMC_WRITE(host->base, SYSCTL, OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN); if ((OMAP_HSMMC_READ(host->base, SYSCTL) & CEN) != 0x0) - dev_dbg(mmc_dev(host->mmc), "MMC Clock is not stoped\n"); + dev_dbg(mmc_dev(host->mmc), "MMC Clock is not stopped\n"); } static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host, diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index e23f8134591..32f4a070551 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -315,7 +315,7 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) new_val = val & (SDHCI_CTRL_LED | \ SDHCI_CTRL_4BITBUS | \ SDHCI_CTRL_D3CD); - /* ensure the endianess */ + /* ensure the endianness */ new_val |= ESDHC_HOST_CONTROL_LE; /* bits 8&9 are reserved on mx25 */ if (!is_imx25_esdhc(imx_data)) { diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 3135a1a5d75..58eab9ac1d0 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -806,7 +806,7 @@ static void command_res_completed(struct urb *urb) * we suspect a buggy USB host controller */ } else if (!vub300->data) { - /* this means that the command (typically CMD52) suceeded */ + /* this means that the command (typically CMD52) succeeded */ } else if (vub300->resp.common.header_type != 0x02) { /* * this is an error response from the VUB300 chip diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index f2f482bec57..a6e74514e66 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1123,6 +1123,33 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file, } #endif +static inline unsigned long get_vm_size(struct vm_area_struct *vma) +{ + return vma->vm_end - vma->vm_start; +} + +static inline resource_size_t get_vm_offset(struct vm_area_struct *vma) +{ + return (resource_size_t) vma->vm_pgoff << PAGE_SHIFT; +} + +/* + * Set a new vm offset. + * + * Verify that the incoming offset really works as a page offset, + * and that the offset and size fit in a resource_size_t. + */ +static inline int set_vm_offset(struct vm_area_struct *vma, resource_size_t off) +{ + pgoff_t pgoff = off >> PAGE_SHIFT; + if (off != (resource_size_t) pgoff << PAGE_SHIFT) + return -EINVAL; + if (off + get_vm_size(vma) - 1 < off) + return -EINVAL; + vma->vm_pgoff = pgoff; + return 0; +} + /* * set up a mapping for shared memory segments */ @@ -1132,20 +1159,29 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma) struct mtd_file_info *mfi = file->private_data; struct mtd_info *mtd = mfi->mtd; struct map_info *map = mtd->priv; - unsigned long start; - unsigned long off; - u32 len; + resource_size_t start, off; + unsigned long len, vma_len; if (mtd->type == MTD_RAM || mtd->type == MTD_ROM) { - off = vma->vm_pgoff << PAGE_SHIFT; + off = get_vm_offset(vma); start = map->phys; len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size); start &= PAGE_MASK; - if ((vma->vm_end - vma->vm_start + off) > len) + vma_len = get_vm_size(vma); + + /* Overflow in off+len? */ + if (vma_len + off < off) + return -EINVAL; + /* Does it fit in the mapping? */ + if (vma_len + off > len) return -EINVAL; off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; + /* Did that overflow? */ + if (off < start) + return -EINVAL; + if (set_vm_offset(vma, off) < 0) + return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED; #ifdef pgprot_noncached diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 98ee4381991..7edadee487b 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1391,7 +1391,6 @@ static irqreturn_t ican3_irq(int irq, void *dev_id) */ static int ican3_reset_module(struct ican3_dev *mod) { - u8 val = 1 << mod->num; unsigned long start; u8 runold, runnew; @@ -1405,8 +1404,7 @@ static int ican3_reset_module(struct ican3_dev *mod) runold = ioread8(mod->dpm + TARGET_RUNNING); /* reset the module */ - iowrite8(val, &mod->ctrl->reset_assert); - iowrite8(val, &mod->ctrl->reset_deassert); + iowrite8(0x00, &mod->dpmctrl->hwreset); /* wait until the module has finished resetting and is running */ start = jiffies; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 527dbcf9533..9ded21e79db 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -984,12 +984,12 @@ static int __devexit ti_hecc_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct ti_hecc_priv *priv = netdev_priv(ndev); + unregister_candev(ndev); clk_disable(priv->clk); clk_put(priv->clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); iounmap(priv->base); release_mem_region(res->start, resource_size(res)); - unregister_candev(ndev); free_candev(ndev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index b1536663514..bb9670f29b5 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -364,7 +364,7 @@ typhoon_inc_rxfree_index(u32 *index, const int count) static inline void typhoon_inc_tx_index(u32 *index, const int count) { - /* if we start using the Hi Tx ring, this needs updateing */ + /* if we start using the Hi Tx ring, this needs updating */ typhoon_inc_index(index, count, TXLO_ENTRIES); } diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 79cebd8525c..e48312f2305 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8564,7 +8564,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - iounmap(bp->regview); + pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 6d1a24acb77..eac25236856 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1458,7 +1458,7 @@ struct bnx2x { int fw_stats_req_sz; /* - * FW statistics data shortcut (points at the begining of + * FW statistics data shortcut (points at the beginning of * fw_stats buffer + fw_stats_req_sz). */ struct bnx2x_fw_stats_data *fw_stats_data; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index af20c6ee2cd..e8e97a7d1d0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -662,14 +662,16 @@ void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, struct bnx2x_fastpath *fp, struct bnx2x_eth_q_stats *qstats) { - /* Do nothing if no IP/L4 csum validation was done */ - + /* Do nothing if no L4 csum validation was done. + * We do not check whether IP csum was validated. For IPv4 we assume + * that if the card got as far as validating the L4 csum, it also + * validated the IP csum. IPv6 has no IP csum. + */ if (cqe->fast_path_cqe.status_flags & - (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | - ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) + ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) return; - /* If both IP/L4 validation were done, check if an error was found. */ + /* If L4 validation was done, check if an error was found. */ if (cqe->fast_path_cqe.type_error_flags & (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 211753e01f8..0875ecfe337 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9831,12 +9831,13 @@ static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp) } #ifdef CONFIG_PCI_MSI - /* - * It's expected that number of CAM entries for this functions is equal - * to the number evaluated based on the MSI-X table size. We want a - * harsh warning if these values are different! + /* Due to new PF resource allocation by MFW T7.4 and above, it's + * optional that number of CAM entries will not be equal to the value + * advertised in PCI. + * Driver should use the minimal value of both as the actual status + * block count */ - WARN_ON(bp->igu_sb_cnt != igu_sb_cnt); + bp->igu_sb_cnt = min_t(int, bp->igu_sb_cnt, igu_sb_cnt); #endif if (igu_sb_cnt == 0) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index f83e033da6d..acf2fe4ca60 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -1321,7 +1321,7 @@ void bnx2x_init_mcast_obj(struct bnx2x *bp, * the current command will be enqueued to the tail of the * pending commands list. * - * Return: 0 is operation was sucessfull and there are no pending completions, + * Return: 0 is operation was successfull and there are no pending completions, * negative if there were errors, positive if there are pending * completions. */ diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c index 77884191a8c..4e980a7886f 100644 --- a/drivers/net/ethernet/cadence/at91_ether.c +++ b/drivers/net/ethernet/cadence/at91_ether.c @@ -1086,7 +1086,7 @@ static int __init at91ether_probe(struct platform_device *pdev) /* Clock */ lp->ether_clk = clk_get(&pdev->dev, "ether_clk"); if (IS_ERR(lp->ether_clk)) { - res = -ENODEV; + res = PTR_ERR(lp->ether_clk); goto err_ioumap; } clk_enable(lp->ether_clk); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 8971921cc1c..ab6762caa95 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1773,6 +1773,7 @@ static int gfar_get_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd, } int gfar_phc_index = -1; +EXPORT_SYMBOL(gfar_phc_index); static int gfar_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index c08e5d40fec..0daa66b8eca 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -515,7 +515,7 @@ static int gianfar_ptp_probe(struct platform_device *dev) err = PTR_ERR(etsects->clock); goto no_clock; } - gfar_phc_clock = ptp_clock_index(etsects->clock); + gfar_phc_index = ptp_clock_index(etsects->clock); dev_set_drvdata(&dev->dev, etsects); @@ -539,7 +539,7 @@ static int gianfar_ptp_remove(struct platform_device *dev) gfar_write(&etsects->regs->tmr_temask, 0); gfar_write(&etsects->regs->tmr_ctrl, 0); - gfar_phc_clock = -1; + gfar_phc_index = -1; ptp_clock_unregister(etsects->clock); iounmap(etsects->regs); release_resource(etsects->rsrc); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3bfbb8df898..bde337ee1a3 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3149,6 +3149,17 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } + /* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN, + * packets may get corrupted during padding by HW. + * To WA this issue, pad all small packets manually. + */ + if (skb->len < ETH_ZLEN) { + if (skb_pad(skb, ETH_ZLEN - skb->len)) + return NETDEV_TX_OK; + skb->len = ETH_ZLEN; + skb_set_tail_pointer(skb, ETH_ZLEN); + } + mss = skb_shinfo(skb)->gso_size; /* The controller does a simple calculation to * make sure there is enough room in the FIFO before diff --git a/drivers/net/ethernet/octeon/octeon_mgmt.c b/drivers/net/ethernet/octeon/octeon_mgmt.c index c42bbb16cda..a688a2ddcfd 100644 --- a/drivers/net/ethernet/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/octeon/octeon_mgmt.c @@ -722,10 +722,8 @@ static int octeon_mgmt_init_phy(struct net_device *netdev) octeon_mgmt_adjust_link, 0, PHY_INTERFACE_MODE_MII); - if (IS_ERR(p->phydev)) { - p->phydev = NULL; + if (!p->phydev) return -1; - } phy_start_aneg(p->phydev); diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index e559dfa06d6..6fa74d530e4 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1101,9 +1101,9 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); - if (IS_ERR(phydev)) { + if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); - return PTR_ERR(phydev); + return -ENODEV; } mac->phydev = phydev; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 342b3a79bd0..a77c558d8f4 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1378,6 +1378,10 @@ static void netxen_mask_aer_correctable(struct netxen_adapter *adapter) struct pci_dev *root = pdev->bus->self; u32 aer_pos; + /* root bus? */ + if (!root) + return; + if (adapter->ahw.board_type != NETXEN_BRDTYPE_P3_4_GB_MM && adapter->ahw.board_type != NETXEN_BRDTYPE_P3_10G_TP) return; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c index b8ead696141..2a179d08720 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c @@ -15,7 +15,7 @@ qlcnic_poll_rsp(struct qlcnic_adapter *adapter) do { /* give atleast 1ms for firmware to respond */ - msleep(1); + mdelay(1); if (++timeout > QLCNIC_OS_CRB_RETRY_COUNT) return QLCNIC_CDRP_RSP_TIMEOUT; @@ -601,7 +601,7 @@ void qlcnic_fw_destroy_ctx(struct qlcnic_adapter *adapter) qlcnic_fw_cmd_destroy_tx_ctx(adapter); /* Allow dma queues to drain after context reset */ - msleep(20); + mdelay(20); } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c136162e647..3be88331d17 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1066,7 +1066,7 @@ static int stmmac_open(struct net_device *dev) } else priv->tm->enable = 1; #endif - clk_enable(priv->stmmac_clk); + clk_prepare_enable(priv->stmmac_clk); stmmac_check_ether_addr(priv); @@ -1188,7 +1188,7 @@ open_error: if (priv->phydev) phy_disconnect(priv->phydev); - clk_disable(priv->stmmac_clk); + clk_disable_unprepare(priv->stmmac_clk); return ret; } @@ -1246,7 +1246,7 @@ static int stmmac_release(struct net_device *dev) #ifdef CONFIG_STMMAC_DEBUG_FS stmmac_exit_fs(); #endif - clk_disable(priv->stmmac_clk); + clk_disable_unprepare(priv->stmmac_clk); return 0; } @@ -2178,7 +2178,7 @@ int stmmac_suspend(struct net_device *ndev) else { stmmac_set_mac(priv->ioaddr, false); /* Disable clock in case of PWM is off */ - clk_disable(priv->stmmac_clk); + clk_disable_unprepare(priv->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); return 0; @@ -2203,7 +2203,7 @@ int stmmac_resume(struct net_device *ndev) priv->hw->mac->pmt(priv->ioaddr, 0); else /* enable the clk prevously disabled */ - clk_enable(priv->stmmac_clk); + clk_prepare_enable(priv->stmmac_clk); netif_device_attach(ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c index 2a0e1abde7e..4ccd4e2977b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c @@ -97,19 +97,19 @@ static struct clk *timer_clock; static void stmmac_tmu_start(unsigned int new_freq) { clk_set_rate(timer_clock, new_freq); - clk_enable(timer_clock); + clk_prepare_enable(timer_clock); } static void stmmac_tmu_stop(void) { - clk_disable(timer_clock); + clk_disable_unprepare(timer_clock); } int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm) { timer_clock = clk_get(NULL, TMU_CHANNEL); - if (timer_clock == NULL) + if (IS_ERR(timer_clock)) return -1; if (tmu2_register_user(stmmac_timer_handler, (void *)dev) < 0) { @@ -126,7 +126,7 @@ int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm) int stmmac_close_ext_timer(void) { - clk_disable(timer_clock); + clk_disable_unprepare(timer_clock); tmu2_unregister_user(); clk_put(timer_clock); return 0; diff --git a/drivers/net/irda/sh_sir.c b/drivers/net/irda/sh_sir.c index 256eddf1f75..79510942556 100644 --- a/drivers/net/irda/sh_sir.c +++ b/drivers/net/irda/sh_sir.c @@ -280,7 +280,7 @@ static int sh_sir_set_baudrate(struct sh_sir_self *self, u32 baudrate) } clk = clk_get(NULL, "irda_clk"); - if (!clk) { + if (IS_ERR(clk)) { dev_err(dev, "can not get irda_clk\n"); return -EIO; } diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 2346b38b983..799789518e8 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -229,3 +229,5 @@ static void __exit bcm87xx_exit(void) ARRAY_SIZE(bcm87xx_driver)); } module_exit(bcm87xx_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index cf287e0eb40..2165d5fdb8c 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -21,6 +21,12 @@ #include <linux/phy.h> #include <linux/micrel_phy.h> +/* Operation Mode Strap Override */ +#define MII_KSZPHY_OMSO 0x16 +#define KSZPHY_OMSO_B_CAST_OFF (1 << 9) +#define KSZPHY_OMSO_RMII_OVERRIDE (1 << 1) +#define KSZPHY_OMSO_MII_OVERRIDE (1 << 0) + /* general Interrupt control/status reg in vendor specific block. */ #define MII_KSZPHY_INTCS 0x1B #define KSZPHY_INTCS_JABBER (1 << 15) @@ -101,6 +107,13 @@ static int kszphy_config_init(struct phy_device *phydev) return 0; } +static int ksz8021_config_init(struct phy_device *phydev) +{ + const u16 val = KSZPHY_OMSO_B_CAST_OFF | KSZPHY_OMSO_RMII_OVERRIDE; + phy_write(phydev, MII_KSZPHY_OMSO, val); + return 0; +} + static int ks8051_config_init(struct phy_device *phydev) { int regval; @@ -128,9 +141,22 @@ static struct phy_driver ksphy_driver[] = { .config_intr = ks8737_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8041, + .phy_id = PHY_ID_KSZ8021, + .phy_id_mask = 0x00ffffff, + .name = "Micrel KSZ8021", + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause), + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = ksz8021_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = kszphy_ack_interrupt, + .config_intr = kszphy_config_intr, + .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_KSZ8041, .phy_id_mask = 0x00fffff0, - .name = "Micrel KS8041", + .name = "Micrel KSZ8041", .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -141,9 +167,9 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8051, + .phy_id = PHY_ID_KSZ8051, .phy_id_mask = 0x00fffff0, - .name = "Micrel KS8051", + .name = "Micrel KSZ8051", .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -154,8 +180,8 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .driver = { .owner = THIS_MODULE,}, }, { - .phy_id = PHY_ID_KS8001, - .name = "Micrel KS8001 or KS8721", + .phy_id = PHY_ID_KSZ8001, + .name = "Micrel KSZ8001 or KS8721", .phy_id_mask = 0x00ffffff, .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause), .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, @@ -201,10 +227,11 @@ MODULE_LICENSE("GPL"); static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ9021, 0x000ffffe }, - { PHY_ID_KS8001, 0x00ffffff }, + { PHY_ID_KSZ8001, 0x00ffffff }, { PHY_ID_KS8737, 0x00fffff0 }, - { PHY_ID_KS8041, 0x00fffff0 }, - { PHY_ID_KS8051, 0x00fffff0 }, + { PHY_ID_KSZ8021, 0x00ffffff }, + { PHY_ID_KSZ8041, 0x00fffff0 }, + { PHY_ID_KSZ8051, 0x00fffff0 }, { } }; diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 6d6192316b3..88e3991464e 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -56,6 +56,32 @@ static int smsc_phy_config_init(struct phy_device *phydev) return smsc_phy_ack_interrupt (phydev); } +static int lan87xx_config_init(struct phy_device *phydev) +{ + /* + * Make sure the EDPWRDOWN bit is NOT set. Setting this bit on + * LAN8710/LAN8720 PHY causes the PHY to misbehave, likely due + * to a bug on the chip. + * + * When the system is powered on with the network cable being + * disconnected all the way until after ifconfig ethX up is + * issued for the LAN port with this PHY, connecting the cable + * afterwards does not cause LINK change detection, while the + * expected behavior is the Link UP being detected. + */ + int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + + rc &= ~MII_LAN83C185_EDPWRDOWN; + + rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS, rc); + if (rc < 0) + return rc; + + return smsc_phy_ack_interrupt(phydev); +} + static int lan911x_config_init(struct phy_device *phydev) { return smsc_phy_ack_interrupt(phydev); @@ -162,7 +188,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_config_init, /* IRQ related */ .ack_interrupt = smsc_phy_ack_interrupt, diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index cbf7047decc..20f31d0d153 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -570,7 +570,7 @@ static int pppoe_release(struct socket *sock) po = pppox_sk(sk); - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND | PPPOX_ZOMBIE)) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 341b65dbbcd..f8cd61f449a 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -848,7 +848,7 @@ static struct netpoll_info *team_netpoll_info(struct team *team) } #endif -static void __team_port_change_check(struct team_port *port, bool linkup); +static void __team_port_change_port_added(struct team_port *port, bool linkup); static int team_port_add(struct team *team, struct net_device *port_dev) { @@ -948,7 +948,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) team_port_enable(team, port); list_add_tail_rcu(&port->list, &team->port_list); __team_compute_features(team); - __team_port_change_check(port, !!netif_carrier_ok(port_dev)); + __team_port_change_port_added(port, !!netif_carrier_ok(port_dev)); __team_options_change_check(team); netdev_info(dev, "Port device %s added\n", portname); @@ -983,6 +983,8 @@ err_set_mtu: return err; } +static void __team_port_change_port_removed(struct team_port *port); + static int team_port_del(struct team *team, struct net_device *port_dev) { struct net_device *dev = team->dev; @@ -999,8 +1001,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) __team_option_inst_mark_removed_port(team, port); __team_options_change_check(team); __team_option_inst_del_port(team, port); - port->removed = true; - __team_port_change_check(port, false); + __team_port_change_port_removed(port); team_port_disable(team, port); list_del_rcu(&port->list); netdev_rx_handler_unregister(port_dev); @@ -1652,8 +1653,8 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); - if (IS_ERR(hdr)) { - err = PTR_ERR(hdr); + if (!hdr) { + err = -EMSGSIZE; goto err_msg_put; } @@ -1847,8 +1848,8 @@ start_again: hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2067,8 +2068,8 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; @@ -2251,13 +2252,11 @@ static void __team_options_change_check(struct team *team) } /* rtnl lock is held */ -static void __team_port_change_check(struct team_port *port, bool linkup) + +static void __team_port_change_send(struct team_port *port, bool linkup) { int err; - if (!port->removed && port->state.linkup == linkup) - return; - port->changed = true; port->state.linkup = linkup; team_refresh_port_linkup(port); @@ -2282,6 +2281,23 @@ send_event: } +static void __team_port_change_check(struct team_port *port, bool linkup) +{ + if (port->state.linkup != linkup) + __team_port_change_send(port, linkup); +} + +static void __team_port_change_port_added(struct team_port *port, bool linkup) +{ + __team_port_change_send(port, linkup); +} + +static void __team_port_change_port_removed(struct team_port *port) +{ + port->removed = true; + __team_port_change_send(port, false); +} + static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 4fd48df6b98..32e31c5c5dc 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -962,6 +962,10 @@ static const struct usb_device_id products [] = { USB_DEVICE (0x2001, 0x3c05), .driver_info = (unsigned long) &ax88772_info, }, { + // DLink DUB-E100 H/W Ver C1 + USB_DEVICE (0x2001, 0x1a02), + .driver_info = (unsigned long) &ax88772_info, +}, { // Linksys USB1000 USB_DEVICE (0x1737, 0x0039), .driver_info = (unsigned long) &ax88178_info, diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b1ba68f1a04..3543c9e5782 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -366,16 +366,20 @@ static const struct usb_device_id products[] = { }, /* 2. Combined interface devices matching on class+protocol */ + { /* Huawei E367 and possibly others in "Windows mode" */ + USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 7), + .driver_info = (unsigned long)&qmi_wwan_info, + }, { /* Huawei E392, E398 and possibly others in "Windows mode" */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 17), .driver_info = (unsigned long)&qmi_wwan_shared, }, - { /* Pantech UML290 */ - USB_DEVICE_AND_INTERFACE_INFO(0x106c, 0x3718, USB_CLASS_VENDOR_SPEC, 0xf0, 0xff), + { /* Pantech UML290, P4200 and more */ + USB_VENDOR_AND_INTERFACE_INFO(0x106c, USB_CLASS_VENDOR_SPEC, 0xf0, 0xff), .driver_info = (unsigned long)&qmi_wwan_shared, }, { /* Pantech UML290 - newer firmware */ - USB_DEVICE_AND_INTERFACE_INFO(0x106c, 0x3718, USB_CLASS_VENDOR_SPEC, 0xf1, 0xff), + USB_VENDOR_AND_INTERFACE_INFO(0x106c, USB_CLASS_VENDOR_SPEC, 0xf1, 0xff), .driver_info = (unsigned long)&qmi_wwan_shared, }, @@ -383,6 +387,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0055, 1)}, /* ZTE (Vodafone) K3520-Z */ {QMI_FIXED_INTF(0x19d2, 0x0063, 4)}, /* ZTE (Vodafone) K3565-Z */ {QMI_FIXED_INTF(0x19d2, 0x0104, 4)}, /* ZTE (Vodafone) K4505-Z */ + {QMI_FIXED_INTF(0x19d2, 0x0157, 5)}, /* ZTE MF683 */ {QMI_FIXED_INTF(0x19d2, 0x0167, 4)}, /* ZTE MF820D */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index f5ab6e613ec..376143e8a1a 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1253,6 +1253,7 @@ static struct usb_driver smsc75xx_driver = { .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .reset_resume = usbnet_resume, .disconnect = usbnet_disconnect, .disable_hub_initiated_lpm = 1, }; diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 2588848f4a8..d066f2516e4 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -2982,6 +2982,10 @@ static u32 ath9k_hw_ar9300_get_eeprom(struct ath_hw *ah, case EEP_RX_MASK: return pBase->txrxMask & 0xf; case EEP_PAPRD: + if (AR_SREV_9462(ah)) + return false; + if (!ah->config.enable_paprd); + return false; return !!(pBase->featureEnable & BIT(5)); case EEP_CHAIN_MASK_REDUCE: return (pBase->miscConfiguration >> 0x3) & 0x1; diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 68b643c8943..c8ef30127ad 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -1577,6 +1577,8 @@ int ath9k_init_debug(struct ath_hw *ah) sc->debug.debugfs_phy, sc, &fops_tx_chainmask); debugfs_create_file("disable_ani", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy, sc, &fops_disable_ani); + debugfs_create_bool("paprd", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy, + &sc->sc_ah->config.enable_paprd); debugfs_create_file("regidx", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy, sc, &fops_regidx); debugfs_create_file("regval", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy, diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 48af40151d2..4faf0a39587 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2497,10 +2497,6 @@ int ath9k_hw_fill_cap_info(struct ath_hw *ah) pCap->rx_status_len = sizeof(struct ar9003_rxs); pCap->tx_desc_len = sizeof(struct ar9003_txc); pCap->txs_len = sizeof(struct ar9003_txs); - if (!ah->config.paprd_disable && - ah->eep_ops->get_eeprom(ah, EEP_PAPRD) && - !AR_SREV_9462(ah)) - pCap->hw_caps |= ATH9K_HW_CAP_PAPRD; } else { pCap->tx_desc_len = sizeof(struct ath_desc); if (AR_SREV_9280_20(ah)) diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 6599a75f01f..de6968fc64f 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -236,7 +236,6 @@ enum ath9k_hw_caps { ATH9K_HW_CAP_LDPC = BIT(6), ATH9K_HW_CAP_FASTCLOCK = BIT(7), ATH9K_HW_CAP_SGI_20 = BIT(8), - ATH9K_HW_CAP_PAPRD = BIT(9), ATH9K_HW_CAP_ANT_DIV_COMB = BIT(10), ATH9K_HW_CAP_2GHZ = BIT(11), ATH9K_HW_CAP_5GHZ = BIT(12), @@ -287,12 +286,12 @@ struct ath9k_ops_config { u8 pcie_clock_req; u32 pcie_waen; u8 analog_shiftreg; - u8 paprd_disable; u32 ofdm_trig_low; u32 ofdm_trig_high; u32 cck_trig_high; u32 cck_trig_low; u32 enable_ani; + u32 enable_paprd; int serialize_regmode; bool rx_intr_mitigation; bool tx_intr_mitigation; diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index 825a29cc931..7b88b9c39cc 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -423,7 +423,7 @@ set_timer: cal_interval = min(cal_interval, (u32)short_cal_interval); mod_timer(&common->ani.timer, jiffies + msecs_to_jiffies(cal_interval)); - if ((sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_PAPRD) && ah->caldata) { + if (ah->eep_ops->get_eeprom(ah, EEP_PAPRD) && ah->caldata) { if (!ah->caldata->paprd_done) ieee80211_queue_work(sc->hw, &sc->paprd_work); else if (!ah->paprd_table_write_done) diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index 3876c7ea54f..7a28d21ac38 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -34,8 +34,8 @@ config B43_BCMA config B43_BCMA_EXTRA bool "Hardware support that overlaps with the brcmsmac driver" depends on B43_BCMA - default n if BRCMSMAC || BRCMSMAC_MODULE - default y + default n if BRCMSMAC + default y config B43_SSB bool diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c index 49765d34b4e..7c4ee72f9d5 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c @@ -638,6 +638,8 @@ static int brcmf_sdio_pd_probe(struct platform_device *pdev) oobirq_entry = kzalloc(sizeof(struct brcmf_sdio_oobirq), GFP_KERNEL); + if (!oobirq_entry) + return -ENOMEM; oobirq_entry->irq = res->start; oobirq_entry->flags = res->flags & IRQF_TRIGGER_MASK; list_add_tail(&oobirq_entry->list, &oobirq_lh); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c index 2621dd3d7dc..6f70953f0ba 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_common.c @@ -764,8 +764,11 @@ static void brcmf_c_arp_offload_set(struct brcmf_pub *drvr, int arp_mode) { char iovbuf[32]; int retcode; + __le32 arp_mode_le; - brcmf_c_mkiovar("arp_ol", (char *)&arp_mode, 4, iovbuf, sizeof(iovbuf)); + arp_mode_le = cpu_to_le32(arp_mode); + brcmf_c_mkiovar("arp_ol", (char *)&arp_mode_le, 4, iovbuf, + sizeof(iovbuf)); retcode = brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, sizeof(iovbuf)); retcode = retcode >= 0 ? 0 : retcode; @@ -781,8 +784,11 @@ static void brcmf_c_arp_offload_enable(struct brcmf_pub *drvr, int arp_enable) { char iovbuf[32]; int retcode; + __le32 arp_enable_le; - brcmf_c_mkiovar("arpoe", (char *)&arp_enable, 4, + arp_enable_le = cpu_to_le32(arp_enable); + + brcmf_c_mkiovar("arpoe", (char *)&arp_enable_le, 4, iovbuf, sizeof(iovbuf)); retcode = brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, sizeof(iovbuf)); @@ -800,10 +806,10 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) char iovbuf[BRCMF_EVENTING_MASK_LEN + 12]; /* Room for "event_msgs" + '\0' + bitvec */ char buf[128], *ptr; - u32 roaming = 1; - uint bcn_timeout = 3; - int scan_assoc_time = 40; - int scan_unassoc_time = 40; + __le32 roaming_le = cpu_to_le32(1); + __le32 bcn_timeout_le = cpu_to_le32(3); + __le32 scan_assoc_time_le = cpu_to_le32(40); + __le32 scan_unassoc_time_le = cpu_to_le32(40); int i; struct brcmf_bus_dcmd *cmdlst; struct list_head *cur, *q; @@ -829,14 +835,14 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) /* Setup timeout if Beacons are lost and roam is off to report link down */ - brcmf_c_mkiovar("bcn_timeout", (char *)&bcn_timeout, 4, iovbuf, + brcmf_c_mkiovar("bcn_timeout", (char *)&bcn_timeout_le, 4, iovbuf, sizeof(iovbuf)); brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, sizeof(iovbuf)); /* Enable/Disable build-in roaming to allowed ext supplicant to take of romaing */ - brcmf_c_mkiovar("roam_off", (char *)&roaming, 4, + brcmf_c_mkiovar("roam_off", (char *)&roaming_le, 4, iovbuf, sizeof(iovbuf)); brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_VAR, iovbuf, sizeof(iovbuf)); @@ -848,9 +854,9 @@ int brcmf_c_preinit_dcmds(struct brcmf_pub *drvr) sizeof(iovbuf)); brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_SCAN_CHANNEL_TIME, - (char *)&scan_assoc_time, sizeof(scan_assoc_time)); + (char *)&scan_assoc_time_le, sizeof(scan_assoc_time_le)); brcmf_proto_cdc_set_dcmd(drvr, 0, BRCMF_C_SET_SCAN_UNASSOC_TIME, - (char *)&scan_unassoc_time, sizeof(scan_unassoc_time)); + (char *)&scan_unassoc_time_le, sizeof(scan_unassoc_time_le)); /* Set and enable ARP offload feature */ brcmf_c_arp_offload_set(drvr, BRCMF_ARPOL_MODE); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index c36e9231244..50b5553b696 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -500,8 +500,10 @@ static void wl_iscan_prep(struct brcmf_scan_params_le *params_le, params_le->active_time = cpu_to_le32(-1); params_le->passive_time = cpu_to_le32(-1); params_le->home_time = cpu_to_le32(-1); - if (ssid && ssid->SSID_len) - memcpy(¶ms_le->ssid_le, ssid, sizeof(struct brcmf_ssid)); + if (ssid && ssid->SSID_len) { + params_le->ssid_le.SSID_len = cpu_to_le32(ssid->SSID_len); + memcpy(¶ms_le->ssid_le.SSID, ssid->SSID, ssid->SSID_len); + } } static s32 diff --git a/drivers/net/wireless/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/brcm80211/brcmsmac/channel.c index 7ed7d757702..64a48f06d68 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/channel.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/channel.c @@ -77,7 +77,7 @@ NL80211_RRF_NO_IBSS) static const struct ieee80211_regdomain brcms_regdom_x2 = { - .n_reg_rules = 7, + .n_reg_rules = 6, .alpha2 = "X2", .reg_rules = { BRCM_2GHZ_2412_2462, diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 1e86ea2266d..dbeebef562d 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1442,6 +1442,7 @@ static int iwl_trans_pcie_start_hw(struct iwl_trans *trans) return err; err_free_irq: + trans_pcie->irq_requested = false; free_irq(trans_pcie->irq, trans); error: iwl_free_isr_ict(trans); diff --git a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c index 44febfde949..8a7b864faca 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/rtlwifi/rtl8192c/fw_common.c @@ -315,7 +315,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, u16 box_reg = 0, box_extreg = 0; u8 u1b_tmp; bool isfw_read = false; - bool bwrite_sucess = false; + bool bwrite_success = false; u8 wait_h2c_limmit = 100; u8 wait_writeh2c_limmit = 100; u8 boxcontent[4], boxextcontent[2]; @@ -354,7 +354,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, } } - while (!bwrite_sucess) { + while (!bwrite_success) { wait_writeh2c_limmit--; if (wait_writeh2c_limmit == 0) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, @@ -491,7 +491,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; } - bwrite_sucess = true; + bwrite_success = true; rtlhal->last_hmeboxnum = boxnum + 1; if (rtlhal->last_hmeboxnum == 4) diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h index 04c3aef8a4f..2925094b2d9 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/def.h +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/def.h @@ -117,6 +117,7 @@ #define CHIP_VER_B BIT(4) #define CHIP_92C_BITMASK BIT(0) +#define CHIP_UNKNOWN BIT(7) #define CHIP_92C_1T2R 0x03 #define CHIP_92C 0x01 #define CHIP_88C 0x00 diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c index bd0da7ef290..dd4bb0950a5 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c @@ -994,8 +994,16 @@ static enum version_8192c _rtl92ce_read_chip_version(struct ieee80211_hw *hw) version = (value32 & TYPE_ID) ? VERSION_A_CHIP_92C : VERSION_A_CHIP_88C; } else { - version = (value32 & TYPE_ID) ? VERSION_B_CHIP_92C : - VERSION_B_CHIP_88C; + version = (enum version_8192c) (CHIP_VER_B | + ((value32 & TYPE_ID) ? CHIP_92C_BITMASK : 0) | + ((value32 & VENDOR_ID) ? CHIP_VENDOR_UMC : 0)); + if ((!IS_CHIP_VENDOR_UMC(version)) && (value32 & + CHIP_VER_RTL_MASK)) { + version = (enum version_8192c)(version | + ((((value32 & CHIP_VER_RTL_MASK) == BIT(12)) + ? CHIP_VENDOR_UMC_B_CUT : CHIP_UNKNOWN) | + CHIP_VENDOR_UMC)); + } } switch (version) { diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c index 3aa927f8b9b..7d8f96405f4 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c @@ -162,10 +162,12 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) /* request fw */ if (IS_VENDOR_UMC_A_CUT(rtlhal->version) && - !IS_92C_SERIAL(rtlhal->version)) + !IS_92C_SERIAL(rtlhal->version)) { rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU.bin"; - else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) + } else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) { rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU_B.bin"; + pr_info("****** This B_CUT device may not work with kernels 3.6 and earlier\n"); + } rtlpriv->max_fw_size = 0x4000; pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/rtlwifi/rtl8192de/fw.c index 895ae6c1f35..eb22dccc418 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192de/fw.c @@ -365,7 +365,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, u8 u1b_tmp; bool isfw_read = false; u8 buf_index = 0; - bool bwrite_sucess = false; + bool bwrite_success = false; u8 wait_h2c_limmit = 100; u8 wait_writeh2c_limmit = 100; u8 boxcontent[4], boxextcontent[2]; @@ -408,7 +408,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; } } - while (!bwrite_sucess) { + while (!bwrite_success) { wait_writeh2c_limmit--; if (wait_writeh2c_limmit == 0) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, @@ -515,7 +515,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, "switch case not processed\n"); break; } - bwrite_sucess = true; + bwrite_success = true; rtlhal->last_hmeboxnum = boxnum + 1; if (rtlhal->last_hmeboxnum == 4) rtlhal->last_hmeboxnum = 0; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index b8ef8ddcc29..8aa73fac6ad 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -451,14 +451,9 @@ static void wq_sync_buffer(struct work_struct *work) { struct oprofile_cpu_buffer *b = container_of(work, struct oprofile_cpu_buffer, work.work); - if (b->cpu != smp_processor_id()) { - printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n", - smp_processor_id(), b->cpu); - - if (!cpu_online(b->cpu)) { - cancel_delayed_work(&b->work); - return; - } + if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) { + cancel_delayed_work(&b->work); + return; } sync_buffer(b->cpu); diff --git a/drivers/pci/.gitignore b/drivers/pci/.gitignore deleted file mode 100644 index f297ca8d313..00000000000 --- a/drivers/pci/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -classlist.h -devlist.h -gen-devlist - diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 4e932cc695e..e98a5e7827d 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -33,9 +33,8 @@ config REGULATOR_DUMMY help If this option is enabled then when a regulator lookup fails and the board has not specified that it has provided full - constraints then the regulator core will provide an always - enabled dummy regulator will be provided, allowing consumer - drivers to continue. + constraints the regulator core will provide an always + enabled dummy regulator, allowing consumer drivers to continue. A warning will be generated when this substitution is done. @@ -50,11 +49,11 @@ config REGULATOR_VIRTUAL_CONSUMER tristate "Virtual regulator consumer support" help This driver provides a virtual consumer for the voltage and - current regulator API which provides sysfs controls for - configuring the supplies requested. This is mainly useful - for test purposes. + current regulator API which provides sysfs controls for + configuring the supplies requested. This is mainly useful + for test purposes. - If unsure, say no. + If unsure, say no. config REGULATOR_USERSPACE_CONSUMER tristate "Userspace regulator consumer support" @@ -63,7 +62,7 @@ config REGULATOR_USERSPACE_CONSUMER from user space. Userspace consumer driver provides ability to control power supplies for such devices. - If unsure, say no. + If unsure, say no. config REGULATOR_GPIO tristate "GPIO regulator support" @@ -110,6 +109,17 @@ config REGULATOR_DA9052 This driver supports the voltage regulators of DA9052-BC and DA9053-AA/Bx PMIC. +config REGULATOR_FAN53555 + tristate "Fairchild FAN53555 Regulator" + depends on I2C + select REGMAP_I2C + help + This driver supports Fairchild FAN53555 Digitally Programmable + TinyBuck Regulator. The FAN53555 is a step-down switching voltage + regulator that delivers a digitally programmable output from an + input voltage supply of 2.5V to 5.5V. The output voltage is + programmed through an I2C interface. + config REGULATOR_ANATOP tristate "Freescale i.MX on-chip ANATOP LDO regulators" depends on MFD_ANATOP @@ -172,6 +182,14 @@ config REGULATOR_MAX8660 This driver controls a Maxim 8660/8661 voltage output regulator via I2C bus. +config REGULATOR_MAX8907 + tristate "Maxim 8907 voltage regulator" + depends on MFD_MAX8907 + help + This driver controls a Maxim 8907 voltage output regulator + via I2C bus. The provided regulator is suitable for Tegra + chip to control Step-Down DC-DC and LDOs. + config REGULATOR_MAX8925 tristate "Maxim MAX8925 Power Management IC" depends on MFD_MAX8925 @@ -247,7 +265,7 @@ config REGULATOR_LP8788 config REGULATOR_PCF50633 tristate "NXP PCF50633 regulator driver" - depends on MFD_PCF50633 + depends on MFD_PCF50633 help Say Y here to support the voltage regulators and convertors on PCF50633 @@ -416,7 +434,7 @@ config REGULATOR_WM8350 depends on MFD_WM8350 help This driver provides support for the voltage and current regulators - of the WM8350 AudioPlus PMIC. + of the WM8350 AudioPlus PMIC. config REGULATOR_WM8400 tristate "Wolfson Microelectronics WM8400 AudioPlus PMIC" diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 3342615cf25..e431eed8a87 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_REGULATOR_DA903X) += da903x.o obj-$(CONFIG_REGULATOR_DA9052) += da9052-regulator.o obj-$(CONFIG_REGULATOR_DBX500_PRCMU) += dbx500-prcmu.o obj-$(CONFIG_REGULATOR_DB8500_PRCMU) += db8500-prcmu.o +obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o @@ -30,6 +31,7 @@ obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o +obj-$(CONFIG_REGULATOR_MAX8907) += max8907-regulator.o obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o obj-$(CONFIG_REGULATOR_MAX8997) += max8997.o diff --git a/drivers/regulator/aat2870-regulator.c b/drivers/regulator/aat2870-regulator.c index 6f45bfd22e8..167c93f2198 100644 --- a/drivers/regulator/aat2870-regulator.c +++ b/drivers/regulator/aat2870-regulator.c @@ -162,7 +162,7 @@ static struct aat2870_regulator *aat2870_get_regulator(int id) static int aat2870_regulator_probe(struct platform_device *pdev) { struct aat2870_regulator *ri; - struct regulator_config config = { 0 }; + struct regulator_config config = { }; struct regulator_dev *rdev; ri = aat2870_get_regulator(pdev->id); diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c index c151fd5d8c9..65ad2b36ce3 100644 --- a/drivers/regulator/ab3100.c +++ b/drivers/regulator/ab3100.c @@ -347,17 +347,11 @@ static int ab3100_get_voltage_regulator_external(struct regulator_dev *reg) return abreg->plfdata->external_voltage; } -static int ab3100_get_fixed_voltage_regulator(struct regulator_dev *reg) -{ - return reg->desc->min_uV; -} - static struct regulator_ops regulator_ops_fixed = { .list_voltage = regulator_list_voltage_linear, .enable = ab3100_enable_regulator, .disable = ab3100_disable_regulator, .is_enabled = ab3100_is_enabled_regulator, - .get_voltage = ab3100_get_fixed_voltage_regulator, }; static struct regulator_ops regulator_ops_variable = { diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 10f2f4d4d19..e3d1d063025 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -37,6 +37,7 @@ * @voltage_bank: bank to control regulator voltage * @voltage_reg: register to control regulator voltage * @voltage_mask: mask to control regulator voltage + * @voltage_shift: shift to control regulator voltage * @delay: startup/set voltage delay in us */ struct ab8500_regulator_info { @@ -50,6 +51,7 @@ struct ab8500_regulator_info { u8 voltage_bank; u8 voltage_reg; u8 voltage_mask; + u8 voltage_shift; unsigned int delay; }; @@ -195,17 +197,14 @@ static int ab8500_regulator_get_voltage_sel(struct regulator_dev *rdev) } dev_vdbg(rdev_get_dev(rdev), - "%s-get_voltage (bank, reg, mask, value): 0x%x, 0x%x, 0x%x," - " 0x%x\n", - info->desc.name, info->voltage_bank, info->voltage_reg, - info->voltage_mask, regval); + "%s-get_voltage (bank, reg, mask, shift, value): " + "0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", + info->desc.name, info->voltage_bank, + info->voltage_reg, info->voltage_mask, + info->voltage_shift, regval); - /* vintcore has a different layout */ val = regval & info->voltage_mask; - if (info->desc.id == AB8500_LDO_INTCORE) - return val >> 0x3; - else - return val; + return val >> info->voltage_shift; } static int ab8500_regulator_set_voltage_sel(struct regulator_dev *rdev, @@ -221,7 +220,7 @@ static int ab8500_regulator_set_voltage_sel(struct regulator_dev *rdev, } /* set the registers for the request */ - regval = (u8)selector; + regval = (u8)selector << info->voltage_shift; ret = abx500_mask_and_set_register_interruptible(info->dev, info->voltage_bank, info->voltage_reg, info->voltage_mask, regval); @@ -238,13 +237,6 @@ static int ab8500_regulator_set_voltage_sel(struct regulator_dev *rdev, return ret; } -static int ab8500_regulator_enable_time(struct regulator_dev *rdev) -{ - struct ab8500_regulator_info *info = rdev_get_drvdata(rdev); - - return info->delay; -} - static int ab8500_regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int old_sel, unsigned int new_sel) @@ -261,22 +253,14 @@ static struct regulator_ops ab8500_regulator_ops = { .get_voltage_sel = ab8500_regulator_get_voltage_sel, .set_voltage_sel = ab8500_regulator_set_voltage_sel, .list_voltage = regulator_list_voltage_table, - .enable_time = ab8500_regulator_enable_time, .set_voltage_time_sel = ab8500_regulator_set_voltage_time_sel, }; -static int ab8500_fixed_get_voltage(struct regulator_dev *rdev) -{ - return rdev->desc->min_uV; -} - static struct regulator_ops ab8500_regulator_fixed_ops = { .enable = ab8500_regulator_enable, .disable = ab8500_regulator_disable, .is_enabled = ab8500_regulator_is_enabled, - .get_voltage = ab8500_fixed_get_voltage, .list_voltage = regulator_list_voltage_linear, - .enable_time = ab8500_regulator_enable_time, }; static struct ab8500_regulator_info @@ -358,6 +342,7 @@ static struct ab8500_regulator_info .voltage_bank = 0x03, .voltage_reg = 0x80, .voltage_mask = 0x38, + .voltage_shift = 3, }, /* @@ -374,6 +359,7 @@ static struct ab8500_regulator_info .owner = THIS_MODULE, .n_voltages = 1, .min_uV = 2000000, + .enable_time = 10000, }, .delay = 10000, .update_bank = 0x03, diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c index c8f95c07adb..d184aa35abc 100644 --- a/drivers/regulator/arizona-ldo1.c +++ b/drivers/regulator/arizona-ldo1.c @@ -39,6 +39,8 @@ static struct regulator_ops arizona_ldo1_ops = { .map_voltage = regulator_map_voltage_linear, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_bypass = regulator_get_bypass_regmap, + .set_bypass = regulator_set_bypass_regmap, }; static const struct regulator_desc arizona_ldo1 = { @@ -49,9 +51,11 @@ static const struct regulator_desc arizona_ldo1 = { .vsel_reg = ARIZONA_LDO1_CONTROL_1, .vsel_mask = ARIZONA_LDO1_VSEL_MASK, + .bypass_reg = ARIZONA_LDO1_CONTROL_1, + .bypass_mask = ARIZONA_LDO1_BYPASS, .min_uV = 900000, .uV_step = 50000, - .n_voltages = 7, + .n_voltages = 6, .owner = THIS_MODULE, }; diff --git a/drivers/regulator/arizona-micsupp.c b/drivers/regulator/arizona-micsupp.c index 450a069aa9b..d9b1f82cc5b 100644 --- a/drivers/regulator/arizona-micsupp.c +++ b/drivers/regulator/arizona-micsupp.c @@ -82,6 +82,9 @@ static struct regulator_ops arizona_micsupp_ops = { .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, + + .get_bypass = regulator_get_bypass_regmap, + .set_bypass = regulator_set_bypass_regmap, }; static const struct regulator_desc arizona_micsupp = { @@ -95,6 +98,8 @@ static const struct regulator_desc arizona_micsupp = { .vsel_mask = ARIZONA_LDO2_VSEL_MASK, .enable_reg = ARIZONA_MIC_CHARGE_PUMP_1, .enable_mask = ARIZONA_CPMIC_ENA, + .bypass_reg = ARIZONA_MIC_CHARGE_PUMP_1, + .bypass_mask = ARIZONA_CPMIC_BYPASS, .owner = THIS_MODULE, }; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 48385318175..2e0352dc26b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -77,6 +77,7 @@ struct regulator { struct device *dev; struct list_head list; unsigned int always_on:1; + unsigned int bypass:1; int uA_load; int min_uV; int max_uV; @@ -394,6 +395,9 @@ static ssize_t regulator_status_show(struct device *dev, case REGULATOR_STATUS_STANDBY: label = "standby"; break; + case REGULATOR_STATUS_BYPASS: + label = "bypass"; + break; case REGULATOR_STATUS_UNDEFINED: label = "undefined"; break; @@ -585,6 +589,27 @@ static ssize_t regulator_suspend_standby_state_show(struct device *dev, static DEVICE_ATTR(suspend_standby_state, 0444, regulator_suspend_standby_state_show, NULL); +static ssize_t regulator_bypass_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct regulator_dev *rdev = dev_get_drvdata(dev); + const char *report; + bool bypass; + int ret; + + ret = rdev->desc->ops->get_bypass(rdev, &bypass); + + if (ret != 0) + report = "unknown"; + else if (bypass) + report = "enabled"; + else + report = "disabled"; + + return sprintf(buf, "%s\n", report); +} +static DEVICE_ATTR(bypass, 0444, + regulator_bypass_show, NULL); /* * These are the only attributes are present for all regulators. @@ -778,6 +803,9 @@ static void print_constraints(struct regulator_dev *rdev) if (constraints->valid_modes_mask & REGULATOR_MODE_STANDBY) count += sprintf(buf + count, "standby"); + if (!count) + sprintf(buf, "no parameters"); + rdev_info(rdev, "%s\n", buf); if ((constraints->min_uV != constraints->max_uV) && @@ -974,6 +1002,7 @@ static int set_supply(struct regulator_dev *rdev, err = -ENOMEM; return err; } + supply_rdev->open_count++; return 0; } @@ -1720,6 +1749,9 @@ int regulator_disable_deferred(struct regulator *regulator, int ms) if (regulator->always_on) return 0; + if (!ms) + return regulator_disable(regulator); + mutex_lock(&rdev->mutex); rdev->deferred_disables++; mutex_unlock(&rdev->mutex); @@ -2178,9 +2210,12 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, } } - if (ret == 0 && best_val >= 0) + if (ret == 0 && best_val >= 0) { + unsigned long data = best_val; + _notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE, - (void *)best_val); + (void *)data); + } trace_regulator_set_voltage_complete(rdev_get_name(rdev), best_val); @@ -2291,8 +2326,8 @@ int regulator_set_voltage_time(struct regulator *regulator, EXPORT_SYMBOL_GPL(regulator_set_voltage_time); /** - *regulator_set_voltage_time_sel - get raise/fall time - * @regulator: regulator source + * regulator_set_voltage_time_sel - get raise/fall time + * @rdev: regulator source device * @old_selector: selector for starting voltage * @new_selector: selector for target voltage * @@ -2388,6 +2423,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) ret = rdev->desc->ops->list_voltage(rdev, sel); } else if (rdev->desc->ops->get_voltage) { ret = rdev->desc->ops->get_voltage(rdev); + } else if (rdev->desc->ops->list_voltage) { + ret = rdev->desc->ops->list_voltage(rdev, 0); } else { return -EINVAL; } @@ -2674,6 +2711,100 @@ out: EXPORT_SYMBOL_GPL(regulator_set_optimum_mode); /** + * regulator_set_bypass_regmap - Default set_bypass() using regmap + * + * @rdev: device to operate on. + * @enable: state to set. + */ +int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable) +{ + unsigned int val; + + if (enable) + val = rdev->desc->bypass_mask; + else + val = 0; + + return regmap_update_bits(rdev->regmap, rdev->desc->bypass_reg, + rdev->desc->bypass_mask, val); +} +EXPORT_SYMBOL_GPL(regulator_set_bypass_regmap); + +/** + * regulator_get_bypass_regmap - Default get_bypass() using regmap + * + * @rdev: device to operate on. + * @enable: current state. + */ +int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable) +{ + unsigned int val; + int ret; + + ret = regmap_read(rdev->regmap, rdev->desc->bypass_reg, &val); + if (ret != 0) + return ret; + + *enable = val & rdev->desc->bypass_mask; + + return 0; +} +EXPORT_SYMBOL_GPL(regulator_get_bypass_regmap); + +/** + * regulator_allow_bypass - allow the regulator to go into bypass mode + * + * @regulator: Regulator to configure + * @allow: enable or disable bypass mode + * + * Allow the regulator to go into bypass mode if all other consumers + * for the regulator also enable bypass mode and the machine + * constraints allow this. Bypass mode means that the regulator is + * simply passing the input directly to the output with no regulation. + */ +int regulator_allow_bypass(struct regulator *regulator, bool enable) +{ + struct regulator_dev *rdev = regulator->rdev; + int ret = 0; + + if (!rdev->desc->ops->set_bypass) + return 0; + + if (rdev->constraints && + !(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_BYPASS)) + return 0; + + mutex_lock(&rdev->mutex); + + if (enable && !regulator->bypass) { + rdev->bypass_count++; + + if (rdev->bypass_count == rdev->open_count) { + ret = rdev->desc->ops->set_bypass(rdev, enable); + if (ret != 0) + rdev->bypass_count--; + } + + } else if (!enable && regulator->bypass) { + rdev->bypass_count--; + + if (rdev->bypass_count != rdev->open_count) { + ret = rdev->desc->ops->set_bypass(rdev, enable); + if (ret != 0) + rdev->bypass_count++; + } + } + + if (ret == 0) + regulator->bypass = enable; + + mutex_unlock(&rdev->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(regulator_allow_bypass); + +/** * regulator_register_notifier - register regulator event notifier * @regulator: regulator source * @nb: notifier block @@ -3011,7 +3142,8 @@ static int add_regulator_attributes(struct regulator_dev *rdev) /* some attributes need specific methods to be displayed */ if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || - (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0)) { + (ops->get_voltage_sel && ops->get_voltage_sel(rdev) >= 0) || + (ops->list_voltage && ops->list_voltage(rdev, 0) >= 0)) { status = device_create_file(dev, &dev_attr_microvolts); if (status < 0) return status; @@ -3036,6 +3168,11 @@ static int add_regulator_attributes(struct regulator_dev *rdev) if (status < 0) return status; } + if (ops->get_bypass) { + status = device_create_file(dev, &dev_attr_bypass); + if (status < 0) + return status; + } /* some attributes are type-specific */ if (rdev->desc->type == REGULATOR_CURRENT) { @@ -3124,6 +3261,8 @@ static void rdev_init_debugfs(struct regulator_dev *rdev) &rdev->use_count); debugfs_create_u32("open_count", 0444, rdev->debugfs, &rdev->open_count); + debugfs_create_u32("bypass_count", 0444, rdev->debugfs, + &rdev->bypass_count); } /** @@ -3189,8 +3328,10 @@ regulator_register(const struct regulator_desc *regulator_desc, rdev->desc = regulator_desc; if (config->regmap) rdev->regmap = config->regmap; - else + else if (dev_get_regmap(dev, NULL)) rdev->regmap = dev_get_regmap(dev, NULL); + else if (dev->parent) + rdev->regmap = dev_get_regmap(dev->parent, NULL); INIT_LIST_HEAD(&rdev->consumer_list); INIT_LIST_HEAD(&rdev->list); BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); diff --git a/drivers/regulator/da9052-regulator.c b/drivers/regulator/da9052-regulator.c index 903299cf15c..27355b1199e 100644 --- a/drivers/regulator/da9052-regulator.c +++ b/drivers/regulator/da9052-regulator.c @@ -133,8 +133,8 @@ static int da9052_dcdc_set_current_limit(struct regulator_dev *rdev, int min_uA, max_uA < da9052_current_limits[row][DA9052_MIN_UA]) return -EINVAL; - for (i = 0; i < DA9052_CURRENT_RANGE; i++) { - if (min_uA <= da9052_current_limits[row][i]) { + for (i = DA9052_CURRENT_RANGE - 1; i >= 0; i--) { + if (da9052_current_limits[row][i] <= max_uA) { reg_val = i; break; } diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c index 86f655c7f7a..03a1d7c11ef 100644 --- a/drivers/regulator/dummy.c +++ b/drivers/regulator/dummy.c @@ -30,7 +30,7 @@ static struct regulator_init_data dummy_initdata; static struct regulator_ops dummy_ops; static struct regulator_desc dummy_desc = { - .name = "dummy", + .name = "regulator-dummy", .id = -1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c new file mode 100644 index 00000000000..339f4d732e9 --- /dev/null +++ b/drivers/regulator/fan53555.c @@ -0,0 +1,322 @@ +/* + * FAN53555 Fairchild Digitally Programmable TinyBuck Regulator Driver. + * + * Supported Part Numbers: + * FAN53555UC00X/01X/03X/04X/05X + * + * Copyright (c) 2012 Marvell Technology Ltd. + * Yunfan Zhang <yfzhang@marvell.com> + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/module.h> +#include <linux/param.h> +#include <linux/err.h> +#include <linux/platform_device.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> +#include <linux/i2c.h> +#include <linux/slab.h> +#include <linux/regmap.h> +#include <linux/regulator/fan53555.h> + +/* Voltage setting */ +#define FAN53555_VSEL0 0x00 +#define FAN53555_VSEL1 0x01 +/* Control register */ +#define FAN53555_CONTROL 0x02 +/* IC Type */ +#define FAN53555_ID1 0x03 +/* IC mask version */ +#define FAN53555_ID2 0x04 +/* Monitor register */ +#define FAN53555_MONITOR 0x05 + +/* VSEL bit definitions */ +#define VSEL_BUCK_EN (1 << 7) +#define VSEL_MODE (1 << 6) +#define VSEL_NSEL_MASK 0x3F +/* Chip ID and Verison */ +#define DIE_ID 0x0F /* ID1 */ +#define DIE_REV 0x0F /* ID2 */ +/* Control bit definitions */ +#define CTL_OUTPUT_DISCHG (1 << 7) +#define CTL_SLEW_MASK (0x7 << 4) +#define CTL_SLEW_SHIFT 4 +#define CTL_RESET (1 << 2) + +#define FAN53555_NVOLTAGES 64 /* Numbers of voltages */ + +/* IC Type */ +enum { + FAN53555_CHIP_ID_00 = 0, + FAN53555_CHIP_ID_01, + FAN53555_CHIP_ID_02, + FAN53555_CHIP_ID_03, + FAN53555_CHIP_ID_04, + FAN53555_CHIP_ID_05, +}; + +struct fan53555_device_info { + struct regmap *regmap; + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + struct regulator_init_data *regulator; + /* IC Type and Rev */ + int chip_id; + int chip_rev; + /* Voltage setting register */ + unsigned int vol_reg; + unsigned int sleep_reg; + /* Voltage range and step(linear) */ + unsigned int vsel_min; + unsigned int vsel_step; + /* Voltage slew rate limiting */ + unsigned int slew_rate; + /* Sleep voltage cache */ + unsigned int sleep_vol_cache; +}; + +static int fan53555_set_suspend_voltage(struct regulator_dev *rdev, int uV) +{ + struct fan53555_device_info *di = rdev_get_drvdata(rdev); + int ret; + + if (di->sleep_vol_cache == uV) + return 0; + ret = regulator_map_voltage_linear(rdev, uV, uV); + if (ret < 0) + return -EINVAL; + ret = regmap_update_bits(di->regmap, di->sleep_reg, + VSEL_NSEL_MASK, ret); + if (ret < 0) + return -EINVAL; + /* Cache the sleep voltage setting. + * Might not be the real voltage which is rounded */ + di->sleep_vol_cache = uV; + + return 0; +} + +static int fan53555_set_mode(struct regulator_dev *rdev, unsigned int mode) +{ + struct fan53555_device_info *di = rdev_get_drvdata(rdev); + + switch (mode) { + case REGULATOR_MODE_FAST: + regmap_update_bits(di->regmap, di->vol_reg, + VSEL_MODE, VSEL_MODE); + break; + case REGULATOR_MODE_NORMAL: + regmap_update_bits(di->regmap, di->vol_reg, VSEL_MODE, 0); + break; + default: + return -EINVAL; + } + return 0; +} + +static unsigned int fan53555_get_mode(struct regulator_dev *rdev) +{ + struct fan53555_device_info *di = rdev_get_drvdata(rdev); + unsigned int val; + int ret = 0; + + ret = regmap_read(di->regmap, di->vol_reg, &val); + if (ret < 0) + return ret; + if (val & VSEL_MODE) + return REGULATOR_MODE_FAST; + else + return REGULATOR_MODE_NORMAL; +} + +static struct regulator_ops fan53555_regulator_ops = { + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .map_voltage = regulator_map_voltage_linear, + .list_voltage = regulator_list_voltage_linear, + .set_suspend_voltage = fan53555_set_suspend_voltage, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .set_mode = fan53555_set_mode, + .get_mode = fan53555_get_mode, +}; + +/* For 00,01,03,05 options: + * VOUT = 0.60V + NSELx * 10mV, from 0.60 to 1.23V. + * For 04 option: + * VOUT = 0.603V + NSELx * 12.826mV, from 0.603 to 1.411V. + * */ +static int fan53555_device_setup(struct fan53555_device_info *di, + struct fan53555_platform_data *pdata) +{ + unsigned int reg, data, mask; + + /* Setup voltage control register */ + switch (pdata->sleep_vsel_id) { + case FAN53555_VSEL_ID_0: + di->sleep_reg = FAN53555_VSEL0; + di->vol_reg = FAN53555_VSEL1; + break; + case FAN53555_VSEL_ID_1: + di->sleep_reg = FAN53555_VSEL1; + di->vol_reg = FAN53555_VSEL0; + break; + default: + dev_err(di->dev, "Invalid VSEL ID!\n"); + return -EINVAL; + } + /* Init voltage range and step */ + switch (di->chip_id) { + case FAN53555_CHIP_ID_00: + case FAN53555_CHIP_ID_01: + case FAN53555_CHIP_ID_03: + case FAN53555_CHIP_ID_05: + di->vsel_min = 600000; + di->vsel_step = 10000; + break; + case FAN53555_CHIP_ID_04: + di->vsel_min = 603000; + di->vsel_step = 12826; + break; + default: + dev_err(di->dev, + "Chip ID[%d]\n not supported!\n", di->chip_id); + return -EINVAL; + } + /* Init slew rate */ + if (pdata->slew_rate & 0x7) + di->slew_rate = pdata->slew_rate; + else + di->slew_rate = FAN53555_SLEW_RATE_64MV; + reg = FAN53555_CONTROL; + data = di->slew_rate << CTL_SLEW_SHIFT; + mask = CTL_SLEW_MASK; + return regmap_update_bits(di->regmap, reg, mask, data); +} + +static int fan53555_regulator_register(struct fan53555_device_info *di, + struct regulator_config *config) +{ + struct regulator_desc *rdesc = &di->desc; + + rdesc->name = "fan53555-reg"; + rdesc->ops = &fan53555_regulator_ops; + rdesc->type = REGULATOR_VOLTAGE; + rdesc->n_voltages = FAN53555_NVOLTAGES; + rdesc->enable_reg = di->vol_reg; + rdesc->enable_mask = VSEL_BUCK_EN; + rdesc->min_uV = di->vsel_min; + rdesc->uV_step = di->vsel_step; + rdesc->vsel_reg = di->vol_reg; + rdesc->vsel_mask = VSEL_NSEL_MASK; + rdesc->owner = THIS_MODULE; + + di->rdev = regulator_register(&di->desc, config); + if (IS_ERR(di->rdev)) + return PTR_ERR(di->rdev); + return 0; + +} + +static struct regmap_config fan53555_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int __devinit fan53555_regulator_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct fan53555_device_info *di; + struct fan53555_platform_data *pdata; + struct regulator_config config = { }; + unsigned int val; + int ret; + + pdata = client->dev.platform_data; + if (!pdata || !pdata->regulator) { + dev_err(&client->dev, "Platform data not found!\n"); + return -ENODEV; + } + + di = devm_kzalloc(&client->dev, sizeof(struct fan53555_device_info), + GFP_KERNEL); + if (!di) { + dev_err(&client->dev, "Failed to allocate device info data!\n"); + return -ENOMEM; + } + di->regmap = devm_regmap_init_i2c(client, &fan53555_regmap_config); + if (IS_ERR(di->regmap)) { + dev_err(&client->dev, "Failed to allocate regmap!\n"); + return PTR_ERR(di->regmap); + } + di->dev = &client->dev; + di->regulator = pdata->regulator; + i2c_set_clientdata(client, di); + /* Get chip ID */ + ret = regmap_read(di->regmap, FAN53555_ID1, &val); + if (ret < 0) { + dev_err(&client->dev, "Failed to get chip ID!\n"); + return -ENODEV; + } + di->chip_id = val & DIE_ID; + /* Get chip revision */ + ret = regmap_read(di->regmap, FAN53555_ID2, &val); + if (ret < 0) { + dev_err(&client->dev, "Failed to get chip Rev!\n"); + return -ENODEV; + } + di->chip_rev = val & DIE_REV; + dev_info(&client->dev, "FAN53555 Option[%d] Rev[%d] Detected!\n", + di->chip_id, di->chip_rev); + /* Device init */ + ret = fan53555_device_setup(di, pdata); + if (ret < 0) { + dev_err(&client->dev, "Failed to setup device!\n"); + return ret; + } + /* Register regulator */ + config.dev = di->dev; + config.init_data = di->regulator; + config.regmap = di->regmap; + config.driver_data = di; + ret = fan53555_regulator_register(di, &config); + if (ret < 0) + dev_err(&client->dev, "Failed to register regulator!\n"); + return ret; + +} + +static int __devexit fan53555_regulator_remove(struct i2c_client *client) +{ + struct fan53555_device_info *di = i2c_get_clientdata(client); + + regulator_unregister(di->rdev); + return 0; +} + +static const struct i2c_device_id fan53555_id[] = { + {"fan53555", -1}, + { }, +}; + +static struct i2c_driver fan53555_regulator_driver = { + .driver = { + .name = "fan53555-regulator", + }, + .probe = fan53555_regulator_probe, + .remove = __devexit_p(fan53555_regulator_remove), + .id_table = fan53555_id, +}; + +module_i2c_driver(fan53555_regulator_driver); + +MODULE_AUTHOR("Yunfan Zhang <yfzhang@marvell.com>"); +MODULE_DESCRIPTION("FAN53555 regulator driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/isl6271a-regulator.c b/drivers/regulator/isl6271a-regulator.c index 1d145a07ada..d8ecf49a577 100644 --- a/drivers/regulator/isl6271a-regulator.c +++ b/drivers/regulator/isl6271a-regulator.c @@ -73,13 +73,7 @@ static struct regulator_ops isl_core_ops = { .map_voltage = regulator_map_voltage_linear, }; -static int isl6271a_get_fixed_voltage(struct regulator_dev *dev) -{ - return dev->desc->min_uV; -} - static struct regulator_ops isl_fixed_ops = { - .get_voltage = isl6271a_get_fixed_voltage, .list_voltage = regulator_list_voltage_linear, }; diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c index 212c38eaba7..708f4b6a17d 100644 --- a/drivers/regulator/lp872x.c +++ b/drivers/regulator/lp872x.c @@ -86,6 +86,10 @@ #define EXTERN_DVS_USED 0 #define MAX_DELAY 6 +/* Default DVS Mode */ +#define LP8720_DEFAULT_DVS 0 +#define LP8725_DEFAULT_DVS BIT(2) + /* dump registers in regmap-debugfs */ #define MAX_REGISTERS 0x0F @@ -269,9 +273,9 @@ static int lp872x_regulator_enable_time(struct regulator_dev *rdev) return val > MAX_DELAY ? 0 : val * time_step_us; } -static void lp872x_set_dvs(struct lp872x *lp, int gpio) +static void lp872x_set_dvs(struct lp872x *lp, enum lp872x_dvs_sel dvs_sel, + int gpio) { - enum lp872x_dvs_sel dvs_sel = lp->pdata->dvs->vsel; enum lp872x_dvs_state state; state = dvs_sel == SEL_V1 ? DVS_HIGH : DVS_LOW; @@ -339,10 +343,10 @@ static int lp872x_buck_set_voltage_sel(struct regulator_dev *rdev, struct lp872x *lp = rdev_get_drvdata(rdev); enum lp872x_regulator_id buck = rdev_get_id(rdev); u8 addr, mask = LP872X_VOUT_M; - struct lp872x_dvs *dvs = lp->pdata->dvs; + struct lp872x_dvs *dvs = lp->pdata ? lp->pdata->dvs : NULL; if (dvs && gpio_is_valid(dvs->gpio)) - lp872x_set_dvs(lp, dvs->gpio); + lp872x_set_dvs(lp, dvs->vsel, dvs->gpio); addr = lp872x_select_buck_vout_addr(lp, buck); if (!lp872x_is_valid_buck_addr(addr)) @@ -374,8 +378,8 @@ static int lp8725_buck_set_current_limit(struct regulator_dev *rdev, { struct lp872x *lp = rdev_get_drvdata(rdev); enum lp872x_regulator_id buck = rdev_get_id(rdev); - int i, max = ARRAY_SIZE(lp8725_buck_uA); - u8 addr, val; + int i; + u8 addr; switch (buck) { case LP8725_ID_BUCK1: @@ -388,17 +392,15 @@ static int lp8725_buck_set_current_limit(struct regulator_dev *rdev, return -EINVAL; } - for (i = 0 ; i < max ; i++) + for (i = ARRAY_SIZE(lp8725_buck_uA) - 1 ; i >= 0; i--) { if (lp8725_buck_uA[i] >= min_uA && lp8725_buck_uA[i] <= max_uA) - break; - - if (i == max) - return -EINVAL; - - val = i << LP8725_BUCK_CL_S; + return lp872x_update_bits(lp, addr, + LP8725_BUCK_CL_M, + i << LP8725_BUCK_CL_S); + } - return lp872x_update_bits(lp, addr, LP8725_BUCK_CL_M, val); + return -EINVAL; } static int lp8725_buck_get_current_limit(struct regulator_dev *rdev) @@ -727,39 +729,16 @@ static struct regulator_desc lp8725_regulator_desc[] = { }, }; -static int lp872x_check_dvs_validity(struct lp872x *lp) -{ - struct lp872x_dvs *dvs = lp->pdata->dvs; - u8 val = 0; - int ret; - - ret = lp872x_read_byte(lp, LP872X_GENERAL_CFG, &val); - if (ret) - return ret; - - ret = 0; - if (lp->chipid == LP8720) { - if (val & LP8720_EXT_DVS_M) - ret = dvs ? 0 : -EINVAL; - } else { - if ((val & LP8725_DVS1_M) == EXTERN_DVS_USED) - ret = dvs ? 0 : -EINVAL; - } - - return ret; -} - static int lp872x_init_dvs(struct lp872x *lp) { int ret, gpio; - struct lp872x_dvs *dvs = lp->pdata->dvs; + struct lp872x_dvs *dvs = lp->pdata ? lp->pdata->dvs : NULL; enum lp872x_dvs_state pinstate; + u8 mask[] = { LP8720_EXT_DVS_M, LP8725_DVS1_M | LP8725_DVS2_M }; + u8 default_dvs_mode[] = { LP8720_DEFAULT_DVS, LP8725_DEFAULT_DVS }; - ret = lp872x_check_dvs_validity(lp); - if (ret) { - dev_warn(lp->dev, "invalid dvs data: %d\n", ret); - return ret; - } + if (!dvs) + goto set_default_dvs_mode; gpio = dvs->gpio; if (!gpio_is_valid(gpio)) { @@ -778,6 +757,10 @@ static int lp872x_init_dvs(struct lp872x *lp) lp->dvs_gpio = gpio; return 0; + +set_default_dvs_mode: + return lp872x_update_bits(lp, LP872X_GENERAL_CFG, mask[lp->chipid], + default_dvs_mode[lp->chipid]); } static int lp872x_config(struct lp872x *lp) @@ -785,24 +768,29 @@ static int lp872x_config(struct lp872x *lp) struct lp872x_platform_data *pdata = lp->pdata; int ret; - if (!pdata->update_config) - return 0; + if (!pdata || !pdata->update_config) + goto init_dvs; ret = lp872x_write_byte(lp, LP872X_GENERAL_CFG, pdata->general_config); if (ret) return ret; +init_dvs: return lp872x_init_dvs(lp); } static struct regulator_init_data *lp872x_find_regulator_init_data(int id, struct lp872x *lp) { + struct lp872x_platform_data *pdata = lp->pdata; int i; + if (!pdata) + return NULL; + for (i = 0; i < lp->num_regulators; i++) { - if (lp->pdata->regulator_data[i].id == id) - return lp->pdata->regulator_data[i].init_data; + if (pdata->regulator_data[i].id == id) + return pdata->regulator_data[i].init_data; } return NULL; @@ -863,18 +851,12 @@ static const struct regmap_config lp872x_regmap_config = { static int lp872x_probe(struct i2c_client *cl, const struct i2c_device_id *id) { struct lp872x *lp; - struct lp872x_platform_data *pdata = cl->dev.platform_data; int ret, size, num_regulators; const int lp872x_num_regulators[] = { [LP8720] = LP8720_NUM_REGULATORS, [LP8725] = LP8725_NUM_REGULATORS, }; - if (!pdata) { - dev_err(&cl->dev, "no platform data\n"); - return -EINVAL; - } - lp = devm_kzalloc(&cl->dev, sizeof(struct lp872x), GFP_KERNEL); if (!lp) goto err_mem; @@ -894,7 +876,7 @@ static int lp872x_probe(struct i2c_client *cl, const struct i2c_device_id *id) } lp->dev = &cl->dev; - lp->pdata = pdata; + lp->pdata = cl->dev.platform_data; lp->chipid = id->driver_data; lp->num_regulators = num_regulators; i2c_set_clientdata(cl, lp); diff --git a/drivers/regulator/lp8788-buck.c b/drivers/regulator/lp8788-buck.c index 6356e821400..ba3e0aa402d 100644 --- a/drivers/regulator/lp8788-buck.c +++ b/drivers/regulator/lp8788-buck.c @@ -69,6 +69,9 @@ #define PIN_HIGH 1 #define ENABLE_TIME_USEC 32 +#define BUCK_FPWM_MASK(x) (1 << (x)) +#define BUCK_FPWM_SHIFT(x) (x) + enum lp8788_dvs_state { DVS_LOW = GPIOF_OUT_INIT_LOW, DVS_HIGH = GPIOF_OUT_INIT_HIGH, @@ -86,15 +89,9 @@ enum lp8788_buck_id { BUCK4, }; -struct lp8788_pwm_map { - u8 mask; - u8 shift; -}; - struct lp8788_buck { struct lp8788 *lp; struct regulator_dev *regulator; - struct lp8788_pwm_map *pmap; void *dvs; }; @@ -106,29 +103,6 @@ static const int lp8788_buck_vtbl[] = { 1950000, 2000000, }; -/* buck pwm mode selection : used for set/get_mode in regulator ops - * @forced pwm : fast mode - * @auto pwm : normal mode - */ -static struct lp8788_pwm_map buck_pmap[] = { - [BUCK1] = { - .mask = LP8788_FPWM_BUCK1_M, - .shift = LP8788_FPWM_BUCK1_S, - }, - [BUCK2] = { - .mask = LP8788_FPWM_BUCK2_M, - .shift = LP8788_FPWM_BUCK2_S, - }, - [BUCK3] = { - .mask = LP8788_FPWM_BUCK3_M, - .shift = LP8788_FPWM_BUCK3_S, - }, - [BUCK4] = { - .mask = LP8788_FPWM_BUCK4_M, - .shift = LP8788_FPWM_BUCK4_S, - }, -}; - static const u8 buck1_vout_addr[] = { LP8788_BUCK1_VOUT0, LP8788_BUCK1_VOUT1, LP8788_BUCK1_VOUT2, LP8788_BUCK1_VOUT3, @@ -347,41 +321,37 @@ static int lp8788_buck_enable_time(struct regulator_dev *rdev) static int lp8788_buck_set_mode(struct regulator_dev *rdev, unsigned int mode) { struct lp8788_buck *buck = rdev_get_drvdata(rdev); - struct lp8788_pwm_map *pmap = buck->pmap; - u8 val; - - if (!pmap) - return -EINVAL; + enum lp8788_buck_id id = rdev_get_id(rdev); + u8 mask, val; + mask = BUCK_FPWM_MASK(id); switch (mode) { case REGULATOR_MODE_FAST: - val = LP8788_FORCE_PWM << pmap->shift; + val = LP8788_FORCE_PWM << BUCK_FPWM_SHIFT(id); break; case REGULATOR_MODE_NORMAL: - val = LP8788_AUTO_PWM << pmap->shift; + val = LP8788_AUTO_PWM << BUCK_FPWM_SHIFT(id); break; default: return -EINVAL; } - return lp8788_update_bits(buck->lp, LP8788_BUCK_PWM, pmap->mask, val); + return lp8788_update_bits(buck->lp, LP8788_BUCK_PWM, mask, val); } static unsigned int lp8788_buck_get_mode(struct regulator_dev *rdev) { struct lp8788_buck *buck = rdev_get_drvdata(rdev); - struct lp8788_pwm_map *pmap = buck->pmap; + enum lp8788_buck_id id = rdev_get_id(rdev); u8 val; int ret; - if (!pmap) - return -EINVAL; - ret = lp8788_read_byte(buck->lp, LP8788_BUCK_PWM, &val); if (ret) return ret; - return val & pmap->mask ? REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL; + return val & BUCK_FPWM_MASK(id) ? + REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL; } static struct regulator_ops lp8788_buck12_ops = { @@ -459,27 +429,6 @@ static struct regulator_desc lp8788_buck_desc[] = { }, }; -static int lp8788_set_default_dvs_ctrl_mode(struct lp8788 *lp, - enum lp8788_buck_id id) -{ - u8 mask, val; - - switch (id) { - case BUCK1: - mask = LP8788_BUCK1_DVS_SEL_M; - val = LP8788_BUCK1_DVS_I2C; - break; - case BUCK2: - mask = LP8788_BUCK2_DVS_SEL_M; - val = LP8788_BUCK2_DVS_I2C; - break; - default: - return 0; - } - - return lp8788_update_bits(lp, LP8788_BUCK_DVS_SEL, mask, val); -} - static int _gpio_request(struct lp8788_buck *buck, int gpio, char *name) { struct device *dev = buck->lp->dev; @@ -530,6 +479,7 @@ static int lp8788_init_dvs(struct lp8788_buck *buck, enum lp8788_buck_id id) struct lp8788_platform_data *pdata = buck->lp->pdata; u8 mask[] = { LP8788_BUCK1_DVS_SEL_M, LP8788_BUCK2_DVS_SEL_M }; u8 val[] = { LP8788_BUCK1_DVS_PIN, LP8788_BUCK2_DVS_PIN }; + u8 default_dvs_mode[] = { LP8788_BUCK1_DVS_I2C, LP8788_BUCK2_DVS_I2C }; /* no dvs for buck3, 4 */ if (id == BUCK3 || id == BUCK4) @@ -550,7 +500,8 @@ static int lp8788_init_dvs(struct lp8788_buck *buck, enum lp8788_buck_id id) val[id]); set_default_dvs_mode: - return lp8788_set_default_dvs_ctrl_mode(buck->lp, id); + return lp8788_update_bits(buck->lp, LP8788_BUCK_DVS_SEL, mask[id], + default_dvs_mode[id]); } static __devinit int lp8788_buck_probe(struct platform_device *pdev) @@ -567,7 +518,6 @@ static __devinit int lp8788_buck_probe(struct platform_device *pdev) return -ENOMEM; buck->lp = lp; - buck->pmap = &buck_pmap[id]; ret = lp8788_init_dvs(buck, id); if (ret) diff --git a/drivers/regulator/lp8788-ldo.c b/drivers/regulator/lp8788-ldo.c index d2122e41a96..6796eeb47dc 100644 --- a/drivers/regulator/lp8788-ldo.c +++ b/drivers/regulator/lp8788-ldo.c @@ -496,6 +496,7 @@ static struct regulator_desc lp8788_dldo_desc[] = { .name = "dldo12", .id = DLDO12, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_B, @@ -521,6 +522,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo2", .id = ALDO2, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_B, @@ -530,6 +532,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo3", .id = ALDO3, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_B, @@ -539,6 +542,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo4", .id = ALDO4, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_B, @@ -548,6 +552,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo5", .id = ALDO5, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_C, @@ -583,6 +588,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo8", .id = ALDO8, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_C, @@ -592,6 +598,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo9", .id = ALDO9, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_C, @@ -601,6 +608,7 @@ static struct regulator_desc lp8788_aldo_desc[] = { .name = "aldo10", .id = ALDO10, .ops = &lp8788_ldo_voltage_fixed_ops, + .n_voltages = 1, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, .enable_reg = LP8788_EN_LDO_C, diff --git a/drivers/regulator/max77686.c b/drivers/regulator/max77686.c index c564af6f05a..2a67d08658a 100644 --- a/drivers/regulator/max77686.c +++ b/drivers/regulator/max77686.c @@ -66,7 +66,7 @@ enum max77686_ramp_rate { }; struct max77686_data { - struct regulator_dev **rdev; + struct regulator_dev *rdev[MAX77686_REGULATORS]; }; static int max77686_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay) @@ -265,6 +265,7 @@ static int max77686_pmic_dt_parse_pdata(struct max77686_dev *iodev, rmatch.of_node = NULL; of_regulator_match(iodev->dev, regulators_np, &rmatch, 1); rdata[i].initdata = rmatch.init_data; + rdata[i].of_node = rmatch.of_node; } pdata->regulators = rdata; @@ -283,10 +284,8 @@ static __devinit int max77686_pmic_probe(struct platform_device *pdev) { struct max77686_dev *iodev = dev_get_drvdata(pdev->dev.parent); struct max77686_platform_data *pdata = dev_get_platdata(iodev->dev); - struct regulator_dev **rdev; struct max77686_data *max77686; - int i, size; - int ret = 0; + int i, ret = 0; struct regulator_config config = { }; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -313,45 +312,38 @@ static __devinit int max77686_pmic_probe(struct platform_device *pdev) if (!max77686) return -ENOMEM; - size = sizeof(struct regulator_dev *) * MAX77686_REGULATORS; - max77686->rdev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); - if (!max77686->rdev) - return -ENOMEM; - - rdev = max77686->rdev; config.dev = &pdev->dev; config.regmap = iodev->regmap; platform_set_drvdata(pdev, max77686); for (i = 0; i < MAX77686_REGULATORS; i++) { config.init_data = pdata->regulators[i].initdata; + config.of_node = pdata->regulators[i].of_node; - rdev[i] = regulator_register(®ulators[i], &config); - if (IS_ERR(rdev[i])) { - ret = PTR_ERR(rdev[i]); + max77686->rdev[i] = regulator_register(®ulators[i], &config); + if (IS_ERR(max77686->rdev[i])) { + ret = PTR_ERR(max77686->rdev[i]); dev_err(&pdev->dev, "regulator init failed for %d\n", i); - rdev[i] = NULL; - goto err; + max77686->rdev[i] = NULL; + goto err; } } return 0; err: while (--i >= 0) - regulator_unregister(rdev[i]); + regulator_unregister(max77686->rdev[i]); return ret; } static int __devexit max77686_pmic_remove(struct platform_device *pdev) { struct max77686_data *max77686 = platform_get_drvdata(pdev); - struct regulator_dev **rdev = max77686->rdev; int i; for (i = 0; i < MAX77686_REGULATORS; i++) - if (rdev[i]) - regulator_unregister(rdev[i]); + regulator_unregister(max77686->rdev[i]); return 0; } diff --git a/drivers/regulator/max8907-regulator.c b/drivers/regulator/max8907-regulator.c new file mode 100644 index 00000000000..af7607515ab --- /dev/null +++ b/drivers/regulator/max8907-regulator.c @@ -0,0 +1,408 @@ +/* + * max8907-regulator.c -- support regulators in max8907 + * + * Copyright (C) 2010 Gyungoh Yoo <jack.yoo@maxim-ic.com> + * Copyright (C) 2010-2012, NVIDIA CORPORATION. All rights reserved. + * + * Portions based on drivers/regulator/tps65910-regulator.c, + * Copyright 2010 Texas Instruments Inc. + * Author: Graeme Gregory <gg@slimlogic.co.uk> + * Author: Jorge Eduardo Candelaria <jedu@slimlogic.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/mfd/core.h> +#include <linux/mfd/max8907.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> +#include <linux/regulator/of_regulator.h> +#include <linux/regmap.h> +#include <linux/slab.h> + +#define MAX8907_II2RR_VERSION_MASK 0xF0 +#define MAX8907_II2RR_VERSION_REV_A 0x00 +#define MAX8907_II2RR_VERSION_REV_B 0x10 +#define MAX8907_II2RR_VERSION_REV_C 0x30 + +struct max8907_regulator { + struct regulator_desc desc[MAX8907_NUM_REGULATORS]; + struct regulator_dev *rdev[MAX8907_NUM_REGULATORS]; +}; + +#define REG_MBATT() \ + [MAX8907_MBATT] = { \ + .name = "MBATT", \ + .supply_name = "mbatt", \ + .id = MAX8907_MBATT, \ + .ops = &max8907_mbatt_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + } + +#define REG_LDO(ids, supply, base, min, max, step) \ + [MAX8907_##ids] = { \ + .name = #ids, \ + .supply_name = supply, \ + .id = MAX8907_##ids, \ + .n_voltages = ((max) - (min)) / (step) + 1, \ + .ops = &max8907_ldo_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = (min), \ + .uV_step = (step), \ + .vsel_reg = (base) + MAX8907_VOUT, \ + .vsel_mask = 0x3f, \ + .enable_reg = (base) + MAX8907_CTL, \ + .enable_mask = MAX8907_MASK_LDO_EN, \ + } + +#define REG_FIXED(ids, supply, voltage) \ + [MAX8907_##ids] = { \ + .name = #ids, \ + .supply_name = supply, \ + .id = MAX8907_##ids, \ + .n_voltages = 1, \ + .ops = &max8907_fixed_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = (voltage), \ + } + +#define REG_OUT5V(ids, supply, base, voltage) \ + [MAX8907_##ids] = { \ + .name = #ids, \ + .supply_name = supply, \ + .id = MAX8907_##ids, \ + .n_voltages = 1, \ + .ops = &max8907_out5v_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = (voltage), \ + .enable_reg = (base), \ + .enable_mask = MAX8907_MASK_OUT5V_EN, \ + } + +#define REG_BBAT(ids, supply, base, min, max, step) \ + [MAX8907_##ids] = { \ + .name = #ids, \ + .supply_name = supply, \ + .id = MAX8907_##ids, \ + .n_voltages = ((max) - (min)) / (step) + 1, \ + .ops = &max8907_bbat_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = (min), \ + .uV_step = (step), \ + .vsel_reg = (base), \ + .vsel_mask = MAX8907_MASK_VBBATTCV, \ + } + +#define LDO_750_50(id, supply, base) REG_LDO(id, supply, (base), \ + 750000, 3900000, 50000) +#define LDO_650_25(id, supply, base) REG_LDO(id, supply, (base), \ + 650000, 2225000, 25000) + +static struct regulator_ops max8907_mbatt_ops = { +}; + +static struct regulator_ops max8907_ldo_ops = { + .list_voltage = regulator_list_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, +}; + +static struct regulator_ops max8907_ldo_hwctl_ops = { + .list_voltage = regulator_list_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, +}; + +static struct regulator_ops max8907_fixed_ops = { + .list_voltage = regulator_list_voltage_linear, +}; + +static struct regulator_ops max8907_out5v_ops = { + .list_voltage = regulator_list_voltage_linear, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, +}; + +static struct regulator_ops max8907_out5v_hwctl_ops = { + .list_voltage = regulator_list_voltage_linear, +}; + +static struct regulator_ops max8907_bbat_ops = { + .list_voltage = regulator_list_voltage_linear, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, +}; + +static struct regulator_desc max8907_regulators[] = { + REG_MBATT(), + REG_LDO(SD1, "in-v1", MAX8907_REG_SDCTL1, 650000, 2225000, 25000), + REG_LDO(SD2, "in-v2", MAX8907_REG_SDCTL2, 637500, 1425000, 12500), + REG_LDO(SD3, "in-v3", MAX8907_REG_SDCTL3, 750000, 3900000, 50000), + LDO_750_50(LDO1, "in1", MAX8907_REG_LDOCTL1), + LDO_650_25(LDO2, "in2", MAX8907_REG_LDOCTL2), + LDO_650_25(LDO3, "in3", MAX8907_REG_LDOCTL3), + LDO_750_50(LDO4, "in4", MAX8907_REG_LDOCTL4), + LDO_750_50(LDO5, "in5", MAX8907_REG_LDOCTL5), + LDO_750_50(LDO6, "in6", MAX8907_REG_LDOCTL6), + LDO_750_50(LDO7, "in7", MAX8907_REG_LDOCTL7), + LDO_750_50(LDO8, "in8", MAX8907_REG_LDOCTL8), + LDO_750_50(LDO9, "in9", MAX8907_REG_LDOCTL9), + LDO_750_50(LDO10, "in10", MAX8907_REG_LDOCTL10), + LDO_750_50(LDO11, "in11", MAX8907_REG_LDOCTL11), + LDO_750_50(LDO12, "in12", MAX8907_REG_LDOCTL12), + LDO_750_50(LDO13, "in13", MAX8907_REG_LDOCTL13), + LDO_750_50(LDO14, "in14", MAX8907_REG_LDOCTL14), + LDO_750_50(LDO15, "in15", MAX8907_REG_LDOCTL15), + LDO_750_50(LDO16, "in16", MAX8907_REG_LDOCTL16), + LDO_650_25(LDO17, "in17", MAX8907_REG_LDOCTL17), + LDO_650_25(LDO18, "in18", MAX8907_REG_LDOCTL18), + LDO_750_50(LDO19, "in19", MAX8907_REG_LDOCTL19), + LDO_750_50(LDO20, "in20", MAX8907_REG_LDOCTL20), + REG_OUT5V(OUT5V, "mbatt", MAX8907_REG_OUT5VEN, 5000000), + REG_OUT5V(OUT33V, "mbatt", MAX8907_REG_OUT33VEN, 3300000), + REG_BBAT(BBAT, "MBATT", MAX8907_REG_BBAT_CNFG, + 2400000, 3000000, 200000), + REG_FIXED(SDBY, "MBATT", 1200000), + REG_FIXED(VRTC, "MBATT", 3300000), +}; + +#ifdef CONFIG_OF + +#define MATCH(_name, _id) \ + [MAX8907_##_id] = { \ + .name = #_name, \ + .driver_data = (void *)&max8907_regulators[MAX8907_##_id], \ + } + +static struct of_regulator_match max8907_matches[] = { + MATCH(mbatt, MBATT), + MATCH(sd1, SD1), + MATCH(sd2, SD2), + MATCH(sd3, SD3), + MATCH(ldo1, LDO1), + MATCH(ldo2, LDO2), + MATCH(ldo3, LDO3), + MATCH(ldo4, LDO4), + MATCH(ldo5, LDO5), + MATCH(ldo6, LDO6), + MATCH(ldo7, LDO7), + MATCH(ldo8, LDO8), + MATCH(ldo9, LDO9), + MATCH(ldo10, LDO10), + MATCH(ldo11, LDO11), + MATCH(ldo12, LDO12), + MATCH(ldo13, LDO13), + MATCH(ldo14, LDO14), + MATCH(ldo15, LDO15), + MATCH(ldo16, LDO16), + MATCH(ldo17, LDO17), + MATCH(ldo18, LDO18), + MATCH(ldo19, LDO19), + MATCH(ldo20, LDO20), + MATCH(out5v, OUT5V), + MATCH(out33v, OUT33V), + MATCH(bbat, BBAT), + MATCH(sdby, SDBY), + MATCH(vrtc, VRTC), +}; + +static int max8907_regulator_parse_dt(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.parent->of_node; + struct device_node *regulators; + int ret; + + if (!pdev->dev.parent->of_node) + return 0; + + regulators = of_find_node_by_name(np, "regulators"); + if (!regulators) { + dev_err(&pdev->dev, "regulators node not found\n"); + return -EINVAL; + } + + ret = of_regulator_match(pdev->dev.parent, regulators, + max8907_matches, + ARRAY_SIZE(max8907_matches)); + if (ret < 0) { + dev_err(&pdev->dev, "Error parsing regulator init data: %d\n", + ret); + return ret; + } + + return 0; +} + +static inline struct regulator_init_data *match_init_data(int index) +{ + return max8907_matches[index].init_data; +} + +static inline struct device_node *match_of_node(int index) +{ + return max8907_matches[index].of_node; +} +#else +static int max8907_regulator_parse_dt(struct platform_device *pdev) +{ + return 0; +} + +static inline struct regulator_init_data *match_init_data(int index) +{ + return NULL; +} + +static inline struct device_node *match_of_node(int index) +{ + return NULL; +} +#endif + +static __devinit int max8907_regulator_probe(struct platform_device *pdev) +{ + struct max8907 *max8907 = dev_get_drvdata(pdev->dev.parent); + struct max8907_platform_data *pdata = dev_get_platdata(max8907->dev); + int ret; + struct max8907_regulator *pmic; + unsigned int val; + int i; + struct regulator_config config = {}; + struct regulator_init_data *idata; + const char *mbatt_rail_name = NULL; + + ret = max8907_regulator_parse_dt(pdev); + if (ret) + return ret; + + pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL); + if (!pmic) { + dev_err(&pdev->dev, "Failed to alloc pmic\n"); + return -ENOMEM; + } + platform_set_drvdata(pdev, pmic); + + memcpy(pmic->desc, max8907_regulators, sizeof(pmic->desc)); + + /* Backwards compatibility with MAX8907B; SD1 uses different voltages */ + regmap_read(max8907->regmap_gen, MAX8907_REG_II2RR, &val); + if ((val & MAX8907_II2RR_VERSION_MASK) == + MAX8907_II2RR_VERSION_REV_B) { + pmic->desc[MAX8907_SD1].min_uV = 637500; + pmic->desc[MAX8907_SD1].uV_step = 12500; + pmic->desc[MAX8907_SD1].n_voltages = + (1425000 - 637500) / 12500 + 1; + } + + for (i = 0; i < MAX8907_NUM_REGULATORS; i++) { + config.dev = pdev->dev.parent; + if (pdata) + idata = pdata->init_data[i]; + else + idata = match_init_data(i); + config.init_data = idata; + config.driver_data = pmic; + config.regmap = max8907->regmap_gen; + config.of_node = match_of_node(i); + + switch (pmic->desc[i].id) { + case MAX8907_MBATT: + if (idata && idata->constraints.name) + mbatt_rail_name = idata->constraints.name; + else + mbatt_rail_name = pmic->desc[i].name; + break; + case MAX8907_BBAT: + case MAX8907_SDBY: + case MAX8907_VRTC: + idata->supply_regulator = mbatt_rail_name; + break; + } + + if (pmic->desc[i].ops == &max8907_ldo_ops) { + regmap_read(config.regmap, pmic->desc[i].enable_reg, + &val); + if ((val & MAX8907_MASK_LDO_SEQ) != + MAX8907_MASK_LDO_SEQ) + pmic->desc[i].ops = &max8907_ldo_hwctl_ops; + } else if (pmic->desc[i].ops == &max8907_out5v_ops) { + regmap_read(config.regmap, pmic->desc[i].enable_reg, + &val); + if ((val & (MAX8907_MASK_OUT5V_VINEN | + MAX8907_MASK_OUT5V_ENSRC)) != + MAX8907_MASK_OUT5V_ENSRC) + pmic->desc[i].ops = &max8907_out5v_hwctl_ops; + } + + pmic->rdev[i] = regulator_register(&pmic->desc[i], &config); + if (IS_ERR(pmic->rdev[i])) { + dev_err(&pdev->dev, + "failed to register %s regulator\n", + pmic->desc[i].name); + ret = PTR_ERR(pmic->rdev[i]); + goto err_unregister_regulator; + } + } + + return 0; + +err_unregister_regulator: + while (--i >= 0) + regulator_unregister(pmic->rdev[i]); + return ret; +} + +static __devexit int max8907_regulator_remove(struct platform_device *pdev) +{ + struct max8907_regulator *pmic = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < MAX8907_NUM_REGULATORS; i++) + regulator_unregister(pmic->rdev[i]); + + return 0; +} + +static struct platform_driver max8907_regulator_driver = { + .driver = { + .name = "max8907-regulator", + .owner = THIS_MODULE, + }, + .probe = max8907_regulator_probe, + .remove = __devexit_p(max8907_regulator_remove), +}; + +static int __init max8907_regulator_init(void) +{ + return platform_driver_register(&max8907_regulator_driver); +} + +subsys_initcall(max8907_regulator_init); + +static void __exit max8907_reg_exit(void) +{ + platform_driver_unregister(&max8907_regulator_driver); +} + +module_exit(max8907_reg_exit); + +MODULE_DESCRIPTION("MAX8907 regulator driver"); +MODULE_AUTHOR("Gyungoh Yoo <jack.yoo@maxim-ic.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:max8907-regulator"); diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c index 4932e3449fe..0801a6d0c12 100644 --- a/drivers/regulator/mc13783-regulator.c +++ b/drivers/regulator/mc13783-regulator.c @@ -21,6 +21,30 @@ #include <linux/module.h> #include "mc13xxx.h" +#define MC13783_REG_SWITCHERS0 24 +/* Enable does not exist for SW1A */ +#define MC13783_REG_SWITCHERS0_SW1AEN 0 +#define MC13783_REG_SWITCHERS0_SW1AVSEL 0 +#define MC13783_REG_SWITCHERS0_SW1AVSEL_M (63 << 0) + +#define MC13783_REG_SWITCHERS1 25 +/* Enable does not exist for SW1B */ +#define MC13783_REG_SWITCHERS1_SW1BEN 0 +#define MC13783_REG_SWITCHERS1_SW1BVSEL 0 +#define MC13783_REG_SWITCHERS1_SW1BVSEL_M (63 << 0) + +#define MC13783_REG_SWITCHERS2 26 +/* Enable does not exist for SW2A */ +#define MC13783_REG_SWITCHERS2_SW2AEN 0 +#define MC13783_REG_SWITCHERS2_SW2AVSEL 0 +#define MC13783_REG_SWITCHERS2_SW2AVSEL_M (63 << 0) + +#define MC13783_REG_SWITCHERS3 27 +/* Enable does not exist for SW2B */ +#define MC13783_REG_SWITCHERS3_SW2BEN 0 +#define MC13783_REG_SWITCHERS3_SW2BVSEL 0 +#define MC13783_REG_SWITCHERS3_SW2BVSEL_M (63 << 0) + #define MC13783_REG_SWITCHERS5 29 #define MC13783_REG_SWITCHERS5_SW3EN (1 << 20) #define MC13783_REG_SWITCHERS5_SW3VSEL 18 @@ -93,6 +117,44 @@ /* Voltage Values */ +static const int mc13783_sw1x_val[] = { + 900000, 925000, 950000, 975000, + 1000000, 1025000, 1050000, 1075000, + 1100000, 1125000, 1150000, 1175000, + 1200000, 1225000, 1250000, 1275000, + 1300000, 1325000, 1350000, 1375000, + 1400000, 1425000, 1450000, 1475000, + 1500000, 1525000, 1550000, 1575000, + 1600000, 1625000, 1650000, 1675000, + 1700000, 1700000, 1700000, 1700000, + 1800000, 1800000, 1800000, 1800000, + 1850000, 1850000, 1850000, 1850000, + 2000000, 2000000, 2000000, 2000000, + 2100000, 2100000, 2100000, 2100000, + 2200000, 2200000, 2200000, 2200000, + 2200000, 2200000, 2200000, 2200000, + 2200000, 2200000, 2200000, 2200000, +}; + +static const int mc13783_sw2x_val[] = { + 900000, 925000, 950000, 975000, + 1000000, 1025000, 1050000, 1075000, + 1100000, 1125000, 1150000, 1175000, + 1200000, 1225000, 1250000, 1275000, + 1300000, 1325000, 1350000, 1375000, + 1400000, 1425000, 1450000, 1475000, + 1500000, 1525000, 1550000, 1575000, + 1600000, 1625000, 1650000, 1675000, + 1700000, 1700000, 1700000, 1700000, + 1800000, 1800000, 1800000, 1800000, + 1900000, 1900000, 1900000, 1900000, + 2000000, 2000000, 2000000, 2000000, + 2100000, 2100000, 2100000, 2100000, + 2200000, 2200000, 2200000, 2200000, + 2200000, 2200000, 2200000, 2200000, + 2200000, 2200000, 2200000, 2200000, +}; + static const unsigned int mc13783_sw3_val[] = { 5000000, 5000000, 5000000, 5500000, }; @@ -188,6 +250,10 @@ static struct regulator_ops mc13783_gpo_regulator_ops; MC13783_DEFINE(REG, _name, _reg, _vsel_reg, _voltages) static struct mc13xxx_regulator mc13783_regulators[] = { + MC13783_DEFINE_SW(SW1A, SWITCHERS0, SWITCHERS0, mc13783_sw1x_val), + MC13783_DEFINE_SW(SW1B, SWITCHERS1, SWITCHERS1, mc13783_sw1x_val), + MC13783_DEFINE_SW(SW2A, SWITCHERS2, SWITCHERS2, mc13783_sw2x_val), + MC13783_DEFINE_SW(SW2B, SWITCHERS3, SWITCHERS3, mc13783_sw2x_val), MC13783_DEFINE_SW(SW3, SWITCHERS5, SWITCHERS5, mc13783_sw3_val), MC13783_FIXED_DEFINE(REG, VAUDIO, REGULATORMODE0, mc13783_vaudio_val), @@ -238,9 +304,10 @@ static int mc13783_powermisc_rmw(struct mc13xxx_regulator_priv *priv, u32 mask, BUG_ON(val & ~mask); + mc13xxx_lock(priv->mc13xxx); ret = mc13xxx_reg_read(mc13783, MC13783_REG_POWERMISC, &valread); if (ret) - return ret; + goto out; /* Update the stored state for Power Gates. */ priv->powermisc_pwgt_state = @@ -253,7 +320,10 @@ static int mc13783_powermisc_rmw(struct mc13xxx_regulator_priv *priv, u32 mask, valread = (valread & ~MC13783_REG_POWERMISC_PWGTSPI_M) | priv->powermisc_pwgt_state; - return mc13xxx_reg_write(mc13783, MC13783_REG_POWERMISC, valread); + ret = mc13xxx_reg_write(mc13783, MC13783_REG_POWERMISC, valread); +out: + mc13xxx_unlock(priv->mc13xxx); + return ret; } static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev) @@ -261,7 +331,6 @@ static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev) struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); struct mc13xxx_regulator *mc13xxx_regulators = priv->mc13xxx_regulators; int id = rdev_get_id(rdev); - int ret; u32 en_val = mc13xxx_regulators[id].enable_bit; dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); @@ -271,12 +340,8 @@ static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev) id == MC13783_REG_PWGT2SPI) en_val = 0; - mc13xxx_lock(priv->mc13xxx); - ret = mc13783_powermisc_rmw(priv, mc13xxx_regulators[id].enable_bit, + return mc13783_powermisc_rmw(priv, mc13xxx_regulators[id].enable_bit, en_val); - mc13xxx_unlock(priv->mc13xxx); - - return ret; } static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev) @@ -284,7 +349,6 @@ static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev) struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); struct mc13xxx_regulator *mc13xxx_regulators = priv->mc13xxx_regulators; int id = rdev_get_id(rdev); - int ret; u32 dis_val = 0; dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); @@ -294,12 +358,8 @@ static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev) id == MC13783_REG_PWGT2SPI) dis_val = mc13xxx_regulators[id].enable_bit; - mc13xxx_lock(priv->mc13xxx); - ret = mc13783_powermisc_rmw(priv, mc13xxx_regulators[id].enable_bit, + return mc13783_powermisc_rmw(priv, mc13xxx_regulators[id].enable_bit, dis_val); - mc13xxx_unlock(priv->mc13xxx); - - return ret; } static int mc13783_gpo_regulator_is_enabled(struct regulator_dev *rdev) @@ -330,7 +390,6 @@ static struct regulator_ops mc13783_gpo_regulator_ops = { .is_enabled = mc13783_gpo_regulator_is_enabled, .list_voltage = regulator_list_voltage_table, .set_voltage = mc13xxx_fixed_regulator_set_voltage, - .get_voltage = mc13xxx_fixed_regulator_get_voltage, }; static int __devinit mc13783_regulator_probe(struct platform_device *pdev) diff --git a/drivers/regulator/mc13892-regulator.c b/drivers/regulator/mc13892-regulator.c index b388b746452..1fa63812f7a 100644 --- a/drivers/regulator/mc13892-regulator.c +++ b/drivers/regulator/mc13892-regulator.c @@ -305,9 +305,10 @@ static int mc13892_powermisc_rmw(struct mc13xxx_regulator_priv *priv, u32 mask, BUG_ON(val & ~mask); + mc13xxx_lock(priv->mc13xxx); ret = mc13xxx_reg_read(mc13892, MC13892_POWERMISC, &valread); if (ret) - return ret; + goto out; /* Update the stored state for Power Gates. */ priv->powermisc_pwgt_state = @@ -320,14 +321,16 @@ static int mc13892_powermisc_rmw(struct mc13xxx_regulator_priv *priv, u32 mask, valread = (valread & ~MC13892_POWERMISC_PWGTSPI_M) | priv->powermisc_pwgt_state; - return mc13xxx_reg_write(mc13892, MC13892_POWERMISC, valread); + ret = mc13xxx_reg_write(mc13892, MC13892_POWERMISC, valread); +out: + mc13xxx_unlock(priv->mc13xxx); + return ret; } static int mc13892_gpo_regulator_enable(struct regulator_dev *rdev) { struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); int id = rdev_get_id(rdev); - int ret; u32 en_val = mc13892_regulators[id].enable_bit; u32 mask = mc13892_regulators[id].enable_bit; @@ -340,18 +343,13 @@ static int mc13892_gpo_regulator_enable(struct regulator_dev *rdev) if (id == MC13892_GPO4) mask |= MC13892_POWERMISC_GPO4ADINEN; - mc13xxx_lock(priv->mc13xxx); - ret = mc13892_powermisc_rmw(priv, mask, en_val); - mc13xxx_unlock(priv->mc13xxx); - - return ret; + return mc13892_powermisc_rmw(priv, mask, en_val); } static int mc13892_gpo_regulator_disable(struct regulator_dev *rdev) { struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); int id = rdev_get_id(rdev); - int ret; u32 dis_val = 0; dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); @@ -360,12 +358,8 @@ static int mc13892_gpo_regulator_disable(struct regulator_dev *rdev) if (id == MC13892_PWGT1SPI || id == MC13892_PWGT2SPI) dis_val = mc13892_regulators[id].enable_bit; - mc13xxx_lock(priv->mc13xxx); - ret = mc13892_powermisc_rmw(priv, mc13892_regulators[id].enable_bit, + return mc13892_powermisc_rmw(priv, mc13892_regulators[id].enable_bit, dis_val); - mc13xxx_unlock(priv->mc13xxx); - - return ret; } static int mc13892_gpo_regulator_is_enabled(struct regulator_dev *rdev) @@ -396,14 +390,13 @@ static struct regulator_ops mc13892_gpo_regulator_ops = { .is_enabled = mc13892_gpo_regulator_is_enabled, .list_voltage = regulator_list_voltage_table, .set_voltage = mc13xxx_fixed_regulator_set_voltage, - .get_voltage = mc13xxx_fixed_regulator_get_voltage, }; -static int mc13892_sw_regulator_get_voltage(struct regulator_dev *rdev) +static int mc13892_sw_regulator_get_voltage_sel(struct regulator_dev *rdev) { struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); int ret, id = rdev_get_id(rdev); - unsigned int val, hi; + unsigned int val; dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); @@ -414,17 +407,11 @@ static int mc13892_sw_regulator_get_voltage(struct regulator_dev *rdev) if (ret) return ret; - hi = val & MC13892_SWITCHERS0_SWxHI; val = (val & mc13892_regulators[id].vsel_mask) >> mc13892_regulators[id].vsel_shift; dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val); - if (hi) - val = (25000 * val) + 1100000; - else - val = (25000 * val) + 600000; - return val; } @@ -432,37 +419,25 @@ static int mc13892_sw_regulator_set_voltage_sel(struct regulator_dev *rdev, unsigned selector) { struct mc13xxx_regulator_priv *priv = rdev_get_drvdata(rdev); - int hi, value, mask, id = rdev_get_id(rdev); - u32 valread; + int volt, mask, id = rdev_get_id(rdev); + u32 reg_value; int ret; - value = rdev->desc->volt_table[selector]; + volt = rdev->desc->volt_table[selector]; + mask = mc13892_regulators[id].vsel_mask; + reg_value = selector << mc13892_regulators[id].vsel_shift; + + if (volt > 1375000) { + mask |= MC13892_SWITCHERS0_SWxHI; + reg_value |= MC13892_SWITCHERS0_SWxHI; + } else if (volt < 1100000) { + mask |= MC13892_SWITCHERS0_SWxHI; + reg_value &= ~MC13892_SWITCHERS0_SWxHI; + } mc13xxx_lock(priv->mc13xxx); - ret = mc13xxx_reg_read(priv->mc13xxx, - mc13892_regulators[id].vsel_reg, &valread); - if (ret) - goto err; - - if (value > 1375000) - hi = 1; - else if (value < 1100000) - hi = 0; - else - hi = valread & MC13892_SWITCHERS0_SWxHI; - - if (hi) { - value = (value - 1100000) / 25000; - value |= MC13892_SWITCHERS0_SWxHI; - } else - value = (value - 600000) / 25000; - - mask = mc13892_regulators[id].vsel_mask | MC13892_SWITCHERS0_SWxHI; - valread = (valread & ~mask) | - (value << mc13892_regulators[id].vsel_shift); - ret = mc13xxx_reg_write(priv->mc13xxx, mc13892_regulators[id].vsel_reg, - valread); -err: + ret = mc13xxx_reg_rmw(priv->mc13xxx, mc13892_regulators[id].reg, mask, + reg_value); mc13xxx_unlock(priv->mc13xxx); return ret; @@ -471,7 +446,7 @@ err: static struct regulator_ops mc13892_sw_regulator_ops = { .list_voltage = regulator_list_voltage_table, .set_voltage_sel = mc13892_sw_regulator_set_voltage_sel, - .get_voltage = mc13892_sw_regulator_get_voltage, + .get_voltage_sel = mc13892_sw_regulator_get_voltage_sel, }; static int mc13892_vcam_set_mode(struct regulator_dev *rdev, unsigned int mode) diff --git a/drivers/regulator/mc13xxx-regulator-core.c b/drivers/regulator/mc13xxx-regulator-core.c index d6eda28ca5d..88cbb832d55 100644 --- a/drivers/regulator/mc13xxx-regulator-core.c +++ b/drivers/regulator/mc13xxx-regulator-core.c @@ -143,30 +143,21 @@ int mc13xxx_fixed_regulator_set_voltage(struct regulator_dev *rdev, int min_uV, __func__, id, min_uV, max_uV); if (min_uV <= rdev->desc->volt_table[0] && - rdev->desc->volt_table[0] <= max_uV) + rdev->desc->volt_table[0] <= max_uV) { + *selector = 0; return 0; - else + } else { return -EINVAL; + } } EXPORT_SYMBOL_GPL(mc13xxx_fixed_regulator_set_voltage); -int mc13xxx_fixed_regulator_get_voltage(struct regulator_dev *rdev) -{ - int id = rdev_get_id(rdev); - - dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); - - return rdev->desc->volt_table[0]; -} -EXPORT_SYMBOL_GPL(mc13xxx_fixed_regulator_get_voltage); - struct regulator_ops mc13xxx_fixed_regulator_ops = { .enable = mc13xxx_regulator_enable, .disable = mc13xxx_regulator_disable, .is_enabled = mc13xxx_regulator_is_enabled, .list_voltage = regulator_list_voltage_table, .set_voltage = mc13xxx_fixed_regulator_set_voltage, - .get_voltage = mc13xxx_fixed_regulator_get_voltage, }; EXPORT_SYMBOL_GPL(mc13xxx_fixed_regulator_ops); diff --git a/drivers/regulator/mc13xxx.h b/drivers/regulator/mc13xxx.h index eaff5510b6d..06c8903f182 100644 --- a/drivers/regulator/mc13xxx.h +++ b/drivers/regulator/mc13xxx.h @@ -34,7 +34,6 @@ struct mc13xxx_regulator_priv { extern int mc13xxx_fixed_regulator_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, unsigned *selector); -extern int mc13xxx_fixed_regulator_get_voltage(struct regulator_dev *rdev); #ifdef CONFIG_OF extern int mc13xxx_get_num_regulators_dt(struct platform_device *pdev); diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 3e4106f2bda..6f684916fd7 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -92,16 +92,18 @@ struct regulator_init_data *of_get_regulator_init_data(struct device *dev, EXPORT_SYMBOL_GPL(of_get_regulator_init_data); /** - * of_regulator_match - extract regulator init data when node - * property "regulator-compatible" matches with the regulator name. + * of_regulator_match - extract multiple regulator init data from device tree. * @dev: device requesting the data * @node: parent device node of the regulators * @matches: match table for the regulators * @num_matches: number of entries in match table * - * This function uses a match table specified by the regulator driver and - * looks up the corresponding init data in the device tree if - * regulator-compatible matches. Note that the match table is modified + * This function uses a match table specified by the regulator driver to + * parse regulator init data from the device tree. @node is expected to + * contain a set of child nodes, each providing the init data for one + * regulator. The data parsed from a child node will be matched to a regulator + * based on either the deprecated property regulator-compatible if present, + * or otherwise the child node's name. Note that the match table is modified * in place. * * Returns the number of matches found or a negative error code on failure. @@ -112,26 +114,23 @@ int of_regulator_match(struct device *dev, struct device_node *node, { unsigned int count = 0; unsigned int i; - const char *regulator_comp; + const char *name; struct device_node *child; if (!dev || !node) return -EINVAL; for_each_child_of_node(node, child) { - regulator_comp = of_get_property(child, + name = of_get_property(child, "regulator-compatible", NULL); - if (!regulator_comp) { - dev_err(dev, "regulator-compatible is missing for node %s\n", - child->name); - continue; - } + if (!name) + name = child->name; for (i = 0; i < num_matches; i++) { struct of_regulator_match *match = &matches[i]; if (match->of_node) continue; - if (strcmp(match->name, regulator_comp)) + if (strcmp(match->name, name)) continue; match->init_data = diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 46c7e88f838..2ba7502fa3b 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -443,44 +443,6 @@ static int palmas_list_voltage_ldo(struct regulator_dev *dev, return 850000 + (selector * 50000); } -static int palmas_get_voltage_ldo_sel(struct regulator_dev *dev) -{ - struct palmas_pmic *pmic = rdev_get_drvdata(dev); - int id = rdev_get_id(dev); - int selector; - unsigned int reg; - unsigned int addr; - - addr = palmas_regs_info[id].vsel_addr; - - palmas_ldo_read(pmic->palmas, addr, ®); - - selector = reg & PALMAS_LDO1_VOLTAGE_VSEL_MASK; - - /* Adjust selector to match list_voltage ranges */ - if (selector > 49) - selector = 49; - - return selector; -} - -static int palmas_set_voltage_ldo_sel(struct regulator_dev *dev, - unsigned selector) -{ - struct palmas_pmic *pmic = rdev_get_drvdata(dev); - int id = rdev_get_id(dev); - unsigned int reg = 0; - unsigned int addr; - - addr = palmas_regs_info[id].vsel_addr; - - reg = selector; - - palmas_ldo_write(pmic->palmas, addr, reg); - - return 0; -} - static int palmas_map_voltage_ldo(struct regulator_dev *rdev, int min_uV, int max_uV) { @@ -505,8 +467,8 @@ static struct regulator_ops palmas_ops_ldo = { .is_enabled = palmas_is_enabled_ldo, .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, - .get_voltage_sel = palmas_get_voltage_ldo_sel, - .set_voltage_sel = palmas_set_voltage_ldo_sel, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, .list_voltage = palmas_list_voltage_ldo, .map_voltage = palmas_map_voltage_ldo, }; @@ -757,6 +719,9 @@ static __devinit int palmas_probe(struct platform_device *pdev) pmic->desc[id].type = REGULATOR_VOLTAGE; pmic->desc[id].owner = THIS_MODULE; + pmic->desc[id].vsel_reg = PALMAS_BASE_TO_REG(PALMAS_LDO_BASE, + palmas_regs_info[id].vsel_addr); + pmic->desc[id].vsel_mask = PALMAS_LDO1_VOLTAGE_VSEL_MASK; pmic->desc[id].enable_reg = PALMAS_BASE_TO_REG(PALMAS_LDO_BASE, palmas_regs_info[id].ctrl_addr); pmic->desc[id].enable_mask = PALMAS_LDO1_CTRL_MODE_ACTIVE; diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index 4669dc9ac74..926f9c8f2fa 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -24,7 +24,7 @@ #include <linux/mfd/samsung/s2mps11.h> struct s2mps11_info { - struct regulator_dev **rdev; + struct regulator_dev *rdev[S2MPS11_REGULATOR_MAX]; int ramp_delay2; int ramp_delay34; @@ -236,9 +236,8 @@ static __devinit int s2mps11_pmic_probe(struct platform_device *pdev) struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent); struct sec_platform_data *pdata = dev_get_platdata(iodev->dev); struct regulator_config config = { }; - struct regulator_dev **rdev; struct s2mps11_info *s2mps11; - int i, ret, size; + int i, ret; unsigned char ramp_enable, ramp_reg = 0; if (!pdata) { @@ -251,13 +250,6 @@ static __devinit int s2mps11_pmic_probe(struct platform_device *pdev) if (!s2mps11) return -ENOMEM; - size = sizeof(struct regulator_dev *) * S2MPS11_REGULATOR_MAX; - s2mps11->rdev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); - if (!s2mps11->rdev) { - return -ENOMEM; - } - - rdev = s2mps11->rdev; platform_set_drvdata(pdev, s2mps11); s2mps11->ramp_delay2 = pdata->buck2_ramp_delay; @@ -297,12 +289,12 @@ static __devinit int s2mps11_pmic_probe(struct platform_device *pdev) config.init_data = pdata->regulators[i].initdata; config.driver_data = s2mps11; - rdev[i] = regulator_register(®ulators[i], &config); - if (IS_ERR(rdev[i])) { - ret = PTR_ERR(rdev[i]); + s2mps11->rdev[i] = regulator_register(®ulators[i], &config); + if (IS_ERR(s2mps11->rdev[i])) { + ret = PTR_ERR(s2mps11->rdev[i]); dev_err(&pdev->dev, "regulator init failed for %d\n", i); - rdev[i] = NULL; + s2mps11->rdev[i] = NULL; goto err; } } @@ -310,8 +302,7 @@ static __devinit int s2mps11_pmic_probe(struct platform_device *pdev) return 0; err: for (i = 0; i < S2MPS11_REGULATOR_MAX; i++) - if (rdev[i]) - regulator_unregister(rdev[i]); + regulator_unregister(s2mps11->rdev[i]); return ret; } @@ -319,12 +310,10 @@ err: static int __devexit s2mps11_pmic_remove(struct platform_device *pdev) { struct s2mps11_info *s2mps11 = platform_get_drvdata(pdev); - struct regulator_dev **rdev = s2mps11->rdev; int i; for (i = 0; i < S2MPS11_REGULATOR_MAX; i++) - if (rdev[i]) - regulator_unregister(rdev[i]); + regulator_unregister(s2mps11->rdev[i]); return 0; } diff --git a/drivers/regulator/tps6524x-regulator.c b/drivers/regulator/tps6524x-regulator.c index 947ece933d9..058d2f2675e 100644 --- a/drivers/regulator/tps6524x-regulator.c +++ b/drivers/regulator/tps6524x-regulator.c @@ -502,15 +502,13 @@ static int set_current_limit(struct regulator_dev *rdev, int min_uA, if (info->n_ilimsels == 1) return -EINVAL; - for (i = 0; i < info->n_ilimsels; i++) + for (i = info->n_ilimsels - 1; i >= 0; i--) { if (min_uA <= info->ilimsels[i] && max_uA >= info->ilimsels[i]) - break; - - if (i >= info->n_ilimsels) - return -EINVAL; + return write_field(hw, &info->ilimsel, i); + } - return write_field(hw, &info->ilimsel, i); + return -EINVAL; } static int get_current_limit(struct regulator_dev *rdev) diff --git a/drivers/regulator/tps6586x-regulator.c b/drivers/regulator/tps6586x-regulator.c index 19241fc3005..ce1e7cb8d51 100644 --- a/drivers/regulator/tps6586x-regulator.c +++ b/drivers/regulator/tps6586x-regulator.c @@ -57,9 +57,6 @@ struct tps6586x_regulator { struct regulator_desc desc; - int volt_reg; - int volt_shift; - int volt_nbits; int enable_bit[2]; int enable_reg[2]; @@ -81,10 +78,10 @@ static int tps6586x_set_voltage_sel(struct regulator_dev *rdev, int ret, val, rid = rdev_get_id(rdev); uint8_t mask; - val = selector << ri->volt_shift; - mask = ((1 << ri->volt_nbits) - 1) << ri->volt_shift; + val = selector << (ffs(rdev->desc->vsel_mask) - 1); + mask = rdev->desc->vsel_mask; - ret = tps6586x_update(parent, ri->volt_reg, val, mask); + ret = tps6586x_update(parent, rdev->desc->vsel_reg, val, mask); if (ret) return ret; @@ -100,66 +97,17 @@ static int tps6586x_set_voltage_sel(struct regulator_dev *rdev, return ret; } -static int tps6586x_get_voltage_sel(struct regulator_dev *rdev) -{ - struct tps6586x_regulator *ri = rdev_get_drvdata(rdev); - struct device *parent = to_tps6586x_dev(rdev); - uint8_t val, mask; - int ret; - - ret = tps6586x_read(parent, ri->volt_reg, &val); - if (ret) - return ret; - - mask = ((1 << ri->volt_nbits) - 1) << ri->volt_shift; - val = (val & mask) >> ri->volt_shift; - - if (val >= ri->desc.n_voltages) - BUG(); - - return val; -} - -static int tps6586x_regulator_enable(struct regulator_dev *rdev) -{ - struct tps6586x_regulator *ri = rdev_get_drvdata(rdev); - struct device *parent = to_tps6586x_dev(rdev); - - return tps6586x_set_bits(parent, ri->enable_reg[0], - 1 << ri->enable_bit[0]); -} - -static int tps6586x_regulator_disable(struct regulator_dev *rdev) -{ - struct tps6586x_regulator *ri = rdev_get_drvdata(rdev); - struct device *parent = to_tps6586x_dev(rdev); - - return tps6586x_clr_bits(parent, ri->enable_reg[0], - 1 << ri->enable_bit[0]); -} - -static int tps6586x_regulator_is_enabled(struct regulator_dev *rdev) -{ - struct tps6586x_regulator *ri = rdev_get_drvdata(rdev); - struct device *parent = to_tps6586x_dev(rdev); - uint8_t reg_val; - int ret; - - ret = tps6586x_read(parent, ri->enable_reg[0], ®_val); - if (ret) - return ret; - - return !!(reg_val & (1 << ri->enable_bit[0])); -} - static struct regulator_ops tps6586x_regulator_ops = { .list_voltage = regulator_list_voltage_table, - .get_voltage_sel = tps6586x_get_voltage_sel, + .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = tps6586x_set_voltage_sel, - .is_enabled = tps6586x_regulator_is_enabled, - .enable = tps6586x_regulator_enable, - .disable = tps6586x_regulator_disable, + .is_enabled = regulator_is_enabled_regmap, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, +}; + +static struct regulator_ops tps6586x_sys_regulator_ops = { }; static const unsigned int tps6586x_ldo0_voltages[] = { @@ -202,10 +150,11 @@ static const unsigned int tps6586x_dvm_voltages[] = { .n_voltages = ARRAY_SIZE(tps6586x_##vdata##_voltages), \ .volt_table = tps6586x_##vdata##_voltages, \ .owner = THIS_MODULE, \ + .enable_reg = TPS6586X_SUPPLY##ereg0, \ + .enable_mask = 1 << (ebit0), \ + .vsel_reg = TPS6586X_##vreg, \ + .vsel_mask = ((1 << (nbits)) - 1) << (shift), \ }, \ - .volt_reg = TPS6586X_##vreg, \ - .volt_shift = (shift), \ - .volt_nbits = (nbits), \ .enable_reg[0] = TPS6586X_SUPPLY##ereg0, \ .enable_bit[0] = (ebit0), \ .enable_reg[1] = TPS6586X_SUPPLY##ereg1, \ @@ -230,15 +179,28 @@ static const unsigned int tps6586x_dvm_voltages[] = { TPS6586X_REGULATOR_DVM_GOREG(goreg, gobit) \ } +#define TPS6586X_SYS_REGULATOR() \ +{ \ + .desc = { \ + .supply_name = "sys", \ + .name = "REG-SYS", \ + .ops = &tps6586x_sys_regulator_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = TPS6586X_ID_SYS, \ + .owner = THIS_MODULE, \ + }, \ +} + static struct tps6586x_regulator tps6586x_regulator[] = { + TPS6586X_SYS_REGULATOR(), TPS6586X_LDO(LDO_0, "vinldo01", ldo0, SUPPLYV1, 5, 3, ENC, 0, END, 0), TPS6586X_LDO(LDO_3, "vinldo23", ldo, SUPPLYV4, 0, 3, ENC, 2, END, 2), - TPS6586X_LDO(LDO_5, NULL, ldo, SUPPLYV6, 0, 3, ENE, 6, ENE, 6), + TPS6586X_LDO(LDO_5, "REG-SYS", ldo, SUPPLYV6, 0, 3, ENE, 6, ENE, 6), TPS6586X_LDO(LDO_6, "vinldo678", ldo, SUPPLYV3, 0, 3, ENC, 4, END, 4), TPS6586X_LDO(LDO_7, "vinldo678", ldo, SUPPLYV3, 3, 3, ENC, 5, END, 5), TPS6586X_LDO(LDO_8, "vinldo678", ldo, SUPPLYV2, 5, 3, ENC, 6, END, 6), TPS6586X_LDO(LDO_9, "vinldo9", ldo, SUPPLYV6, 3, 3, ENE, 7, ENE, 7), - TPS6586X_LDO(LDO_RTC, NULL, ldo, SUPPLYV4, 3, 3, V4, 7, V4, 7), + TPS6586X_LDO(LDO_RTC, "REG-SYS", ldo, SUPPLYV4, 3, 3, V4, 7, V4, 7), TPS6586X_LDO(LDO_1, "vinldo01", dvm, SUPPLYV1, 0, 5, ENC, 1, END, 1), TPS6586X_LDO(SM_2, "vin-sm2", sm2, SUPPLYV2, 0, 5, ENC, 7, END, 7), diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 77a71a5c17c..7eb986a4074 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -10,6 +10,8 @@ */ #include <linux/module.h> +#include <linux/string.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/err.h> #include <linux/platform_device.h> @@ -624,18 +626,9 @@ static int twlfixed_list_voltage(struct regulator_dev *rdev, unsigned index) return info->min_mV * 1000; } -static int twlfixed_get_voltage(struct regulator_dev *rdev) -{ - struct twlreg_info *info = rdev_get_drvdata(rdev); - - return info->min_mV * 1000; -} - static struct regulator_ops twl4030fixed_ops = { .list_voltage = twlfixed_list_voltage, - .get_voltage = twlfixed_get_voltage, - .enable = twl4030reg_enable, .disable = twl4030reg_disable, .is_enabled = twl4030reg_is_enabled, @@ -648,8 +641,6 @@ static struct regulator_ops twl4030fixed_ops = { static struct regulator_ops twl6030fixed_ops = { .list_voltage = twlfixed_list_voltage, - .get_voltage = twlfixed_get_voltage, - .enable = twl6030reg_enable, .disable = twl6030reg_disable, .is_enabled = twl6030reg_is_enabled, @@ -659,13 +650,6 @@ static struct regulator_ops twl6030fixed_ops = { .get_status = twl6030reg_get_status, }; -static struct regulator_ops twl6030_fixed_resource = { - .enable = twl6030reg_enable, - .disable = twl6030reg_disable, - .is_enabled = twl6030reg_is_enabled, - .get_status = twl6030reg_get_status, -}; - /* * SMPS status and control */ @@ -757,37 +741,32 @@ static int twl6030smps_list_voltage(struct regulator_dev *rdev, unsigned index) return voltage; } -static int -twl6030smps_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, - unsigned int *selector) +static int twl6030smps_map_voltage(struct regulator_dev *rdev, int min_uV, + int max_uV) { - struct twlreg_info *info = rdev_get_drvdata(rdev); - int vsel = 0; + struct twlreg_info *info = rdev_get_drvdata(rdev); + int vsel = 0; switch (info->flags) { case 0: if (min_uV == 0) vsel = 0; else if ((min_uV >= 600000) && (min_uV <= 1300000)) { - int calc_uV; vsel = DIV_ROUND_UP(min_uV - 600000, 12500); vsel++; - calc_uV = twl6030smps_list_voltage(rdev, vsel); - if (calc_uV > max_uV) - return -EINVAL; } /* Values 1..57 for vsel are linear and can be calculated * values 58..62 are non linear. */ - else if ((min_uV > 1900000) && (max_uV >= 2100000)) + else if ((min_uV > 1900000) && (min_uV <= 2100000)) vsel = 62; - else if ((min_uV > 1800000) && (max_uV >= 1900000)) + else if ((min_uV > 1800000) && (min_uV <= 1900000)) vsel = 61; - else if ((min_uV > 1500000) && (max_uV >= 1800000)) + else if ((min_uV > 1500000) && (min_uV <= 1800000)) vsel = 60; - else if ((min_uV > 1350000) && (max_uV >= 1500000)) + else if ((min_uV > 1350000) && (min_uV <= 1500000)) vsel = 59; - else if ((min_uV > 1300000) && (max_uV >= 1350000)) + else if ((min_uV > 1300000) && (min_uV <= 1350000)) vsel = 58; else return -EINVAL; @@ -796,25 +775,21 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, if (min_uV == 0) vsel = 0; else if ((min_uV >= 700000) && (min_uV <= 1420000)) { - int calc_uV; vsel = DIV_ROUND_UP(min_uV - 700000, 12500); vsel++; - calc_uV = twl6030smps_list_voltage(rdev, vsel); - if (calc_uV > max_uV) - return -EINVAL; } /* Values 1..57 for vsel are linear and can be calculated * values 58..62 are non linear. */ - else if ((min_uV > 1900000) && (max_uV >= 2100000)) + else if ((min_uV > 1900000) && (min_uV <= 2100000)) vsel = 62; - else if ((min_uV > 1800000) && (max_uV >= 1900000)) + else if ((min_uV > 1800000) && (min_uV <= 1900000)) vsel = 61; - else if ((min_uV > 1350000) && (max_uV >= 1800000)) + else if ((min_uV > 1350000) && (min_uV <= 1800000)) vsel = 60; - else if ((min_uV > 1350000) && (max_uV >= 1500000)) + else if ((min_uV > 1350000) && (min_uV <= 1500000)) vsel = 59; - else if ((min_uV > 1300000) && (max_uV >= 1350000)) + else if ((min_uV > 1300000) && (min_uV <= 1350000)) vsel = 58; else return -EINVAL; @@ -830,17 +805,23 @@ twl6030smps_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV, case SMPS_OFFSET_EN|SMPS_EXTENDED_EN: if (min_uV == 0) { vsel = 0; - } else if ((min_uV >= 2161000) && (max_uV <= 4321000)) { + } else if ((min_uV >= 2161000) && (min_uV <= 4321000)) { vsel = DIV_ROUND_UP(min_uV - 2161000, 38600); vsel++; } break; } - *selector = vsel; + return vsel; +} + +static int twl6030smps_set_voltage_sel(struct regulator_dev *rdev, + unsigned int selector) +{ + struct twlreg_info *info = rdev_get_drvdata(rdev); return twlreg_write(info, TWL_MODULE_PM_RECEIVER, VREG_VOLTAGE_SMPS, - vsel); + selector); } static int twl6030smps_get_voltage_sel(struct regulator_dev *rdev) @@ -852,8 +833,9 @@ static int twl6030smps_get_voltage_sel(struct regulator_dev *rdev) static struct regulator_ops twlsmps_ops = { .list_voltage = twl6030smps_list_voltage, + .map_voltage = twl6030smps_map_voltage, - .set_voltage = twl6030smps_set_voltage, + .set_voltage_sel = twl6030smps_set_voltage_sel, .get_voltage_sel = twl6030smps_get_voltage_sel, .enable = twl6030reg_enable, @@ -876,7 +858,7 @@ static struct regulator_ops twlsmps_ops = { 0x0, TWL6030, twl6030fixed_ops) #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) \ -static struct twlreg_info TWL4030_INFO_##label = { \ +static const struct twlreg_info TWL4030_INFO_##label = { \ .base = offset, \ .id = num, \ .table_len = ARRAY_SIZE(label##_VSEL_table), \ @@ -894,7 +876,7 @@ static struct twlreg_info TWL4030_INFO_##label = { \ } #define TWL4030_ADJUSTABLE_SMPS(label, offset, num, turnon_delay, remap_conf) \ -static struct twlreg_info TWL4030_INFO_##label = { \ +static const struct twlreg_info TWL4030_INFO_##label = { \ .base = offset, \ .id = num, \ .remap = remap_conf, \ @@ -909,7 +891,7 @@ static struct twlreg_info TWL4030_INFO_##label = { \ } #define TWL6030_ADJUSTABLE_SMPS(label) \ -static struct twlreg_info TWL6030_INFO_##label = { \ +static const struct twlreg_info TWL6030_INFO_##label = { \ .desc = { \ .name = #label, \ .id = TWL6030_REG_##label, \ @@ -920,7 +902,7 @@ static struct twlreg_info TWL6030_INFO_##label = { \ } #define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) \ -static struct twlreg_info TWL6030_INFO_##label = { \ +static const struct twlreg_info TWL6030_INFO_##label = { \ .base = offset, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ @@ -935,7 +917,7 @@ static struct twlreg_info TWL6030_INFO_##label = { \ } #define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) \ -static struct twlreg_info TWL6025_INFO_##label = { \ +static const struct twlreg_info TWL6025_INFO_##label = { \ .base = offset, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ @@ -951,7 +933,7 @@ static struct twlreg_info TWL6025_INFO_##label = { \ #define TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, remap_conf, \ family, operations) \ -static struct twlreg_info TWLFIXED_INFO_##label = { \ +static const struct twlreg_info TWLFIXED_INFO_##label = { \ .base = offset, \ .id = num, \ .min_mV = mVolts, \ @@ -981,7 +963,7 @@ static struct twlreg_info TWLRES_INFO_##label = { \ } #define TWL6025_ADJUSTABLE_SMPS(label, offset) \ -static struct twlreg_info TWLSMPS_INFO_##label = { \ +static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ .min_mV = 600, \ .max_mV = 2100, \ @@ -1138,6 +1120,7 @@ static int __devinit twlreg_probe(struct platform_device *pdev) { int i, id; struct twlreg_info *info; + const struct twlreg_info *template; struct regulator_init_data *initdata; struct regulation_constraints *c; struct regulator_dev *rdev; @@ -1147,17 +1130,17 @@ static int __devinit twlreg_probe(struct platform_device *pdev) match = of_match_device(twl_of_match, &pdev->dev); if (match) { - info = match->data; - id = info->desc.id; + template = match->data; + id = template->desc.id; initdata = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node); drvdata = NULL; } else { id = pdev->id; initdata = pdev->dev.platform_data; - for (i = 0, info = NULL; i < ARRAY_SIZE(twl_of_match); i++) { - info = twl_of_match[i].data; - if (info && info->desc.id == id) + for (i = 0, template = NULL; i < ARRAY_SIZE(twl_of_match); i++) { + template = twl_of_match[i].data; + if (template && template->desc.id == id) break; } if (i == ARRAY_SIZE(twl_of_match)) @@ -1168,12 +1151,16 @@ static int __devinit twlreg_probe(struct platform_device *pdev) return -EINVAL; } - if (!info) + if (!template) return -ENODEV; if (!initdata) return -EINVAL; + info = kmemdup(template, sizeof (*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + if (drvdata) { /* copy the driver data into regulator data */ info->features = drvdata->features; @@ -1234,6 +1221,7 @@ static int __devinit twlreg_probe(struct platform_device *pdev) if (IS_ERR(rdev)) { dev_err(&pdev->dev, "can't register %s, %ld\n", info->desc.name, PTR_ERR(rdev)); + kfree(info); return PTR_ERR(rdev); } platform_set_drvdata(pdev, rdev); @@ -1255,7 +1243,11 @@ static int __devinit twlreg_probe(struct platform_device *pdev) static int __devexit twlreg_remove(struct platform_device *pdev) { - regulator_unregister(platform_get_drvdata(pdev)); + struct regulator_dev *rdev = platform_get_drvdata(pdev); + struct twlreg_info *info = rdev->reg_data; + + regulator_unregister(rdev); + kfree(info); return 0; } diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 7413885be01..90cbcc68370 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -339,16 +339,15 @@ static int wm831x_buckv_set_current_limit(struct regulator_dev *rdev, u16 reg = dcdc->base + WM831X_DCDC_CONTROL_2; int i; - for (i = 0; i < ARRAY_SIZE(wm831x_dcdc_ilim); i++) { + for (i = ARRAY_SIZE(wm831x_dcdc_ilim) - 1; i >= 0; i--) { if ((min_uA <= wm831x_dcdc_ilim[i]) && (wm831x_dcdc_ilim[i] <= max_uA)) - break; + return wm831x_set_bits(wm831x, reg, + WM831X_DC1_HC_THR_MASK, + i << WM831X_DC1_HC_THR_SHIFT); } - if (i == ARRAY_SIZE(wm831x_dcdc_ilim)) - return -EINVAL; - return wm831x_set_bits(wm831x, reg, WM831X_DC1_HC_THR_MASK, - i << WM831X_DC1_HC_THR_SHIFT); + return -EINVAL; } static int wm831x_buckv_get_current_limit(struct regulator_dev *rdev) diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c index 5cb70ca1e98..9af512672be 100644 --- a/drivers/regulator/wm831x-ldo.c +++ b/drivers/regulator/wm831x-ldo.c @@ -205,6 +205,8 @@ static int wm831x_gp_ldo_get_status(struct regulator_dev *rdev) /* Is it reporting under voltage? */ ret = wm831x_reg_read(wm831x, WM831X_LDO_UV_STATUS); + if (ret < 0) + return ret; if (ret & mask) return REGULATOR_STATUS_ERROR; @@ -237,6 +239,8 @@ static struct regulator_ops wm831x_gp_ldo_ops = { .set_mode = wm831x_gp_ldo_set_mode, .get_status = wm831x_gp_ldo_get_status, .get_optimum_mode = wm831x_gp_ldo_get_optimum_mode, + .get_bypass = regulator_get_bypass_regmap, + .set_bypass = regulator_set_bypass_regmap, .is_enabled = regulator_is_enabled_regmap, .enable = regulator_enable_regmap, @@ -293,6 +297,8 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev) ldo->desc.vsel_mask = WM831X_LDO1_ON_VSEL_MASK; ldo->desc.enable_reg = WM831X_LDO_ENABLE; ldo->desc.enable_mask = 1 << id; + ldo->desc.bypass_reg = ldo->base; + ldo->desc.bypass_mask = WM831X_LDO1_SWI; config.dev = pdev->dev.parent; if (pdata) @@ -469,6 +475,8 @@ static int wm831x_aldo_get_status(struct regulator_dev *rdev) /* Is it reporting under voltage? */ ret = wm831x_reg_read(wm831x, WM831X_LDO_UV_STATUS); + if (ret < 0) + return ret; if (ret & mask) return REGULATOR_STATUS_ERROR; @@ -488,6 +496,8 @@ static struct regulator_ops wm831x_aldo_ops = { .get_mode = wm831x_aldo_get_mode, .set_mode = wm831x_aldo_set_mode, .get_status = wm831x_aldo_get_status, + .set_bypass = regulator_set_bypass_regmap, + .get_bypass = regulator_get_bypass_regmap, .is_enabled = regulator_is_enabled_regmap, .enable = regulator_enable_regmap, @@ -544,6 +554,8 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev) ldo->desc.vsel_mask = WM831X_LDO7_ON_VSEL_MASK; ldo->desc.enable_reg = WM831X_LDO_ENABLE; ldo->desc.enable_mask = 1 << id; + ldo->desc.bypass_reg = ldo->base; + ldo->desc.bypass_mask = WM831X_LDO7_SWI; config.dev = pdev->dev.parent; if (pdata) diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c index 9035dd05361..27c746ef063 100644 --- a/drivers/regulator/wm8400-regulator.c +++ b/drivers/regulator/wm8400-regulator.c @@ -120,13 +120,8 @@ static int wm8400_dcdc_set_mode(struct regulator_dev *dev, unsigned int mode) case REGULATOR_MODE_IDLE: /* Datasheet: standby */ - ret = wm8400_set_bits(wm8400, WM8400_DCDC1_CONTROL_1 + offset, - WM8400_DC1_ACTIVE, 0); - if (ret != 0) - return ret; return wm8400_set_bits(wm8400, WM8400_DCDC1_CONTROL_1 + offset, - WM8400_DC1_SLEEP, 0); - + WM8400_DC1_ACTIVE | WM8400_DC1_SLEEP, 0); default: return -EINVAL; } diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 590cfafc7c1..1859f71372e 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -1008,8 +1008,8 @@ static int rpmsg_probe(struct virtio_device *vdev) return 0; free_coherent: - dma_free_coherent(vdev->dev.parent, RPMSG_TOTAL_BUF_SPACE, bufs_va, - vrp->bufs_dma); + dma_free_coherent(vdev->dev.parent->parent, RPMSG_TOTAL_BUF_SPACE, + bufs_va, vrp->bufs_dma); vqs_del: vdev->config->del_vqs(vrp->vdev); free_vrp: @@ -1043,7 +1043,7 @@ static void __devexit rpmsg_remove(struct virtio_device *vdev) vdev->config->del_vqs(vrp->vdev); - dma_free_coherent(vdev->dev.parent, RPMSG_TOTAL_BUF_SPACE, + dma_free_coherent(vdev->dev.parent->parent, RPMSG_TOTAL_BUF_SPACE, vrp->rbufs, vrp->bufs_dma); kfree(vrp); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 15370a2c5ff..0595c763daf 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -534,11 +534,11 @@ static void dasd_change_state(struct dasd_device *device) if (rc) device->target = device->state; - if (device->state == device->target) - wake_up(&dasd_init_waitq); - /* let user-space know that the device status changed */ kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); + + if (device->state == device->target) + wake_up(&dasd_init_waitq); } /* @@ -2157,6 +2157,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && (!dasd_eer_enabled(device))) { cqr->status = DASD_CQR_FAILED; + cqr->intrc = -EAGAIN; continue; } /* Don't try to start requests if device is stopped */ @@ -3270,6 +3271,16 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) dasd_schedule_device_bh(device); } if (path_event[chp] & PE_PATHGROUP_ESTABLISHED) { + if (!(device->path_data.opm & eventlpm) && + !(device->path_data.tbvpm & eventlpm)) { + /* + * we can not establish a pathgroup on an + * unavailable path, so trigger a path + * verification first + */ + device->path_data.tbvpm |= eventlpm; + dasd_schedule_device_bh(device); + } DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Pathgroup re-established\n"); if (device->discipline->kick_validate) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 157defe5e06..6b556995bb3 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -384,6 +384,29 @@ static void _remove_device_from_lcu(struct alias_lcu *lcu, group->next = NULL; }; +static int +suborder_not_supported(struct dasd_ccw_req *cqr) +{ + char *sense; + char reason; + char msg_format; + char msg_no; + + sense = dasd_get_sense(&cqr->irb); + if (!sense) + return 0; + + reason = sense[0]; + msg_format = (sense[7] & 0xF0); + msg_no = (sense[7] & 0x0F); + + /* command reject, Format 0 MSG 4 - invalid parameter */ + if ((reason == 0x80) && (msg_format == 0x00) && (msg_no == 0x04)) + return 1; + + return 0; +} + static int read_unit_address_configuration(struct dasd_device *device, struct alias_lcu *lcu) { @@ -435,6 +458,8 @@ static int read_unit_address_configuration(struct dasd_device *device, do { rc = dasd_sleep_on(cqr); + if (rc && suborder_not_supported(cqr)) + return -EOPNOTSUPP; } while (rc && (cqr->retries > 0)); if (rc) { spin_lock_irqsave(&lcu->lock, flags); @@ -521,7 +546,7 @@ static void lcu_update_work(struct work_struct *work) * processing the data */ spin_lock_irqsave(&lcu->lock, flags); - if (rc || (lcu->flags & NEED_UAC_UPDATE)) { + if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { DBF_DEV_EVENT(DBF_WARNING, device, "could not update" " alias data in lcu (rc = %d), retry later", rc); schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 2fb2b9ea97e..c48c72abbef 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1507,7 +1507,8 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device, * call might change behaviour of DASD devices. */ static int -dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) +dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav, + unsigned long flags) { struct dasd_ccw_req *cqr; int rc; @@ -1516,10 +1517,19 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) if (IS_ERR(cqr)) return PTR_ERR(cqr); + /* + * set flags e.g. turn on failfast, to prevent blocking + * the calling function should handle failed requests + */ + cqr->flags |= flags; + rc = dasd_sleep_on(cqr); if (!rc) /* trigger CIO to reprobe devices */ css_schedule_reprobe(); + else if (cqr->intrc == -EAGAIN) + rc = -EAGAIN; + dasd_sfree_request(cqr, cqr->memdev); return rc; } @@ -1527,7 +1537,8 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav) /* * Valide storage server of current device. */ -static void dasd_eckd_validate_server(struct dasd_device *device) +static int dasd_eckd_validate_server(struct dasd_device *device, + unsigned long flags) { int rc; struct dasd_eckd_private *private; @@ -1536,17 +1547,18 @@ static void dasd_eckd_validate_server(struct dasd_device *device) private = (struct dasd_eckd_private *) device->private; if (private->uid.type == UA_BASE_PAV_ALIAS || private->uid.type == UA_HYPER_PAV_ALIAS) - return; + return 0; if (dasd_nopav || MACHINE_IS_VM) enable_pav = 0; else enable_pav = 1; - rc = dasd_eckd_psf_ssc(device, enable_pav); + rc = dasd_eckd_psf_ssc(device, enable_pav, flags); /* may be requested feature is not available on server, * therefore just report error and go ahead */ DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "PSF-SSC for SSID %04x " "returned rc=%d", private->uid.ssid, rc); + return rc; } /* @@ -1556,7 +1568,13 @@ static void dasd_eckd_do_validate_server(struct work_struct *work) { struct dasd_device *device = container_of(work, struct dasd_device, kick_validate); - dasd_eckd_validate_server(device); + if (dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST) + == -EAGAIN) { + /* schedule worker again if failed */ + schedule_work(&device->kick_validate); + return; + } + dasd_put_device(device); } @@ -1685,7 +1703,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (rc) goto out_err2; - dasd_eckd_validate_server(device); + dasd_eckd_validate_server(device, 0); /* device may report different configuration data after LCU setup */ rc = dasd_eckd_read_conf(device); @@ -4153,7 +4171,7 @@ static int dasd_eckd_restore_device(struct dasd_device *device) rc = dasd_alias_make_device_known_to_lcu(device); if (rc) return rc; - dasd_eckd_validate_server(device); + dasd_eckd_validate_server(device, DASD_CQR_FLAGS_FAILFAST); /* RE-Read Configuration Data */ rc = dasd_eckd_read_conf(device); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index ed25c8740a9..fc916f5d731 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1426,6 +1426,8 @@ static enum io_sch_action sch_get_action(struct subchannel *sch) return IO_SCH_REPROBE; if (cdev->online) return IO_SCH_VERIFY; + if (cdev->private->state == DEV_STATE_NOT_OPER) + return IO_SCH_UNREG_ATTACH; return IO_SCH_NOP; } @@ -1519,11 +1521,14 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) goto out; break; case IO_SCH_UNREG_ATTACH: + spin_lock_irqsave(sch->lock, flags); if (cdev->private->flags.resuming) { /* Device will be handled later. */ rc = 0; - goto out; + goto out_unlock; } + sch_set_cdev(sch, NULL); + spin_unlock_irqrestore(sch->lock, flags); /* Unregister ccw device. */ ccw_device_unregister(cdev); break; diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 25417d0e7ac..0bcacf71aef 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -2888,7 +2888,7 @@ ahd_handle_lqiphase_error(struct ahd_softc *ahd, u_int lqistat1) ahd_outb(ahd, CLRINT, CLRSCSIINT); ahd_unpause(ahd); } else { - printk("Reseting Channel for LQI Phase error\n"); + printk("Resetting Channel for LQI Phase error\n"); ahd_dump_card_state(ahd); ahd_reset_channel(ahd, 'A', /*Initiate Reset*/TRUE); } diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c index 8cdb79c2fcd..21ad2902e5c 100644 --- a/drivers/scsi/bfa/bfa_ioc.c +++ b/drivers/scsi/bfa/bfa_ioc.c @@ -5587,7 +5587,7 @@ static bfa_status_t bfa_dconf_flash_write(struct bfa_dconf_mod_s *dconf); static void bfa_dconf_init_cb(void *arg, bfa_status_t status); /* - * Begining state of dconf module. Waiting for an event to start. + * Beginning state of dconf module. Waiting for an event to start. */ static void bfa_dconf_sm_uninit(struct bfa_dconf_mod_s *dconf, enum bfa_dconf_event event) diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 1a99d4b5b50..7b916e04ca5 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -530,7 +530,7 @@ struct bfa_diag_results_fwping { struct bfa_diag_qtest_result_s { u32 status; - u16 count; /* sucessful queue test count */ + u16 count; /* successful queue test count */ u8 queue; u8 rsvd; /* 64-bit align */ }; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index ae1cb7639d9..e0558656c64 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -908,7 +908,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, return; default: - printk(KERN_ERR PFX "Unkonwn netevent %ld", event); + printk(KERN_ERR PFX "Unknown netevent %ld", event); return; } @@ -1738,7 +1738,7 @@ static int bnx2fc_ulp_get_stats(void *handle) /** * bnx2fc_ulp_start - cnic callback to initialize & start adapter instance * - * @handle: transport handle pointing to adapter struture + * @handle: transport handle pointing to adapter structure * * This function maps adapter structure to pcidev structure and initiates * firmware handshake to enable/initialize on-chip FCoE components. diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 33d6630529d..91eec60252e 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1264,6 +1264,9 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) int rc = 0; u64 mask64; + memset(&iscsi_init, 0x00, sizeof(struct iscsi_kwqe_init1)); + memset(&iscsi_init2, 0x00, sizeof(struct iscsi_kwqe_init2)); + bnx2i_adjust_qp_size(hba); iscsi_init.flags = diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h index d3e4d7c6f57..fbf6f0f4b0d 100644 --- a/drivers/scsi/gdth.h +++ b/drivers/scsi/gdth.h @@ -49,15 +49,6 @@ /* GDT_ISA */ #define GDT2_ID 0x0120941c /* GDT2000/2020 */ -/* vendor ID, device IDs (PCI) */ -/* these defines should already exist in <linux/pci.h> */ -#ifndef PCI_VENDOR_ID_VORTEX -#define PCI_VENDOR_ID_VORTEX 0x1119 /* PCI controller vendor ID */ -#endif -#ifndef PCI_VENDOR_ID_INTEL -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - #ifndef PCI_DEVICE_ID_VORTEX_GDT60x0 /* GDT_PCI */ #define PCI_DEVICE_ID_VORTEX_GDT60x0 0 /* GDT6000/6020/6050 */ diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 796482badf1..2b4261cb774 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -1315,8 +1315,9 @@ static void complete_scsi_command(struct CommandList *cp) } break; case CMD_PROTOCOL_ERR: + cmd->result = DID_ERROR << 16; dev_warn(&h->pdev->dev, "cp %p has " - "protocol error \n", cp); + "protocol error\n", cp); break; case CMD_HARDWARE_ERR: cmd->result = DID_ERROR << 16; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 467dc38246f..1059c99690e 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -192,7 +192,7 @@ static const struct ipr_chip_t ipr_chip[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, IPR_USE_MSI, IPR_SIS64, IPR_MMIO, &ipr_chip_cfg[2] } }; -static int ipr_max_bus_speeds [] = { +static int ipr_max_bus_speeds[] = { IPR_80MBs_SCSI_RATE, IPR_U160_SCSI_RATE, IPR_U320_SCSI_RATE }; @@ -562,7 +562,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->u.add_data = add_data; } #else -#define ipr_trc_hook(ipr_cmd, type, add_data) do { } while(0) +#define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) #endif /** @@ -1002,7 +1002,7 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, **/ static void ipr_update_ata_class(struct ipr_resource_entry *res, unsigned int proto) { - switch(proto) { + switch (proto) { case IPR_PROTO_SATA: case IPR_PROTO_SAS_STP: res->ata_class = ATA_DEV_ATA; @@ -3043,7 +3043,7 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump) } #else -#define ipr_get_ioa_dump(ioa_cfg, dump) do { } while(0) +#define ipr_get_ioa_dump(ioa_cfg, dump) do { } while (0) #endif /** @@ -3055,7 +3055,7 @@ static void ipr_get_ioa_dump(struct ipr_ioa_cfg *ioa_cfg, struct ipr_dump *dump) **/ static void ipr_release_dump(struct kref *kref) { - struct ipr_dump *dump = container_of(kref,struct ipr_dump,kref); + struct ipr_dump *dump = container_of(kref, struct ipr_dump, kref); struct ipr_ioa_cfg *ioa_cfg = dump->ioa_cfg; unsigned long lock_flags = 0; int i; @@ -3142,7 +3142,7 @@ restart: break; } } - } while(did_work); + } while (did_work); list_for_each_entry(res, &ioa_cfg->used_res_q, queue) { if (res->add_to_ml) { @@ -3268,7 +3268,7 @@ static ssize_t ipr_show_log_level(struct device *dev, * number of bytes printed to buffer **/ static ssize_t ipr_store_log_level(struct device *dev, - struct device_attribute *attr, + struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); @@ -3315,7 +3315,7 @@ static ssize_t ipr_store_diagnostics(struct device *dev, return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); @@ -3682,7 +3682,7 @@ static int ipr_update_ioa_ucode(struct ipr_ioa_cfg *ioa_cfg, unsigned long lock_flags; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); @@ -3746,7 +3746,7 @@ static ssize_t ipr_store_update_fw(struct device *dev, len = snprintf(fname, 99, "%s", buf); fname[len-1] = '\0'; - if(request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) { + if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) { dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname); return -EIO; } @@ -4612,7 +4612,7 @@ static int ipr_slave_alloc(struct scsi_device *sdev) * Return value: * SUCCESS / FAILED **/ -static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd) +static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) { struct ipr_ioa_cfg *ioa_cfg; int rc; @@ -4634,7 +4634,7 @@ static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd) return rc; } -static int ipr_eh_host_reset(struct scsi_cmnd * cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { int rc; @@ -4701,7 +4701,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, } LEAVE; - return (IPR_IOASC_SENSE_KEY(ioasc) ? -EIO : 0); + return IPR_IOASC_SENSE_KEY(ioasc) ? -EIO : 0; } /** @@ -4725,7 +4725,7 @@ static int ipr_sata_reset(struct ata_link *link, unsigned int *classes, ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); @@ -4753,7 +4753,7 @@ static int ipr_sata_reset(struct ata_link *link, unsigned int *classes, * Return value: * SUCCESS / FAILED **/ -static int __ipr_eh_dev_reset(struct scsi_cmnd * scsi_cmd) +static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) { struct ipr_cmnd *ipr_cmd; struct ipr_ioa_cfg *ioa_cfg; @@ -4811,10 +4811,10 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd * scsi_cmd) res->resetting_device = 0; LEAVE; - return (rc ? FAILED : SUCCESS); + return rc ? FAILED : SUCCESS; } -static int ipr_eh_dev_reset(struct scsi_cmnd * cmd) +static int ipr_eh_dev_reset(struct scsi_cmnd *cmd) { int rc; @@ -4910,7 +4910,7 @@ static void ipr_abort_timeout(struct ipr_cmnd *ipr_cmd) * Return value: * SUCCESS / FAILED **/ -static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) +static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) { struct ipr_cmnd *ipr_cmd; struct ipr_ioa_cfg *ioa_cfg; @@ -4979,7 +4979,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) res->needs_sync_complete = 1; LEAVE; - return (IPR_IOASC_SENSE_KEY(ioasc) ? FAILED : SUCCESS); + return IPR_IOASC_SENSE_KEY(ioasc) ? FAILED : SUCCESS; } /** @@ -4989,7 +4989,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) * Return value: * SUCCESS / FAILED **/ -static int ipr_eh_abort(struct scsi_cmnd * scsi_cmd) +static int ipr_eh_abort(struct scsi_cmnd *scsi_cmd) { unsigned long flags; int rc; @@ -5907,7 +5907,7 @@ static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) * Return value: * pointer to buffer with description string **/ -static const char * ipr_ioa_info(struct Scsi_Host *host) +static const char *ipr_ioa_info(struct Scsi_Host *host) { static char buffer[512]; struct ipr_ioa_cfg *ioa_cfg; @@ -5965,7 +5965,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6005,7 +6005,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6330,7 +6330,7 @@ static int ipr_invalid_adapter(struct ipr_ioa_cfg *ioa_cfg) int i; if ((ioa_cfg->type == 0x5702) && (ioa_cfg->pdev->revision < 4)) { - for (i = 0; i < ARRAY_SIZE(ipr_blocked_processors); i++){ + for (i = 0; i < ARRAY_SIZE(ipr_blocked_processors); i++) { if (__is_processor(ipr_blocked_processors[i])) return 1; } @@ -6608,7 +6608,7 @@ static void ipr_scsi_bus_speed_limit(struct ipr_ioa_cfg *ioa_cfg) * none **/ static void ipr_modify_ioafp_mode_page_28(struct ipr_ioa_cfg *ioa_cfg, - struct ipr_mode_pages *mode_pages) + struct ipr_mode_pages *mode_pages) { int i, entry_length; struct ipr_dev_bus_entry *bus; @@ -8022,7 +8022,7 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) ipr_reinit_ipr_cmnd(ipr_cmd); ipr_cmd->job_step_failed = ipr_reset_cmd_failed; rc = ipr_cmd->job_step(ipr_cmd); - } while(rc == IPR_RC_JOB_CONTINUE); + } while (rc == IPR_RC_JOB_CONTINUE); } /** @@ -8283,7 +8283,7 @@ static void ipr_free_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) } if (ioa_cfg->ipr_cmd_pool) - pci_pool_destroy (ioa_cfg->ipr_cmd_pool); + pci_pool_destroy(ioa_cfg->ipr_cmd_pool); kfree(ioa_cfg->ipr_cmnd_list); kfree(ioa_cfg->ipr_cmnd_list_dma); @@ -8363,8 +8363,8 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) dma_addr_t dma_addr; int i; - ioa_cfg->ipr_cmd_pool = pci_pool_create (IPR_NAME, ioa_cfg->pdev, - sizeof(struct ipr_cmnd), 512, 0); + ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, + sizeof(struct ipr_cmnd), 512, 0); if (!ioa_cfg->ipr_cmd_pool) return -ENOMEM; @@ -8378,7 +8378,7 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) } for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { - ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); + ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); if (!ipr_cmd) { ipr_free_cmd_blks(ioa_cfg); @@ -8964,7 +8964,7 @@ static void ipr_scan_vsets(struct ipr_ioa_cfg *ioa_cfg) int target, lun; for (target = 0; target < IPR_MAX_NUM_TARGETS_PER_BUS; target++) - for (lun = 0; lun < IPR_MAX_NUM_VSET_LUNS_PER_TARGET; lun++ ) + for (lun = 0; lun < IPR_MAX_NUM_VSET_LUNS_PER_TARGET; lun++) scsi_add_device(ioa_cfg->host, IPR_VSET_BUS, target, lun); } @@ -9010,7 +9010,7 @@ static void __ipr_remove(struct pci_dev *pdev) ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); @@ -9139,7 +9139,7 @@ static void ipr_shutdown(struct pci_dev *pdev) unsigned long lock_flags = 0; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - while(ioa_cfg->in_reset_reload) { + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 45385f53164..b334fdc1726 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -492,7 +492,7 @@ static void sci_controller_process_completions(struct isci_host *ihost) u32 event_cycle; dev_dbg(&ihost->pdev->dev, - "%s: completion queue begining get:0x%08x\n", + "%s: completion queue beginning get:0x%08x\n", __func__, ihost->completion_queue_get); diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 92c1d86d1fc..9be45a2b223 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -222,7 +222,7 @@ static struct sas_domain_function_template isci_transport_ops = { * @isci_host: This parameter specifies the lldd specific wrapper for the * libsas sas_ha struct. * - * This method returns an error code indicating sucess or failure. The user + * This method returns an error code indicating success or failure. The user * should check for possible memory allocation error return otherwise, a zero * indicates success. */ diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c index 2fb85bf7544..13098b09a82 100644 --- a/drivers/scsi/isci/port.c +++ b/drivers/scsi/isci/port.c @@ -212,7 +212,7 @@ static void isci_port_link_up(struct isci_host *isci_host, memcpy(iphy->sas_phy.attached_sas_addr, iphy->frame_rcvd.iaf.sas_addr, SAS_ADDR_SIZE); } else { - dev_err(&isci_host->pdev->dev, "%s: unkown target\n", __func__); + dev_err(&isci_host->pdev->dev, "%s: unknown target\n", __func__); success = false; } diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 7a0431c7349..c1bafc3f3fb 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2240,7 +2240,7 @@ static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ire status = ireq->sci_status; sci_change_state(&idev->sm, SCI_STP_DEV_ATAPI_ERROR); } else { - /* If receiving any non-sucess TC status, no UF + /* If receiving any non-success TC status, no UF * received yet, then an UF for the status fis * is coming after (XXX: suspect this is * actually a protocol error or a bug like the diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 6bc74eb012c..b6f19a1db78 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -532,7 +532,7 @@ int isci_task_abort_task(struct sas_task *task) /* The request has already completed and there * is nothing to do here other than to set the task * done bit, and indicate that the task abort function - * was sucessful. + * was successful. */ spin_lock_irqsave(&task->task_state_lock, flags); task->task_state_flags |= SAS_TASK_STATE_DONE; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 45c15208be9..29937b606c8 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6607,7 +6607,7 @@ out_error: * we just use some constant number as place holder. * * Return codes - * 0 - sucessful + * 0 - successful * -ENOMEM - No availble memory * -EIO - The mailbox failed to complete successfully. **/ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 9cbd20b1328..0e7e144507b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4739,7 +4739,7 @@ lpfc_sli4_read_rev(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, * is attached to. * * Return codes - * 0 - sucessful + * 0 - successful * otherwise - failed to retrieve physical port name **/ static int @@ -15209,7 +15209,7 @@ lpfc_check_next_fcf_pri_level(struct lpfc_hba *phba) /* * if next_fcf_pri was not set above and the list is not empty then * we have failed flogis on all of them. So reset flogi failed - * and start at the begining. + * and start at the beginning. */ if (!next_fcf_pri && !list_empty(&phba->fcf.fcf_pri_list)) { list_for_each_entry(fcf_pri, &phba->fcf.fcf_pri_list, list) { diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 97825f11695..76ad72d32c3 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -305,12 +305,11 @@ mega_query_adapter(adapter_t *adapter) adapter->host->sg_tablesize = adapter->sglen; - /* use HP firmware and bios version encoding Note: fw_version[0|1] and bios_version[0|1] were originally shifted right 8 bits making them zero. This 0 value was hardcoded to fix sparse warnings. */ - if (adapter->product_info.subsysvid == HP_SUBSYS_VID) { + if (adapter->product_info.subsysvid == PCI_VENDOR_ID_HP) { sprintf (adapter->fw_version, "%c%d%d.%d%d", adapter->product_info.fw_version[2], 0, @@ -4716,7 +4715,7 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) * support, since this firmware cannot handle 64 bit * addressing */ - if ((subsysvid == HP_SUBSYS_VID) && + if ((subsysvid == PCI_VENDOR_ID_HP) && ((subsysid == 0x60E7) || (subsysid == 0x60E8))) { /* * which firmware diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h index 9a7897f8ca4..4fb2adf6b80 100644 --- a/drivers/scsi/megaraid.h +++ b/drivers/scsi/megaraid.h @@ -45,45 +45,10 @@ #define MAX_DEV_TYPE 32 -#ifndef PCI_VENDOR_ID_LSI_LOGIC -#define PCI_VENDOR_ID_LSI_LOGIC 0x1000 -#endif - -#ifndef PCI_VENDOR_ID_AMI -#define PCI_VENDOR_ID_AMI 0x101E -#endif - -#ifndef PCI_VENDOR_ID_DELL -#define PCI_VENDOR_ID_DELL 0x1028 -#endif - -#ifndef PCI_VENDOR_ID_INTEL -#define PCI_VENDOR_ID_INTEL 0x8086 -#endif - -#ifndef PCI_DEVICE_ID_AMI_MEGARAID -#define PCI_DEVICE_ID_AMI_MEGARAID 0x9010 -#endif - -#ifndef PCI_DEVICE_ID_AMI_MEGARAID2 -#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 -#endif - -#ifndef PCI_DEVICE_ID_AMI_MEGARAID3 -#define PCI_DEVICE_ID_AMI_MEGARAID3 0x1960 -#endif - #define PCI_DEVICE_ID_DISCOVERY 0x000E #define PCI_DEVICE_ID_PERC4_DI 0x000F #define PCI_DEVICE_ID_PERC4_QC_VERDE 0x0407 -/* Sub-System Vendor IDs */ -#define AMI_SUBSYS_VID 0x101E -#define DELL_SUBSYS_VID 0x1028 -#define HP_SUBSYS_VID 0x103C -#define LSI_SUBSYS_VID 0x1000 -#define INTEL_SUBSYS_VID 0x8086 - #define HBA_SIGNATURE 0x3344 #define HBA_SIGNATURE_471 0xCCCC #define HBA_SIGNATURE_64BIT 0x0299 diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index b25757d1e91..9d5a56c4b33 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -1209,6 +1209,13 @@ _base_check_enable_msix(struct MPT2SAS_ADAPTER *ioc) u16 message_control; + /* Check whether controller SAS2008 B0 controller, + if it is SAS2008 B0 controller use IO-APIC instead of MSIX */ + if (ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2008 && + ioc->pdev->revision == 0x01) { + return -EINVAL; + } + base = pci_find_capability(ioc->pdev, PCI_CAP_ID_MSIX); if (!base) { dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "msix not " diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c index 88cf1db21a7..783edc7c6b9 100644 --- a/drivers/scsi/mvumi.c +++ b/drivers/scsi/mvumi.c @@ -122,7 +122,7 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba, if (!res) { dev_err(&mhba->pdev->dev, - "Failed to allocate memory for resouce manager.\n"); + "Failed to allocate memory for resource manager.\n"); return NULL; } @@ -1007,13 +1007,13 @@ static int mvumi_handshake(struct mvumi_hba *mhba) tmp |= INT_MAP_COMAOUT | INT_MAP_COMAERR; iowrite32(tmp, regs + CPU_ENPOINTA_MASK_REG); iowrite32(mhba->list_num_io, mhba->ib_shadow); - /* Set InBound List Avaliable count shadow */ + /* Set InBound List Available count shadow */ iowrite32(lower_32_bits(mhba->ib_shadow_phys), regs + CLA_INB_AVAL_COUNT_BASEL); iowrite32(upper_32_bits(mhba->ib_shadow_phys), regs + CLA_INB_AVAL_COUNT_BASEH); - /* Set OutBound List Avaliable count shadow */ + /* Set OutBound List Available count shadow */ iowrite32((mhba->list_num_io-1) | CL_POINTER_TOGGLE, mhba->ob_shadow); iowrite32(lower_32_bits(mhba->ob_shadow_phys), regs + 0x5B0); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 9da426628b9..487e3c8411c 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -803,7 +803,7 @@ static void qla4xxx_conn_get_stats(struct iscsi_cls_conn *cls_conn, iscsi_stats_dma); if (ret != QLA_SUCCESS) { ql4_printk(KERN_ERR, ha, - "Unable to retreive iscsi stats\n"); + "Unable to retrieve iscsi stats\n"); goto free_stats; } @@ -4338,7 +4338,7 @@ static int qla4xxx_compare_tuple_ddb(struct scsi_qla_host *ha, return QLA_ERROR; /* For multi sessions, driver generates the ISID, so do not compare - * ISID in reset path since it would be a comparision between the + * ISID in reset path since it would be a comparison between the * driver generated ISID and firmware generated ISID. This could * lead to adding duplicated DDBs in the list as driver generated * ISID would not match firmware generated ISID. @@ -5326,7 +5326,7 @@ static void qla4xxx_destroy_fw_ddb_session(struct scsi_qla_host *ha) } } /** - * qla4xxx_remove_adapter - calback function to remove adapter. + * qla4xxx_remove_adapter - callback function to remove adapter. * @pci_dev: PCI device pointer **/ static void __devexit qla4xxx_remove_adapter(struct pci_dev *pdev) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c7030fbee79..3e79a2f0004 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -331,7 +331,7 @@ static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx, int i; for_each_sg(table->sgl, sg_elem, table->nents, i) - sg_set_buf(&sg[idx++], sg_virt(sg_elem), sg_elem->length); + sg[idx++] = *sg_elem; *p_idx = idx; } diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 4411d422440..20b3a483c2c 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -295,7 +295,7 @@ static void ll_adapter_reset(const struct pvscsi_adapter *adapter) static void ll_bus_reset(const struct pvscsi_adapter *adapter) { - dev_dbg(pvscsi_dev(adapter), "Reseting bus on %p\n", adapter); + dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter); pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0); } @@ -304,7 +304,7 @@ static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target) { struct PVSCSICmdDescResetDevice cmd = { 0 }; - dev_dbg(pvscsi_dev(adapter), "Reseting device: target=%u\n", target); + dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target); cmd.target = target; diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 32c26d795ed..8f32a1323a7 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -355,7 +355,7 @@ int __init register_intc_controller(struct intc_desc *desc) if (unlikely(res)) { if (res == -EEXIST) { res = irq_domain_associate(d->domain, - irq, irq); + irq2, irq2); if (unlikely(res)) { pr_err("domain association " "failure\n"); diff --git a/drivers/sh/pfc/pinctrl.c b/drivers/sh/pfc/pinctrl.c index 2804eaae804..0646bf6e788 100644 --- a/drivers/sh/pfc/pinctrl.c +++ b/drivers/sh/pfc/pinctrl.c @@ -208,10 +208,13 @@ static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev, break; case PINMUX_TYPE_GPIO: + case PINMUX_TYPE_INPUT: + case PINMUX_TYPE_OUTPUT: break; default: pr_err("Unsupported mux type (%d), bailing...\n", pinmux_type); - return -ENOTSUPP; + ret = -ENOTSUPP; + goto err; } ret = 0; diff --git a/drivers/spi/spi-au1550.c b/drivers/spi/spi-au1550.c index 5784c879961..4de66d1cfe5 100644 --- a/drivers/spi/spi-au1550.c +++ b/drivers/spi/spi-au1550.c @@ -475,7 +475,7 @@ static irqreturn_t au1550_spi_dma_irq_callback(struct au1550_spi *hw) /* * due to an spi error we consider transfer as done, * so mask all events until before next transfer start - * and stop the possibly running dma immediatelly + * and stop the possibly running dma immediately */ au1550_spi_mask_ack_all(hw); au1xxx_dbdma_stop(hw->dma_rx_ch); diff --git a/drivers/spi/spi-bfin-sport.c b/drivers/spi/spi-bfin-sport.c index 1fe51198a62..6555ecd0730 100644 --- a/drivers/spi/spi-bfin-sport.c +++ b/drivers/spi/spi-bfin-sport.c @@ -467,7 +467,7 @@ bfin_sport_spi_pump_transfers(unsigned long data) dev_dbg(drv_data->dev, "IO write error!\n"); drv_data->state = ERROR_STATE; } else { - /* Update total byte transfered */ + /* Update total byte transferred */ message->actual_length += transfer->len; /* Move to next transfer of this msg */ drv_data->state = bfin_sport_spi_next_transfer(drv_data); diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c index 698018fd992..9d9071b730b 100644 --- a/drivers/spi/spi-oc-tiny.c +++ b/drivers/spi/spi-oc-tiny.c @@ -129,7 +129,7 @@ static int tiny_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) unsigned int i; if (hw->irq >= 0) { - /* use intrrupt driven data transfer */ + /* use interrupt driven data transfer */ hw->len = t->len; hw->txp = t->tx_buf; hw->rxp = t->rx_buf; diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 75ac9d48ef4..7a85f22b647 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -101,7 +101,7 @@ struct spi_ppc4xx_regs { u8 dummy; /* * Clock divisor modulus register - * This uses the follwing formula: + * This uses the following formula: * SCPClkOut = OPBCLK/(4(CDM + 1)) * or * CDM = (OPBCLK/4*SCPClkOut) - 1 @@ -201,7 +201,7 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) return -EINVAL; } - /* Write new configration */ + /* Write new configuration */ out_8(&hw->regs->mode, cs->mode); /* Set the clock */ diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index cd56dcf4632..1284c9b7465 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -505,7 +505,7 @@ static int pch_spi_transfer(struct spi_device *pspi, struct spi_message *pmsg) } if (unlikely(pspi->max_speed_hz == 0)) { - dev_err(&pspi->dev, "%s pch_spi_tranfer maxspeed=%d\n", + dev_err(&pspi->dev, "%s pch_spi_transfer maxspeed=%d\n", __func__, pspi->max_speed_hz); retval = -EINVAL; goto err_out; diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 0c4760fabfc..240f7aa76ed 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -662,7 +662,7 @@ int iscsi_extract_key_value(char *textbuf, char **key, char **value) { *value = strchr(textbuf, '='); if (!*value) { - pr_err("Unable to locate \"=\" seperator for key," + pr_err("Unable to locate \"=\" separator for key," " ignoring request.\n"); return -1; } @@ -1269,7 +1269,7 @@ static int iscsi_check_value(struct iscsi_param *param, char *value) comma_ptr = strchr(value, ','); if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) { - pr_err("Detected value seperator \",\", but" + pr_err("Detected value separator \",\", but" " key \"%s\" does not allow a value list," " protocol error.\n", param->name); return -1; diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index d9569658476..3440812b4a8 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -624,7 +624,7 @@ static ssize_t usb_device_read(struct file *file, char __user *buf, /* print devices for all busses */ list_for_each_entry(bus, &usb_bus_list, bus_list) { /* recurse through all children of the root hub */ - if (!bus->root_hub) + if (!bus_to_hcd(bus)->rh_registered) continue; usb_lock_device(bus->root_hub); ret = usb_device_dump(&buf, &nbytes, &skip_bytes, ppos, diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index bc84106ac05..75ba2091f9b 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1011,10 +1011,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (retval) { dev_err (parent_dev, "can't register root hub for %s, %d\n", dev_name(&usb_dev->dev), retval); - } - mutex_unlock(&usb_bus_list_lock); - - if (retval == 0) { + } else { spin_lock_irq (&hcd_root_hub_lock); hcd->rh_registered = 1; spin_unlock_irq (&hcd_root_hub_lock); @@ -1023,6 +1020,7 @@ static int register_root_hub(struct usb_hcd *hcd) if (HCD_DEAD(hcd)) usb_hc_died (hcd); /* This time clean up */ } + mutex_unlock(&usb_bus_list_lock); return retval; } diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index aaa8d2bce21..0bf72f943b0 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -467,7 +467,8 @@ static irqreturn_t ohci_hcd_at91_overcurrent_irq(int irq, void *data) /* From the GPIO notifying the over-current situation, find * out the corresponding port */ at91_for_each_port(port) { - if (gpio_to_irq(pdata->overcurrent_pin[port]) == irq) { + if (gpio_is_valid(pdata->overcurrent_pin[port]) && + gpio_to_irq(pdata->overcurrent_pin[port]) == irq) { gpio = pdata->overcurrent_pin[port]; break; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 211a4920b88..d8dedc7d391 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -76,9 +76,24 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) schedule_work(&virqfd->inject); } - if (flags & POLLHUP) - /* The eventfd is closing, detach from VFIO */ - virqfd_deactivate(virqfd); + if (flags & POLLHUP) { + unsigned long flags; + spin_lock_irqsave(&virqfd->vdev->irqlock, flags); + + /* + * The eventfd is closing, if the virqfd has not yet been + * queued for release, as determined by testing whether the + * vdev pointer to it is still valid, queue it now. As + * with kvm irqfds, we know we won't race against the virqfd + * going away because we hold wqh->lock to get here. + */ + if (*(virqfd->pvirqfd) == virqfd) { + *(virqfd->pvirqfd) = NULL; + virqfd_deactivate(virqfd); + } + + spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); + } return 0; } @@ -93,7 +108,6 @@ static void virqfd_ptable_queue_proc(struct file *file, static void virqfd_shutdown(struct work_struct *work) { struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - struct virqfd **pvirqfd = virqfd->pvirqfd; u64 cnt; eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); @@ -101,7 +115,6 @@ static void virqfd_shutdown(struct work_struct *work) eventfd_ctx_put(virqfd->eventfd); kfree(virqfd); - *pvirqfd = NULL; } static void virqfd_inject(struct work_struct *work) @@ -122,15 +135,11 @@ static int virqfd_enable(struct vfio_pci_device *vdev, int ret = 0; unsigned int events; - if (*pvirqfd) - return -EBUSY; - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); if (!virqfd) return -ENOMEM; virqfd->pvirqfd = pvirqfd; - *pvirqfd = virqfd; virqfd->vdev = vdev; virqfd->handler = handler; virqfd->thread = thread; @@ -154,6 +163,23 @@ static int virqfd_enable(struct vfio_pci_device *vdev, virqfd->eventfd = ctx; /* + * virqfds can be released by closing the eventfd or directly + * through ioctl. These are both done through a workqueue, so + * we update the pointer to the virqfd under lock to avoid + * pushing multiple jobs to release the same virqfd. + */ + spin_lock_irq(&vdev->irqlock); + + if (*pvirqfd) { + spin_unlock_irq(&vdev->irqlock); + ret = -EBUSY; + goto fail; + } + *pvirqfd = virqfd; + + spin_unlock_irq(&vdev->irqlock); + + /* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd. */ @@ -187,19 +213,29 @@ fail: fput(file); kfree(virqfd); - *pvirqfd = NULL; return ret; } -static void virqfd_disable(struct virqfd *virqfd) +static void virqfd_disable(struct vfio_pci_device *vdev, + struct virqfd **pvirqfd) { - if (!virqfd) - return; + unsigned long flags; + + spin_lock_irqsave(&vdev->irqlock, flags); + + if (*pvirqfd) { + virqfd_deactivate(*pvirqfd); + *pvirqfd = NULL; + } - virqfd_deactivate(virqfd); + spin_unlock_irqrestore(&vdev->irqlock, flags); - /* Block until we know all outstanding shutdown jobs have completed. */ + /* + * Block until we know all outstanding shutdown jobs have completed. + * Even if we don't queue the job, flush the wq to be sure it's + * been released. + */ flush_workqueue(vfio_irqfd_cleanup_wq); } @@ -392,8 +428,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev->ctx[0].unmask); - virqfd_disable(vdev->ctx[0].mask); + virqfd_disable(vdev, &vdev->ctx[0].unmask); + virqfd_disable(vdev, &vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -539,8 +575,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev->ctx[i].unmask); - virqfd_disable(vdev->ctx[i].mask); + virqfd_disable(vdev, &vdev->ctx[i].unmask); + virqfd_disable(vdev, &vdev->ctx[i].mask); } if (msix) { @@ -577,7 +613,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, vfio_send_intx_eventfd, NULL, &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev->ctx[0].unmask); + virqfd_disable(vdev, &vdev->ctx[0].unmask); } return 0; diff --git a/drivers/video/backlight/88pm860x_bl.c b/drivers/video/backlight/88pm860x_bl.c index f75da8758ad..f49181c7311 100644 --- a/drivers/video/backlight/88pm860x_bl.c +++ b/drivers/video/backlight/88pm860x_bl.c @@ -228,7 +228,6 @@ static int pm860x_backlight_probe(struct platform_device *pdev) data->port = pdata->flags; if (data->port < 0) { dev_err(&pdev->dev, "wrong platform data is assigned"); - kfree(data); return -EINVAL; } diff --git a/drivers/video/exynos/exynos_mipi_dsi.c b/drivers/video/exynos/exynos_mipi_dsi.c index 4bc2b8a5dd8..663c308d0e7 100644 --- a/drivers/video/exynos/exynos_mipi_dsi.c +++ b/drivers/video/exynos/exynos_mipi_dsi.c @@ -461,7 +461,7 @@ static int exynos_mipi_dsi_probe(struct platform_device *pdev) done: platform_set_drvdata(pdev, dsim); - dev_dbg(&pdev->dev, "%s() completed sucessfuly (%s mode)\n", __func__, + dev_dbg(&pdev->dev, "%s() completed successfully (%s mode)\n", __func__, dsim_config->e_interface == DSIM_COMMAND ? "CPU" : "RGB"); return 0; diff --git a/drivers/video/tmiofb.c b/drivers/video/tmiofb.c index 8e4a446b5ed..b244f060f15 100644 --- a/drivers/video/tmiofb.c +++ b/drivers/video/tmiofb.c @@ -694,6 +694,10 @@ static int __devinit tmiofb_probe(struct platform_device *dev) dev_err(&dev->dev, "NULL platform data!\n"); return -EINVAL; } + if (ccr == NULL || lcr == NULL || vram == NULL || irq < 0) { + dev_err(&dev->dev, "missing resources\n"); + return -EINVAL; + } info = framebuffer_alloc(sizeof(struct tmiofb_par), &dev->dev); diff --git a/drivers/w1/masters/ds1wm.c b/drivers/w1/masters/ds1wm.c index 530a2d30906..7c294f4dc0e 100644 --- a/drivers/w1/masters/ds1wm.c +++ b/drivers/w1/masters/ds1wm.c @@ -349,7 +349,7 @@ static void ds1wm_search(void *data, struct w1_master *master_dev, "pass: %d entering ASM\n", pass); ds1wm_write_register(ds1wm_data, DS1WM_CMD, DS1WM_CMD_SRA); dev_dbg(&ds1wm_data->pdev->dev, - "pass: %d begining nibble loop\n", pass); + "pass: %d beginning nibble loop\n", pass); r_prime = 0; d = 0; diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 1eff743ec49..ae60406ea8a 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -814,6 +814,9 @@ static int __devinit hpwdt_init_one(struct pci_dev *dev, hpwdt_timer_reg = pci_mem_addr + 0x70; hpwdt_timer_con = pci_mem_addr + 0x72; + /* Make sure that timer is disabled until /dev/watchdog is opened */ + hpwdt_stop(); + /* Make sure that we have a valid soft_margin */ if (hpwdt_change_timer(soft_margin)) hpwdt_change_timer(DEFAULT_MARGIN); diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 6aa46a90ff0..3796434991f 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -128,11 +128,12 @@ EXPORT_SYMBOL_GPL(watchdog_register_device); void watchdog_unregister_device(struct watchdog_device *wdd) { int ret; - int devno = wdd->cdev.dev; + int devno; if (wdd == NULL) return; + devno = wdd->cdev.dev; ret = watchdog_dev_unregister(wdd); if (ret) pr_err("error unregistering /dev/watchdog (err=%d)\n", ret); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1ffd03bf8e1..7f124160848 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, - pages, true); + err = gnttab_unmap_refs(map->unmap_ops + offset, + use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, + pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0bfc1ef1125..006726688ba 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; bool lazy = false; @@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], clear_pte); + ret = m2p_remove_override(pages[i], kmap_ops ? + &kmap_ops[i] : NULL); if (ret) return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0d195b50766..9821b672f5a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -116,7 +116,7 @@ struct btrfs_ordered_sum; #define BTRFS_FREE_SPACE_OBJECTID -11ULL /* - * The inode number assigned to the special inode for sotring + * The inode number assigned to the special inode for storing * free ino cache */ #define BTRFS_FREE_INO_OBJECTID -12ULL diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ab530059584..c9d703693df 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,7 +18,7 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ -/* these are the possible values of struct btrfs_delayed_ref->action */ +/* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f95464..316b07a866d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1971,8 +1971,8 @@ out: ordered_extent->len - 1, NULL, GFP_NOFS); /* - * This needs to be dont to make sure anybody waiting knows we are done - * upating everything for this ordered extent. + * This needs to be done to make sure anybody waiting knows we are done + * updating everything for this ordered extent. */ btrfs_remove_ordered_extent(inode, ordered_extent); diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 7dab9c04ad5..53cf2aabce8 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, } ctoUTF16_out: - return i; + return j; } #ifdef CONFIG_CIFS_SMB2 diff --git a/fs/dcache.c b/fs/dcache.c index 16521a9f203..693f95bf1ca 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1134,6 +1134,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1141,7 +1143,7 @@ rename_retry: EXPORT_SYMBOL(have_submounts); /* - * Search the dentry child list for the specified parent, + * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level * whenever the d_subdirs list is non-empty and continue @@ -1236,6 +1238,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -3035,6 +3039,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 2340f6978d6..c5ca6ae5a30 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -526,73 +526,51 @@ struct array_data { u32 elements; }; -static int u32_array_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return nonseekable_open(inode, file); -} - -static size_t format_array(char *buf, size_t bufsize, const char *fmt, - u32 *array, u32 array_size) +static size_t u32_format_array(char *buf, size_t bufsize, + u32 *array, int array_size) { size_t ret = 0; - u32 i; - for (i = 0; i < array_size; i++) { + while (--array_size >= 0) { size_t len; + char term = array_size ? ' ' : '\n'; - len = snprintf(buf, bufsize, fmt, array[i]); - len++; /* ' ' or '\n' */ + len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; - if (buf) { - buf += len; - bufsize -= len; - buf[-1] = (i == array_size-1) ? '\n' : ' '; - } + buf += len; + bufsize -= len; } - - ret++; /* \0 */ - if (buf) - *buf = '\0'; - return ret; } -static char *format_array_alloc(const char *fmt, u32 *array, - u32 array_size) +static int u32_array_open(struct inode *inode, struct file *file) { - size_t len = format_array(NULL, 0, fmt, array, array_size); - char *ret; - - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; + struct array_data *data = inode->i_private; + int size, elements = data->elements; + char *buf; + + /* + * Max size: + * - 10 digits + ' '/'\n' = 11 bytes per number + * - terminating NUL character + */ + size = elements*11; + buf = kmalloc(size+1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + buf[size] = 0; + + file->private_data = buf; + u32_format_array(buf, size, data->array, data->elements); - format_array(ret, len, fmt, array, array_size); - return ret; + return nonseekable_open(inode, file); } static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; - struct array_data *data = inode->i_private; - size_t size; - - if (*ppos == 0) { - if (file->private_data) { - kfree(file->private_data); - file->private_data = NULL; - } - - file->private_data = format_array_alloc("%u", data->array, - data->elements); - } - - size = 0; - if (file->private_data) - size = strlen(file->private_data); + size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 376aa77f3ca..2616d0ea5c5 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode) /** * ext2_free_blocks() -- Free given blocks and update quota and i_blocks * @inode: inode - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free */ void ext2_free_blocks (struct inode * inode, unsigned long block, diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 90d901f0486..7320a66e958 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode) * ext3_free_blocks_sb() -- Free given blocks and update quota * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to free + * @block: start physical block to free * @count: number of blocks to free * @pdquot_freed_blocks: pointer to quota */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index ff574b4e345..7e87e37a372 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3207,7 +3207,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dff171c3a12..c862ee5fe79 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3313,7 +3313,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * handle: The journal handle * inode: The files inode * page: A locked page that contains the offset "from" - * from: The starting byte offset (from the begining of the file) + * from: The starting byte offset (from the beginning of the file) * to begin discarding * len: The length of bytes to discard * flags: Optional flags that may be used: @@ -3321,11 +3321,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle, * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED * Only zero the regions of the page whose buffer heads * have already been unmapped. This flag is appropriate - * for updateing the contents of a page whose blocks may + * for updating the contents of a page whose blocks may * have already been released, and we only want to zero * out the regions that correspond to those released blocks. * - * Returns zero on sucess or negative on failure. + * Returns zero on success or negative on failure. */ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, struct inode *inode, struct page *page, loff_t from, @@ -3486,7 +3486,7 @@ int ext4_can_truncate(struct inode *inode) * @offset: The offset where the hole will begin * @len: The length of the hole * - * Returns: 0 on sucess or negative on failure + * Returns: 0 on success or negative on failure */ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) @@ -4008,7 +4008,7 @@ static int ext4_inode_blocks_set(handle_t *handle, if (i_blocks <= ~0U) { /* - * i_blocks can be represnted in a 32 bit variable + * i_blocks can be represented in a 32 bit variable * as multiple of 512 bytes */ raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); @@ -4169,7 +4169,7 @@ out_brelse: * * - Within generic_file_write() for O_SYNC files. * Here, there will be no transaction running. We wait for any running - * trasnaction to commit. + * transaction to commit. * * - Within sys_sync(), kupdate and such. * We wait on commit, if tol to. @@ -4413,7 +4413,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiuguous, with flexbg, + * different block groups too. If they are contiguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8eae94771c4..08778f6cdfe 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4709,7 +4709,7 @@ error_return: * ext4_group_add_blocks() -- Add given blocks to an existing group * @handle: handle to this transaction * @sb: super block - * @block: start physcial block to add to the block group + * @block: start physical block to add to the block group * @count: number of blocks to free * * This marks the blocks as free in the bitmap and buddy. diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f..6d46c0d7833 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi, /* * Write a portion of b_io inodes which belong to @sb. * - * If @only_this_sb is true, then find and write all such - * inodes. Otherwise write only ones which go sequentially - * in reverse order. - * * Return the number of pages and/or inodes written. */ static long writeback_sb_inodes(struct super_block *sb, diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d38..01c4975da4b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(mapping->host); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + unsigned requested = 0; int alloc_required; int error = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; @@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, requested); if (error) goto out_qunlock; } @@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; if (alloc_required) - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (gfs2_mb_reserved(ip)) - gfs2_inplace_release(ip); + gfs2_inplace_release(ip); if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { @@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); out: - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return rv; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9f..1fd3ae237bd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 382000ffac1..30e21997a1a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -845,7 +845,7 @@ retry: &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(ip); + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0..e6c2fd53cab 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** - * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - * If the glock is demotable, then we add it (or move it) to the end - * of the glock LRU list. - */ - -static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - if (demote_ok(gl)) - gfs2_glock_add_to_lru(gl); -} - -/** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put * @@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) return 0; } -static void wait_on_holder(struct gfs2_holder *gh) +/** + * gfs2_glock_wait - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long time1 = jiffies; @@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); -} - -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + return gh->gh_error; } /** @@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, trace_gfs2_demote_rq(gl); } -/** - * gfs2_glock_wait - wait on a glock acquisition - * @gh: the glock holder - * - * Returns: 0 on success - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - wait_on_holder(gh); - return gh->gh_error; -} - void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; @@ -979,7 +954,7 @@ __acquires(&gl->gl_spin) struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_lock = 0; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) @@ -987,7 +962,7 @@ __acquires(&gl->gl_spin) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) - try_lock = 1; + try_futile = !may_grant(gl, gh); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } @@ -996,9 +971,8 @@ __acquires(&gl->gl_spin) if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) goto trap_recursive; - if (try_lock && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && - !may_grant(gl, gh)) { + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: gh->gh_error = GLR_TRYFAILED; gfs2_holder_wake(gh); @@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf378418..32cc4fde975 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); + sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc..3d469d37345 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -99,9 +99,26 @@ struct gfs2_rgrpd { #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ - u32 rd_rs_cnt; /* count of current reservations */ }; +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ + u32 offset; /* The offset is bitmap relative */ +}; + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; +} + +static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, + const struct gfs2_rbm *rbm2) +{ + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + (rbm1->offset == rbm2->offset); +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -250,18 +267,11 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during inplace_reserve (step 2): */ - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - - /* components used during get_local_rgrp (step 3): */ - struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ - - /* components used during block searches and assignments (step 4): */ - struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ - u32 rs_biblk; /* start block relative to the bi */ + struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ + u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 753af3d86bb..381893ceefa 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - /* The newly created inode needs a reservation so it can allocate - xattrs. At the same time, we want new blocks allocated to the new - dinode to be as contiguous as possible. Since we allocated the - dinode block under the directory's reservation, we transfer - ownership of that reservation to the new inode. The directory - doesn't need a reservation unless it needs a new allocation. */ - ip->i_res = dip->i_res; - dip->i_res = NULL; + error = gfs2_rs_alloc(ip); + if (error) + goto fail_gunlock2; error = gfs2_acl_create(dip, inode); if (error) @@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - /* Check if we reserved space in the rgrp. Function link_dinode may - not, depending on whether alloc is required. */ - if (gfs2_mb_reserved(dip)) - gfs2_inplace_release(dip); + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); @@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip) + + gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip) + + gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420e..e443966c810 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -19,6 +19,7 @@ #include <linux/mount.h> #include <linux/gfs2_ondisk.h> #include <linux/quotaops.h> +#include <linux/lockdep.h> #include "gfs2.h" #include "incore.h" @@ -766,6 +767,7 @@ fail: return error; } +static struct lock_class_key gfs2_quota_imutex_key; static int init_inodes(struct gfs2_sbd *sdp, int undo) { @@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + /* + * i_mutex on quota files is special. Since this inode is hidden system + * file, we are safe to define locking ourselves. + */ + lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c..4021deca61e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_holder *ghs, i_gh; unsigned int qx, x; struct gfs2_quota_data *qd; + unsigned reserved; loff_t offset; unsigned int nalloc = 0, blocks; int error; @@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); + mutex_lock(&ip->i_inode.i_mutex); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); @@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - error = gfs2_inplace_reserve(ip, 1 + - (nalloc * (data_blocks + ind_blocks))); + reserved = 1 + (nalloc * (data_blocks + ind_blocks)); + error = gfs2_inplace_reserve(ip, reserved); if (error) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; + blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip, blocks); if (error) goto out_i; - blocks += gfs2_rg_blocks(ip); + blocks += gfs2_rg_blocks(ip, blocks); } /* Some quotas span block boundaries and can update two blocks, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c9ed814eeb6..3cc402ce6fe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,53 +64,48 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - - gfs2_assert(rgd->rd_sbd, byte < end); + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - cur_state = (*byte >> bit) & GFS2_BIT_MASK; - - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) } /** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - -/** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. * @buf: the buffer that holds the bitmaps @@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); @@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, } /** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + n_unaligned = len & 3; + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + +/** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps @@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + RB_CLEAR_NODE(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + gfs2_rs_deltree(ip, ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1270,211 +1332,276 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + + BUG_ON(gfs2_rs_active(rs)); spin_lock(&rgd->rd_rsspin); newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); - BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&rgd->rd_rsspin); - trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); - return rs; -} - -/** - * unclaimed_blocks - return number of blocks that aren't spoken for - */ -static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) -{ - return rgd->rd_free_clone - rgd->rd_reserved; + trace_gfs2_rs(rs, TRACE_RS_INSERT); } /** - * rg_mblk_search - find a group of multiple free blocks + * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor - * @rs: the block reservation * @ip: pointer to the inode for which we're reserving blocks + * @requested: number of blocks required for this allocation * - * This is very similar to rgblk_search, except we're looking for whole - * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing - * on aligned dwords for speed's sake. - * - * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_bitmap *bi = rgd->rd_bits; - const u32 length = rgd->rd_length; - u32 blk; - unsigned int buf, x, search_bytes; - u8 *buffer = NULL; - u8 *ptr, *end, *nonzero; - u32 goal, rsv_bytes; - struct gfs2_blkreserv *rs; - u32 best_rs_bytes, unclaimed; - int best_rs_blocks; + struct gfs2_rbm rbm = { .rgd = rgd, }; + u64 goal; + struct gfs2_blkreserv *rs = ip->i_res; + u32 extlen; + u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + int ret; + + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + return; /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within - found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } + goal = rgd->rd_last_alloc + rgd->rd_data0; + + if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) + return; + + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + if (ret == 0) { + rs->rs_rbm = rbm; + rs->rs_free = extlen; + rs->rs_inum = ip->i_no_addr; + rs_insert(ip); } - buf = 0; - goal = 0; - -do_search: - best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), - (RGRP_RSRV_MINBLKS * rgd->rd_length)); - best_rs_bytes = (best_rs_blocks * - (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / - GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ - best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); - unclaimed = unclaimed_blocks(rgd); - if (best_rs_bytes * GFS2_NBBY > unclaimed) - best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; - - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; +} + +/** + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @length: The required length + * @ip: Ignore any reservations for this inode + * + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved. + */ - if (test_bit(GBF_FULL, &bi->bi_flags)) - goto skip; +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + u32 length, + const struct gfs2_inode *ip) +{ + struct gfs2_blkreserv *rs; + struct rb_node *n; + int rc; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; + spin_lock(&rgd->rd_rsspin); + n = rgd->rd_rstree.rb_node; + while (n) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(block, length, rs); + if (rc < 0) + n = n->rb_left; + else if (rc > 0) + n = n->rb_right; else - buffer = bi->bi_bh->b_data + bi->bi_offset; - - /* We have to keep the reservations aligned on u64 boundaries - otherwise we could get situations where a byte can't be - used because it's after a reservation, but a free bit still - is within the reservation's area. */ - ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); - end = (buffer + bi->bi_len); - while (ptr < end) { - rsv_bytes = 0; - if ((ptr + best_rs_bytes) <= end) - search_bytes = best_rs_bytes; - else - search_bytes = end - ptr; - BUG_ON(!search_bytes); - nonzero = memchr_inv(ptr, 0, search_bytes); - /* If the lot is all zeroes, reserve the whole size. If - there's enough zeroes to satisfy the request, use - what we can. If there's not enough, keep looking. */ - if (nonzero == NULL) - rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= - ip->i_res->rs_requested) - rsv_bytes = (nonzero - ptr); - - if (rsv_bytes) { - blk = ((ptr - buffer) * GFS2_NBBY); - BUG_ON(blk >= bi->bi_len * GFS2_NBBY); - rs = rs_insert(bi, ip, blk, - rsv_bytes * GFS2_NBBY); - if (IS_ERR(rs)) - return PTR_ERR(rs); - if (rs) - return 0; - } - ptr += ALIGN(search_bytes, sizeof(u64)); + break; + } + + if (n) { + while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { + block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + n = n->rb_right; + if (n == NULL) + break; + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); } -skip: - /* Try next bitmap block (wrap back to rgrp header - if at end) */ - buf++; - buf %= length; - goal = 0; } - return BFITNOENT; + spin_unlock(&rgd->rd_rsspin); + return block; } /** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @ip: the inode + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length * - * If there's room for the requested blocks to be allocated from the RG: - * This will try to get a multi-block reservation first, and if that doesn't - * fit, it will take what it can. + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block. * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip, + u32 minext) { - struct gfs2_blkreserv *rs = ip->i_res; + u64 block = gfs2_rbm_to_block(rbm); + u32 extlen = 1; + u64 nblock; + int ret; - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + /* + * If we have a minimum extent length, then skip over any extent + * which is less than the min extent length in size. + */ + if (minext) { + extlen = gfs2_free_extlen(rbm, minext); + nblock = block + extlen; + if (extlen < minext) + goto fail; + } + + /* + * Check the extent which has been found against the reservations + * and skip if parts of it are already reserved + */ + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + if (nblock == block) return 0; - /* Look for a multi-block reservation. */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip) != BFITNOENT) - return 1; - if (unclaimed_blocks(rgd) >= rs->rs_requested) - return 1; +fail: + ret = gfs2_rbm_from_block(rbm, nblock); + if (ret < 0) + return ret; + return 1; +} - return 0; +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @minext: The requested extent length (0 for a single block) + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + * around until we've reached the starting point. + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + * has no free blocks in it. + * + * Returns: 0 on success, -ENOSPC if there is no block of the requested state + */ + +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap) +{ + struct buffer_head *bh; + struct gfs2_bitmap *initial_bi; + u32 initial_offset; + u32 offset; + u8 *buffer; + int index; + int n = 0; + int iters = rbm->rgd->rd_length; + int ret; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + while(1) { + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; + WARN_ON(!buffer_uptodate(bh)); + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; + initial_offset = rbm->offset; + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto bitmap_full; + rbm->offset = offset; + if (ip == NULL) + return 0; + + initial_bi = rbm->bi; + ret = gfs2_reservation_check_and_update(rbm, ip, minext); + if (ret == 0) + return 0; + if (ret > 0) { + n += (rbm->bi - initial_bi); + goto next_iter; + } + if (ret == -E2BIG) { + index = 0; + rbm->offset = 0; + n += (rbm->bi - initial_bi); + goto res_covered_end_of_rgrp; + } + return ret; + +bitmap_full: /* Mark bitmap as full and fall through */ + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; +res_covered_end_of_rgrp: + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) + break; + n++; +next_iter: + if (n >= iters) + break; + } + + return -ENOSPC; } /** @@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { - u32 goal = 0, block; - u64 no_addr; + u64 block; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_bitmap *bi; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; - while (goal < rgd->rd_data) { + while (1) { down_write(&sdp->sd_log_flush_lock); - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); up_write(&sdp->sd_log_flush_lock); - if (block == BFITNOENT) + if (error == -ENOSPC) + break; + if (WARN_ON_ONCE(error)) break; - block = gfs2_bi2rgd_blk(bi, block); - /* rgblk_search can return a block < goal, so we need to - keep it marching forward. */ - no_addr = block + rgd->rd_data0; - goal = max(block + 1, goal + 1); - if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) + block = gfs2_rbm_to_block(&rbm); + if (gfs2_rbm_from_block(&rbm, block + 1)) + break; + if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) continue; - if (no_addr == skip) + if (block == skip) continue; - *last_unlinked = no_addr; + *last_unlinked = block; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); if (error) continue; @@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ + struct gfs2_rgrpd *rgd = *pos; + + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == NULL) + rgd = gfs2_rgrpd_get_next(NULL); + *pos = rgd; + if (rgd != begin) /* If we didn't wrap */ + return true; + return false; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs->rs_requested = requested; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } + if (gfs2_assert_warn(sdp, requested)) + return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rgd; + begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rgd = begin = ip->i_rgd; + rs->rs_rbm.rgd = begin = ip->i_rgd; } else { - rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (rs->rs_rgd == NULL) + if (rs->rs_rbm.rgd == NULL) return -EBADSLT; while (loops < 3) { - rg_locked = 0; - - if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { - /* If the rgrp already is maxed out for contenders, - we can eliminate it as a "first pass" without even - requesting the rgrp glock. */ - error = GLR_TRYFAILED; - } else { - error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + rg_locked = 1; + + if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + rg_locked = 0; + error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); - if (error) { + if (error == GLR_TRYFAILED) + goto next_rgrp; + if (unlikely(error)) + return error; + if (sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rs->rs_rbm.rgd); + if (unlikely(error)) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; } } } - switch (error) { - case 0: - if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rgd) + - rs->rs_free >= rs->rs_requested) { - ip->i_rgd = rs->rs_rgd; - return 0; - } - /* We have a multi-block reservation, but the - rgrp doesn't have enough free blocks to - satisfy the request. Free the reservation - and look for a suitable rgrp. */ - gfs2_rs_deltree(rs); - } - if (try_rgrp_fit(rs->rs_rgd, ip)) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - ip->i_rgd = rs->rs_rgd; - return 0; - } - if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - try_rgrp_unlink(rs->rs_rgd, &last_unlinked, - ip->i_no_addr); - } - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); - rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rgd != begin) /* If we didn't wrap */ - break; - flags &= ~LM_FLAG_TRY; - loops++; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - goto out; - } else if (loops == 2) - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - break; - default: - goto out; + /* Skip unuseable resource groups */ + if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + goto skip_rgrp; + + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + + /* Get a reservation if we don't already have one */ + if (!gfs2_rs_active(rs)) + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + + /* Skip rgrps when we can't get a reservation on first pass */ + if (!gfs2_rs_active(rs) && (loops < 1)) + goto check_rgrp; + + /* If rgrp has enough free space, use it */ + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + ip->i_rgd = rs->rs_rbm.rgd; + return 0; + } + + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(ip, rs); +check_rgrp: + /* Check for unlinked inodes which can be reclaimed */ + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + ip->i_no_addr); +skip_rgrp: + /* Unlock rgrp if required */ + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); +next_rgrp: + /* Find the next rgrp, and continue looking */ + if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + continue; + + /* If we've scanned all the rgrps, but found no free blocks + * then this checks for some less likely conditions before + * trying again. + */ + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; } + /* Flushing the log may release space */ + if (loops == 2) + gfs2_log_flush(sdp, NULL); } - error = -ENOSPC; -out: - if (error) - rs->rs_requested = 0; - return error; + return -ENOSPC; } /** @@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - if (!rs) - return; - - if (!rs->rs_free) - gfs2_rs_deltree(rs); - if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - rs->rs_requested = 0; } /** @@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip) static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) { - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_block, buf_block; - unsigned int buf; - unsigned char type; - - length = rgd->rd_length; - rgrp_block = block - rgd->rd_data0; - - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } - - gfs2_assert(rgd->rd_sbd, buf < length); - buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; + struct gfs2_rbm rbm = { .rgd = rgd, }; + int ret; - type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, buf_block); + ret = gfs2_rbm_from_block(&rbm, block); + WARN_ON_ONCE(ret != 0); - return type; + return gfs2_testbit(&rbm); } -/** - * rgblk_search - find a block in @state - * @rgd: the resource group descriptor - * @goal: the goal block within the RG (start here to search for avail block) - * @state: GFS2_BLKST_XXX the before-allocation state to find - * @rbi: address of the pointer to the bitmap containing the block found - * - * Walk rgrp's bitmap to find bits that represent a block in @state. - * - * This function never fails, because we wouldn't call it unless we - * know (from reservation results, etc.) that a block is available. - * - * Scope of @goal is just within rgrp, not the whole filesystem. - * Scope of @returned block is just within bitmap, not the whole filesystem. - * - * Returns: the block number found relative to the bitmap rbi - */ - -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, - struct gfs2_bitmap **rbi) -{ - struct gfs2_bitmap *bi = NULL; - const u32 length = rgd->rd_length; - u32 biblk = BFITNOENT; - unsigned int buf, x; - const u8 *buffer = NULL; - - *rbi = NULL; - /* Find bitmap block that contains bits for goal block */ - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - /* Search (up to entire) bitmap in this rgrp for allocatable block. - "x <= length", instead of "x < length", because we typically start - the search in the middle of a bit block, but if we can't find an - allocatable block anywhere else, we want to be able wrap around and - search in the first part of our first-searched bit block. */ - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags) && - (state == GFS2_BLKST_FREE)) - goto skip; - - /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone - bitmaps, so we must search the originals for that. */ - buffer = bi->bi_bh->b_data + bi->bi_offset; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - - while (1) { - struct gfs2_blkreserv *rs; - u32 rgblk; - - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (biblk == BFITNOENT) - break; - /* Check if this block is reserved() */ - rgblk = gfs2_bi2rgd_blk(bi, biblk); - rs = rs_find(rgd, rgblk); - if (rs == NULL) - break; - - BUG_ON(rs->rs_bi != bi); - biblk = BFITNOENT; - /* This should jump to the first block after the - reservation. */ - goal = rs->rs_biblk + rs->rs_free; - if (goal >= bi->bi_len * GFS2_NBBY) - break; - } - if (biblk != BFITNOENT) - break; - - if ((goal == 0) && (state == GFS2_BLKST_FREE)) - set_bit(GBF_FULL, &bi->bi_flags); - - /* Try next bitmap block (wrap back to rgrp header if at end) */ -skip: - buf++; - buf %= length; - goal = 0; - } - - if (biblk != BFITNOENT) - *rbi = bi; - - return biblk; -} /** * gfs2_alloc_extent - allocate an extent from a given bitmap - * @rgd: the resource group descriptor - * @bi: the bitmap within the rgrp - * @blk: the block within the bitmap + * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode - * @n: The extent length + * @n: The extent length (value/result) * - * Add the found bitmap buffer to the transaction. + * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. - * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, - u32 blk, bool dinode, unsigned int *n) +static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, + unsigned int *n) { + struct gfs2_rbm pos = { .rgd = rbm->rgd, }; const unsigned int elen = *n; - u32 goal, rgblk; - const u8 *buffer = NULL; - struct gfs2_blkreserv *rs; - - *n = 0; - buffer = bi->bi_bh->b_data + bi->bi_offset; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_clone, bi, blk, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - (*n)++; - goal = blk; + u64 block; + int ret; + + *n = 1; + block = gfs2_rbm_to_block(rbm); + gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + block++; while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - rgblk = gfs2_bi2rgd_blk(bi, goal); - rs = rs_find(rgd, rgblk); - if (rs) /* Oops, we bumped into someone's reservation */ - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) + ret = gfs2_rbm_from_block(&pos, block); + if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); + gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; + block++; } - blk = gfs2_bi2rgd_blk(bi, blk); - rgd->rd_last_alloc = blk + *n - 1; - return rgd->rd_data0 + blk; } /** @@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { - struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_blk, buf_blk; - unsigned int buf; + struct gfs2_rbm rbm; - rgd = gfs2_blk2rgrpd(sdp, bstart, 1); - if (!rgd) { + rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); + if (!rbm.rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); return NULL; } - length = rgd->rd_length; - - rgrp_blk = bstart - rgd->rd_data0; - while (blen--) { - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + gfs2_rbm_from_block(&rbm, bstart); + bstart++; + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - - gfs2_assert(rgd->rd_sbd, buf < length); - - buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; - rgrp_blk++; - - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len); - } - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_setbit(&rbm, false, new_state); } - return rgd; + return rbm.rgd; } /** @@ -1956,46 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * claim_reserved_blks - Claim previously reserved blocks - * @ip: the inode that's claiming the reservation - * @dinode: 1 if this block is a dinode block, otherwise data block - * @nblocks: desired extent length + * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation + * @ip: The inode we have just allocated blocks for + * @rbm: The start of the allocated blocks + * @len: The extent length * - * Lay claim to previously reserved blocks. - * Returns: Starting block number of the blocks claimed. - * Sets *nblocks to the actual extent length allocated. + * Adjusts a reservation after an allocation has taken place. If the + * reservation does not match the allocation, or if it is now empty + * then it is removed. */ -static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, - unsigned int *nblocks) + +static void gfs2_adjust_reservation(struct gfs2_inode *ip, + const struct gfs2_rbm *rbm, unsigned len) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; - struct gfs2_bitmap *bi; - u64 start_block = gfs2_rs_startblk(rs); - const unsigned int elen = *nblocks; + struct gfs2_rgrpd *rgd = rbm->rgd; + unsigned rlen; + u64 block; + int ret; - bi = rs->rs_bi; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - - for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_biblk++; - rs->rs_free--; - - BUG_ON(!rgd->rd_reserved); - rgd->rd_reserved--; - dinode = false; + spin_lock(&rgd->rd_rsspin); + if (gfs2_rs_active(rs)) { + if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { + block = gfs2_rbm_to_block(rbm); + ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + rlen = min(rs->rs_free, len); + rs->rs_free -= rlen; + rgd->rd_reserved -= rlen; + trace_gfs2_rs(rs, TRACE_RS_CLAIM); + if (rs->rs_free && !ret) + goto out; + } + __rs_deltree(ip, rs); } - - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); - if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(rs); - - return start_block; +out: + spin_unlock(&rgd->rd_rsspin); } /** @@ -2014,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rgrpd *rgd; + struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal, blk; /* block, within the rgrp scope */ + u64 goal; u64 block; /* block, within the file system scope */ int error; - struct gfs2_bitmap *bi; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (ip->i_res->rs_requested == 0) - return -ECANCELED; - - /* If we have a reservation, claim blocks from it. */ - if (gfs2_rs_active(ip->i_res)) { - BUG_ON(!ip->i_res->rs_free); - rgd = ip->i_res->rs_rgd; - block = claim_reserved_blks(ip, dinode, nblocks); - if (*nblocks) - goto found_blocks; - } - - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); + + if (error == -ENOSPC) { + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); + } /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + if (error) { + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + (unsigned long long)ip->i_no_addr, error, *nblocks, + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); -found_blocks: + gfs2_alloc_extent(&rbm, dinode, nblocks); + block = gfs2_rbm_to_block(&rbm); + rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) ndata--; @@ -2071,22 +2042,22 @@ found_blocks: brelse(dibh); } } - if (rgd->rd_free < *nblocks) { + if (rbm.rgd->rd_free < *nblocks) { printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; } - rgd->rd_free -= *nblocks; + rbm.rgd->rd_free -= *nblocks; if (dinode) { - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; + rbm.rgd->rd_dinodes++; + *generation = rbm.rgd->rd_igeneration++; if (*generation == 0) - *generation = rgd->rd_igeneration++; + *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2100,14 +2071,14 @@ found_blocks: gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, ip->i_inode.i_gid); - rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, rgd, block, *nblocks, + rbm.rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: - gfs2_rgrp_error(rgd); + gfs2_rgrp_error(rbm.rgd); return -EIO; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index ca6e26729b8..24077958dcf 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); -extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); @@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); -/* This is how to tell if a multi-block reservation is "inplace" reserved: */ -static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +/* This is how to tell if a reservation is in the rgrp tree: */ +static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) { - if (ip->i_res && ip->i_res->rs_requested) - return 1; - return 0; -} - -/* This is how to tell if a multi-block reservation is in the rgrp tree: */ -static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) -{ - if (rs && rs->rs_bi) - return 1; - return 0; -} - -static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; -} - -static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) -{ - return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; + return rs && !RB_EMPTY_NODE(&rs->rs_node); } #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fc3168f47a1..a8d90f2f576 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) val = sdp->sd_tune.gt_statfs_quantum; if (val != 30) seq_printf(s, ",statfs_quantum=%d", val); + else if (sdp->sd_tune.gt_statfs_slow) + seq_puts(s, ",statfs_quantum=0"); val = sdp->sd_tune.gt_quota_quantum; if (val != 60) seq_printf(s, ",quota_quantum=%d", val); @@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode) out_truncate: gfs2_log_flush(sdp, ip->i_gl); + if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + filemap_fdatawrite(metamapping); + filemap_fdatawait(metamapping); + } write_inode_now(inode, 1); gfs2_ail_flush(ip->i_gl, 0); @@ -1557,7 +1564,7 @@ out_truncate: out_unlock: /* Error path for case 1 */ if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a25c252fe41..bbdc78af60c 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc, /* Keep track of multi-block reservations as they are allocated/freed */ TRACE_EVENT(gfs2_rs, - TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, - u8 func), + TP_PROTO(const struct gfs2_blkreserv *rs, u8 func), - TP_ARGS(ip, rs, func), + TP_ARGS(rs, func), TP_STRUCT__entry( __field( dev_t, dev ) @@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; - __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; - __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; - __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; - __entry->inum = ip ? ip->i_no_addr : 0; - __entry->start = gfs2_rs_startblk(rs); + __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->inum = rs->rs_inum; + __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); __entry->free = rs->rs_free; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " - "f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 41f42cdccbb..bf2ae9aeee7 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -28,11 +28,10 @@ struct gfs2_glock; /* reserve either the number of blocks to be allocated plus the rg header * block, or all of the blocks in the rg, whichever is smaller */ -static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) +static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - const struct gfs2_blkreserv *rs = ip->i_res; - if (rs && rs->rs_requested < ip->i_rgd->rd_length) - return rs->rs_requested + 1; + if (requested < ip->i_rgd->rd_length) + return requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 27a0b4a901f..db330e5518c 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) } /** - * ea_get_unstuffed - actually copies the unstuffed data into the - * request buffer + * ea_iter_unstuffed - copies the unstuffed xattr data to/from the + * request buffer * @ip: The GFS2 inode * @ea: The extended attribute header structure - * @data: The data to be copied + * @din: The data to be copied in + * @dout: The data to be copied out (one of din,dout will be NULL) * * Returns: errno */ -static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, - char *data) +static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + const char *din, char *dout) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head **bh; @@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); unsigned int x; int error = 0; + unsigned char *pos; + unsigned cp_size; bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); if (!bh) @@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, goto out; } - memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); + pos = bh[x]->b_data + sizeof(struct gfs2_meta_header); + cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize; - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; + if (dout) { + memcpy(dout, pos, cp_size); + dout += sdp->sd_jbsize; + } + + if (din) { + gfs2_trans_add_bh(ip->i_gl, bh[x], 1); + memcpy(pos, din, cp_size); + din += sdp->sd_jbsize; + } + amount -= sdp->sd_jbsize; brelse(bh[x]); } @@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, memcpy(data, GFS2_EA2DATA(el->el_ea), len); return len; } - ret = ea_get_unstuffed(ip, el->el_ea, data); + ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data); if (ret < 0) return ret; return len; @@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out_gunlock_q; error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), - blks + gfs2_rg_blocks(ip) + + blks + gfs2_rg_blocks(ip, blks) + RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipres; @@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name, size, flags, type); } + static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, char *data) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head **bh; unsigned int amount = GFS2_EA_DATA_LEN(ea); unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); - __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); - unsigned int x; - int error; - - bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); - if (!bh) - return -ENOMEM; - - error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); - if (error) - goto out; - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0, - bh + x); - if (error) { - while (x--) - brelse(bh[x]); - goto fail; - } - dataptrs++; - } - - for (x = 0; x < nptrs; x++) { - error = gfs2_meta_wait(sdp, bh[x]); - if (error) { - for (; x < nptrs; x++) - brelse(bh[x]); - goto fail; - } - if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { - for (; x < nptrs; x++) - brelse(bh[x]); - error = -EIO; - goto fail; - } - - gfs2_trans_add_bh(ip->i_gl, bh[x], 1); - - memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data, - (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); - - amount -= sdp->sd_jbsize; - data += sdp->sd_jbsize; - - brelse(bh[x]); - } + int ret; -out: - kfree(bh); - return error; + ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); + if (ret) + return ret; -fail: + ret = gfs2_iter_unstuffed(ip, ea, data, NULL); gfs2_trans_end(sdp); - kfree(bh); - return error; + + return ret; } int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) diff --git a/fs/libfs.c b/fs/libfs.c index a74cb1725ac..7cc37ca19cd 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, EXPORT_SYMBOL_GPL(generic_fh_to_dentry); /** - * generic_fh_to_dentry - generic helper for the fh_to_parent export operation + * generic_fh_to_parent - generic helper for the fh_to_parent export operation * @sb: filesystem to do the file handle conversion on * @fid: file handle to convert * @fh_len: length of the file handle in bytes diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index fb1a2bedbe9..8d80c990dff 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref) dprintk("lockd: freeing block %p...\n", block); /* Remove block from file's list of blocks */ - mutex_lock(&file->f_mutex); list_del_init(&block->b_flist); mutex_unlock(&file->f_mutex); @@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref) static void nlmsvc_release_block(struct nlm_block *block) { if (block != NULL) - kref_put(&block->b_count, nlmsvc_free_block); + kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex); } /* diff --git a/fs/namespace.c b/fs/namespace.c index 4d31f73e256..7bdf7907413 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) return err; err = -EINVAL; - if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) - goto unlock; + if (unlikely(!check_mnt(real_mount(path->mnt)))) { + /* that's acceptable only for automounts done in private ns */ + if (!(mnt_flags & MNT_SHRINKABLE)) + goto unlock; + /* ... and for those we'd better have mountpoint still alive */ + if (!real_mount(path->mnt)->mnt_ns) + goto unlock; + } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b8eda700584..d2c7f5db084 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1537,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw, /* * verify that any proto=/mountproto= options match the address - * familiies in the addr=/mountaddr= options. + * families in the addr=/mountaddr= options. */ if (protofamily != AF_UNSPEC && protofamily != mnt->nfs_server.address.ss_family) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d7a9dd735e1..933b7930b86 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -96,6 +96,7 @@ xfs_buf_lru_add( atomic_inc(&bp->b_hold); list_add_tail(&bp->b_lru, &btp->bt_lru); btp->bt_lru_nr++; + bp->b_lru_flags &= ~_XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); } @@ -154,7 +155,8 @@ xfs_buf_stale( struct xfs_buftarg *btp = bp->b_target; spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { + if (!list_empty(&bp->b_lru) && + !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) { list_del_init(&bp->b_lru); btp->bt_lru_nr--; atomic_dec(&bp->b_hold); @@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( */ list_move(&bp->b_lru, &dispose); btp->bt_lru_nr--; + bp->b_lru_flags |= _XBF_LRU_DISPOSE; } spin_unlock(&btp->bt_lru_lock); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d03b73b9604..7c0b6a0a155 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -38,27 +38,28 @@ typedef enum { XBRW_ZERO = 3, /* Zero target memory */ } xfs_buf_rw_t; -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ /* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ /* flags used only as arguments to access routines */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ /* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_COMPOUND (1 << 23)/* compound buffer */ +#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */ typedef unsigned int xfs_buf_flags_t; @@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ - { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_COMPOUND, "COMPOUND" }, \ + { _XBF_LRU_DISPOSE, "LRU_DISPOSE" } typedef struct xfs_buftarg { dev_t bt_dev; @@ -124,7 +126,12 @@ typedef struct xfs_buf { xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ + /* + * concurrent access to b_lru and b_lru_flags are protected by + * bt_lru_lock and not by b_sema + */ struct list_head b_lru; /* lru list */ + xfs_buf_flags_t b_lru_flags; /* internal lru status flags */ wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index bdaf4cb9f4a..19e2380fb86 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -919,6 +919,7 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); xfs_filestream_unmount(mp); + cancel_delayed_work_sync(&mp->m_sync_work); xfs_unmountfs(mp); xfs_syncd_stop(mp); xfs_freesb(mp); diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 991ef01cd77..3748ec92dcb 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -691,9 +691,11 @@ __SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \ #define __NR_process_vm_writev 271 __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ compat_sys_process_vm_writev) +#define __NR_kcmp 272 +__SYSCALL(__NR_kcmp, sys_kcmp) #undef __NR_syscalls -#define __NR_syscalls 272 +#define __NR_syscalls 273 /* * All syscalls below here should go away really, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55e6d63d46d..a52f2f4fe03 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -10,6 +10,7 @@ #include <linux/kallsyms.h> #include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/ptrace.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/types.h> @@ -18,6 +19,28 @@ #include <asm/ftrace.h> +/* + * If the arch supports passing the variable contents of + * function_trace_op as the third parameter back from the + * mcount call, then the arch should define this as 1. + */ +#ifndef ARCH_SUPPORTS_FTRACE_OPS +#define ARCH_SUPPORTS_FTRACE_OPS 0 +#endif + +/* + * If the arch's mcount caller does not support all of ftrace's + * features, then it must call an indirect function that + * does. Or at least does enough to prevent any unwelcomed side effects. + */ +#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \ + !ARCH_SUPPORTS_FTRACE_OPS +# define FTRACE_FORCE_LIST_FUNC 1 +#else +# define FTRACE_FORCE_LIST_FUNC 0 +#endif + + struct module; struct ftrace_hash; @@ -29,7 +52,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_ops; + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -45,12 +71,33 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); * could be controled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable + * SAVE_REGS - The ftrace_ops wants regs saved at each function called + * and passed to the callback. If this flag is set, but the + * architecture does not support passing regs + * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * ftrace_ops will fail to register, unless the next flag + * is set. + * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the + * handler can handle an arch that does not save regs + * (the handler tests if regs == NULL), then it can set + * this flag instead. It will not fail registering the ftrace_ops + * but, the regs field will be NULL if the arch does not support + * passing regs to the handler. + * Note, if this flag is set, the SAVE_REGS flag will automatically + * get set upon registering the ftrace_ops, if the arch supports it. + * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back has its own recursion protection. If it does + * not set this, then the ftrace infrastructure will add recursion + * protection for the caller. */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS = 1 << 4, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, }; struct ftrace_ops { @@ -163,7 +210,8 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs); #else /* !CONFIG_FUNCTION_TRACER */ /* @@ -172,6 +220,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); */ #define register_ftrace_function(ops) ({ 0; }) #define unregister_ftrace_function(ops) ({ 0; }) +static inline int ftrace_nr_registered_ops(void) +{ + return 0; +} static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } @@ -227,12 +279,33 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +extern int ftrace_nr_registered_ops(void); + +/* + * The dyn_ftrace record's flags field is split into two parts. + * the first part which is '0-FTRACE_REF_MAX' is a counter of + * the number of callbacks that have registered the function that + * the dyn_ftrace descriptor represents. + * + * The second part is a mask: + * ENABLED - the function is being traced + * REGS - the record wants the function to save regs + * REGS_EN - the function is set up to save regs. + * + * When a new ftrace_ops is registered and wants a function to save + * pt_regs, the rec->flag REGS is set. When the function has been + * set up to save regs, the REG_EN flag is set. Once a function + * starts saving regs it will do so until all ftrace_ops are removed + * from tracing that function. + */ enum { - FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_REGS = (1UL << 30), + FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x3UL << 30) -#define FTRACE_REF_MAX ((1 << 30) - 1) +#define FTRACE_FL_MASK (0x7UL << 29) +#define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { union { @@ -244,6 +317,8 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -263,9 +338,23 @@ enum { FTRACE_STOP_FUNC_RET = (1 << 4), }; +/* + * The FTRACE_UPDATE_* enum is used to pass information back + * from the ftrace_update_record() and ftrace_test_record() + * functions. These are called by the code update routines + * to find out what is to be done for a given function. + * + * IGNORE - The function is already what we want it to be + * MAKE_CALL - Start tracing the function + * MODIFY_CALL - Stop saving regs for the function + * MODIFY_CALL_REGS - Start saving regs for the function + * MAKE_NOP - Stop tracing the function + */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, + FTRACE_UPDATE_MODIFY_CALL, + FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; @@ -317,7 +406,9 @@ extern int ftrace_dyn_arch_init(void *data); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); +extern void ftrace_regs_caller(void); extern void ftrace_call(void); +extern void ftrace_regs_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -325,6 +416,15 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif + +#ifndef FTRACE_REGS_ADDR +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +#else +# define FTRACE_REGS_ADDR FTRACE_ADDR +#endif +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -380,6 +480,39 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/** + * ftrace_modify_call - convert from one addr to another (no nop) + * @rec: the mcount call site record + * @old_addr: the address expected to be currently called to + * @addr: the address to change to + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @old_addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr); +#else +/* Should never be called */ +static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return -EINVAL; +} +#endif + /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); @@ -387,7 +520,7 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -#else +#else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } @@ -405,6 +538,10 @@ static inline int ftrace_text_reserved(void *start, void *end) { return 0; } +static inline unsigned long ftrace_location(unsigned long ip) +{ + return 0; +} /* * Again users of functions that have ftrace_ops may not @@ -413,6 +550,7 @@ static inline int ftrace_text_reserved(void *start, void *end) */ #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) +#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 305f23cd7cf..cab3da3d094 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -132,11 +132,11 @@ extern void synchronize_irq(unsigned int irq); struct task_struct; #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING) -static inline void account_system_vtime(struct task_struct *tsk) +static inline void vtime_account(struct task_struct *tsk) { } #else -extern void account_system_vtime(struct task_struct *tsk); +extern void vtime_account(struct task_struct *tsk); #endif #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) @@ -162,7 +162,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - account_system_vtime(current); \ + vtime_account(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -178,7 +178,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - account_system_vtime(current); \ + vtime_account(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/hid.h b/include/linux/hid.h index 42970de1b40..7e1f37db758 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -414,7 +414,7 @@ struct hid_field { __u16 dpad; /* dpad input code */ }; -#define HID_MAX_FIELDS 128 +#define HID_MAX_FIELDS 256 struct hid_report { struct list_head list; @@ -626,6 +626,7 @@ struct hid_usage_id { * @report_fixup: called before report descriptor parsing (NULL means nop) * @input_mapping: invoked on input registering before mapping an usage * @input_mapped: invoked on input registering after mapping an usage + * @input_configured: invoked just before the device is registered * @feature_mapping: invoked on feature registering * @suspend: invoked on suspend (NULL means nop) * @resume: invoked on resume if device was not reset (NULL means nop) @@ -670,6 +671,8 @@ struct hid_driver { int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); + void (*input_configured)(struct hid_device *hdev, + struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); diff --git a/include/linux/input.h b/include/linux/input.h index 725dcd0f63a..ba487430293 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1169,6 +1169,18 @@ struct ff_effect { #include <linux/mod_devicetable.h> /** + * struct input_value - input value representation + * @type: type of value (EV_KEY, EV_ABS, etc) + * @code: the value code + * @value: the value + */ +struct input_value { + __u16 type; + __u16 code; + __s32 value; +}; + +/** * struct input_dev - represents an input device * @name: name of the device * @phys: physical path to the device in the system hierarchy @@ -1203,11 +1215,7 @@ struct ff_effect { * software autorepeat * @timer: timer for software autorepeat * @rep: current values for autorepeat parameters (delay, rate) - * @mt: pointer to array of struct input_mt_slot holding current values - * of tracked contacts - * @mtsize: number of MT slots the device uses - * @slot: MT slot currently being transmitted - * @trkid: stores MT tracking ID for the current contact + * @mt: pointer to multitouch state * @absinfo: array of &struct input_absinfo elements holding information * about absolute axes (current value, min, max, flat, fuzz, * resolution) @@ -1244,7 +1252,6 @@ struct ff_effect { * last user closes the device * @going_away: marks devices that are in a middle of unregistering and * causes input_open_device*() fail with -ENODEV. - * @sync: set to %true when there were no new events since last EV_SYN * @dev: driver model's view of this device * @h_list: list of input handles associated with the device. When * accessing the list dev->mutex must be held @@ -1287,10 +1294,7 @@ struct input_dev { int rep[REP_CNT]; - struct input_mt_slot *mt; - int mtsize; - int slot; - int trkid; + struct input_mt *mt; struct input_absinfo *absinfo; @@ -1312,12 +1316,14 @@ struct input_dev { unsigned int users; bool going_away; - bool sync; - struct device dev; struct list_head h_list; struct list_head node; + + unsigned int num_vals; + unsigned int max_vals; + struct input_value *vals; }; #define to_input_dev(d) container_of(d, struct input_dev, dev) @@ -1378,6 +1384,9 @@ struct input_handle; * @event: event handler. This method is being called by input core with * interrupts disabled and dev->event_lock spinlock held and so * it may not sleep + * @events: event sequence handler. This method is being called by + * input core with interrupts disabled and dev->event_lock + * spinlock held and so it may not sleep * @filter: similar to @event; separates normal event handlers from * "filters". * @match: called after comparing device's id with handler's id_table @@ -1414,6 +1423,8 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); + void (*events)(struct input_handle *handle, + const struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index f86737586e1..cc5cca774ba 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -15,12 +15,41 @@ #define TRKID_MAX 0xffff +#define INPUT_MT_POINTER 0x0001 /* pointer device, e.g. trackpad */ +#define INPUT_MT_DIRECT 0x0002 /* direct device, e.g. touchscreen */ +#define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ +#define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ + /** * struct input_mt_slot - represents the state of an input MT slot * @abs: holds current values of ABS_MT axes for this slot + * @frame: last frame at which input_mt_report_slot_state() was called + * @key: optional driver designation of this slot */ struct input_mt_slot { int abs[ABS_MT_LAST - ABS_MT_FIRST + 1]; + unsigned int frame; + unsigned int key; +}; + +/** + * struct input_mt - state of tracked contacts + * @trkid: stores MT tracking ID for the next contact + * @num_slots: number of MT slots the device uses + * @slot: MT slot currently being transmitted + * @flags: input_mt operation flags + * @frame: increases every time input_mt_sync_frame() is called + * @red: reduced cost matrix for in-kernel tracking + * @slots: array of slots holding current values of tracked contacts + */ +struct input_mt { + int trkid; + int num_slots; + int slot; + unsigned int flags; + unsigned int frame; + int *red; + struct input_mt_slot slots[]; }; static inline void input_mt_set_value(struct input_mt_slot *slot, @@ -35,12 +64,18 @@ static inline int input_mt_get_value(const struct input_mt_slot *slot, return slot->abs[code - ABS_MT_FIRST]; } -int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots); +static inline bool input_mt_is_active(const struct input_mt_slot *slot) +{ + return input_mt_get_value(slot, ABS_MT_TRACKING_ID) >= 0; +} + +int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, + unsigned int flags); void input_mt_destroy_slots(struct input_dev *dev); -static inline int input_mt_new_trkid(struct input_dev *dev) +static inline int input_mt_new_trkid(struct input_mt *mt) { - return dev->trkid++ & TRKID_MAX; + return mt->trkid++ & TRKID_MAX; } static inline void input_mt_slot(struct input_dev *dev, int slot) @@ -64,4 +99,20 @@ void input_mt_report_slot_state(struct input_dev *dev, void input_mt_report_finger_count(struct input_dev *dev, int count); void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count); +void input_mt_sync_frame(struct input_dev *dev); + +/** + * struct input_mt_pos - contact position + * @x: horizontal coordinate + * @y: vertical coordinate + */ +struct input_mt_pos { + s16 x, y; +}; + +int input_mt_assign_slots(struct input_dev *dev, int *slots, + const struct input_mt_pos *pos, int num_pos); + +int input_mt_get_slot_by_key(struct input_dev *dev, int key); + #endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c5f856a040b..5e4e6170f43 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -430,6 +430,8 @@ enum NR_SOFTIRQS }; +#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) + /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7e83370e6fd..f3b99e1c104 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -256,72 +256,78 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline int iommu_attach_group(struct iommu_domain *domain, + struct iommu_group *group) { return -ENODEV; } -void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) +static inline void iommu_detach_group(struct iommu_domain *domain, + struct iommu_group *group) { } -struct iommu_group *iommu_group_alloc(void) +static inline struct iommu_group *iommu_group_alloc(void) { return ERR_PTR(-ENODEV); } -void *iommu_group_get_iommudata(struct iommu_group *group) +static inline void *iommu_group_get_iommudata(struct iommu_group *group) { return NULL; } -void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data, - void (*release)(void *iommu_data)) +static inline void iommu_group_set_iommudata(struct iommu_group *group, + void *iommu_data, + void (*release)(void *iommu_data)) { } -int iommu_group_set_name(struct iommu_group *group, const char *name) +static inline int iommu_group_set_name(struct iommu_group *group, + const char *name) { return -ENODEV; } -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +static inline int iommu_group_add_device(struct iommu_group *group, + struct device *dev) { return -ENODEV; } -void iommu_group_remove_device(struct device *dev) +static inline void iommu_group_remove_device(struct device *dev) { } -int iommu_group_for_each_dev(struct iommu_group *group, void *data, - int (*fn)(struct device *, void *)) +static inline int iommu_group_for_each_dev(struct iommu_group *group, + void *data, + int (*fn)(struct device *, void *)) { return -ENODEV; } -struct iommu_group *iommu_group_get(struct device *dev) +static inline struct iommu_group *iommu_group_get(struct device *dev) { return NULL; } -void iommu_group_put(struct iommu_group *group) +static inline void iommu_group_put(struct iommu_group *group) { } -int iommu_group_register_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_register_notifier(struct iommu_group *group, + struct notifier_block *nb) { return -ENODEV; } -int iommu_group_unregister_notifier(struct iommu_group *group, - struct notifier_block *nb) +static inline int iommu_group_unregister_notifier(struct iommu_group *group, + struct notifier_block *nb) { return 0; } -int iommu_group_id(struct iommu_group *group) +static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; } diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 9a323d12de1..0ba014c5505 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -10,12 +10,10 @@ struct irq_affinity_notify; struct proc_dir_entry; -struct timer_rand_state; struct module; /** * struct irq_desc - interrupt descriptor * @irq_data: per irq and chip data passed down to chip functions - * @timer_rand_state: pointer to timer rand state struct * @kstat_irqs: irq stats per cpu * @handle_irq: highlevel irq-events handler * @preflow_handler: handler called before the flow handler (currently used by sparc) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 594b419b7d2..2451f1f7a1d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -91,7 +91,7 @@ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ - (((typeof(x))-1) >= 0 || (__x) >= 0) ? \ + (((typeof(x))-1) > 0 || (__x) > 0) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 2fbd9053c2d..36d12f0884c 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -130,4 +130,12 @@ extern void account_process_tick(struct task_struct *, int user); extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +extern void vtime_task_switch(struct task_struct *prev); +extern void vtime_account_system(struct task_struct *tsk); +extern void vtime_account_idle(struct task_struct *tsk); +#else +static inline void vtime_task_switch(struct task_struct *prev) { } +#endif + #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b6e1f8c0057..23755ba42ab 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -38,6 +38,7 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/mutex.h> +#include <linux/ftrace.h> #ifdef CONFIG_KPROBES #include <asm/kprobes.h> @@ -48,14 +49,26 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* + * If function tracer is enabled and the arch supports full + * passing of pt_regs to function tracing, then kprobes can + * optimize on top of function tracing. + */ +#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ + && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) +# define KPROBES_CAN_USE_FTRACE +#endif + /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) + #else /* CONFIG_KPROBES */ typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; #define __kprobes + #endif /* CONFIG_KPROBES */ struct kprobe; @@ -128,6 +141,7 @@ struct kprobe { * NOTE: * this flag is only for optimized_kprobe. */ +#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) @@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } + +/* Is this kprobe uses ftrace ? */ +static inline int kprobe_ftrace(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_FTRACE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs); +extern int arch_prepare_kprobe_ftrace(struct kprobe *p); +#endif + /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 22ccf9dee17..8d816646f76 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -14,6 +14,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) +struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), + void *data, + unsigned int cpu, + const char *namefmt); + /** * kthread_run - create and wake a thread. * @threadfn: the function to run until signal_pending(current). @@ -34,9 +39,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void kthread_bind(struct task_struct *k, unsigned int cpu); int kthread_stop(struct task_struct *k); -int kthread_should_stop(void); +bool kthread_should_stop(void); +bool kthread_should_park(void); bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); +int kthread_park(struct task_struct *k); +void kthread_unpark(struct task_struct *k); +void kthread_parkme(void); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b70b48b0109..8a59e0abe5f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -685,7 +685,7 @@ static inline int kvm_deassign_device(struct kvm *kvm, static inline void kvm_guest_enter(void) { BUG_ON(preemptible()); - account_system_vtime(current); + vtime_account(current); current->flags |= PF_VCPU; /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -699,7 +699,7 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - account_system_vtime(current); + vtime_account(current); current->flags &= ~PF_VCPU; } diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index 3d7ae4d7fd3..46c0f320ed7 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -74,6 +74,7 @@ enum max77686_regulators { struct max77686_regulator_data { int id; struct regulator_init_data *initdata; + struct device_node *of_node; }; enum max77686_opmode { diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index f4f0dfa4698..6823548d0c0 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -67,7 +67,7 @@ struct max8998_regulator_data { /** * struct max8998_board - packages regulator init data * @regulators: array of defined regulators - * @num_regulators: number of regultors used + * @num_regulators: number of regulators used * @irq_base: base IRQ number for max8998, required for IRQs * @ono: power onoff IRQ number for max8998 * @buck_voltage_lock: Do NOT change the values of the following six diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h index f350fd0ba1d..94514710a03 100644 --- a/include/linux/mfd/tps6586x.h +++ b/include/linux/mfd/tps6586x.h @@ -14,6 +14,7 @@ #define TPS6586X_SLEW_RATE_MASK 0x07 enum { + TPS6586X_ID_SYS, TPS6586X_ID_SM_0, TPS6586X_ID_SM_1, TPS6586X_ID_SM_2, diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 61f0905bdc4..de201203bc7 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -1,3 +1,15 @@ +/* + * include/linux/micrel_phy.h + * + * Micrel PHY IDs + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + #ifndef _MICREL_PHY_H #define _MICREL_PHY_H @@ -5,10 +17,11 @@ #define PHY_ID_KSZ9021 0x00221610 #define PHY_ID_KS8737 0x00221720 -#define PHY_ID_KS8041 0x00221510 -#define PHY_ID_KS8051 0x00221550 +#define PHY_ID_KSZ8021 0x00221555 +#define PHY_ID_KSZ8041 0x00221510 +#define PHY_ID_KSZ8051 0x00221550 /* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */ -#define PHY_ID_KS8001 0x0022161A +#define PHY_ID_KSZ8001 0x0022161A /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9490a00529f..c25cccaa555 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -35,8 +35,10 @@ struct nvme_bar { __u64 acq; /* Admin CQ Base Address */ }; +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) enum { NVME_CC_ENABLE = 1 << 0, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 33ed9d605f9..599afc4bb67 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -130,8 +130,10 @@ enum perf_event_sample_format { PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, - PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -163,6 +165,15 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HV) /* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + +/* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: * @@ -194,6 +205,8 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -255,7 +268,10 @@ struct perf_event_attr { exclude_host : 1, /* don't count in host */ exclude_guest : 1, /* don't count in guest */ - __reserved_1 : 43; + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + + __reserved_1 : 41; union { __u32 wakeup_events; /* wakeup every n events */ @@ -271,9 +287,25 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum branch_sample_type */ + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + __u64 sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + __u32 sample_stack_user; + + /* Align to u64. */ + __u32 __reserved_2; }; +#define perf_flags(attr) (*(&(attr)->read_format + 1)) + /* * Ioctls that can be done on a perf event fd: */ @@ -548,6 +580,13 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -609,6 +648,7 @@ struct perf_guest_info_callbacks { #include <linux/static_key.h> #include <linux/atomic.h> #include <linux/sysfs.h> +#include <linux/perf_regs.h> #include <asm/local.h> struct perf_callchain_entry { @@ -654,6 +694,11 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; +struct perf_regs_user { + __u64 abi; + struct pt_regs *regs; +}; + struct task_struct; /* @@ -1133,6 +1178,8 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + struct perf_regs_user regs_user; + u64 stack_user_size; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1142,7 +1189,10 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->addr = addr; data->raw = NULL; data->br_stack = NULL; - data->period = period; + data->period = period; + data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; + data->regs_user.regs = NULL; + data->stack_user_size = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1290,8 +1340,10 @@ static inline bool has_branch_stack(struct perf_event *event) extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, +extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h new file mode 100644 index 00000000000..3c73d5fe18b --- /dev/null +++ b/include/linux/perf_regs.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_PERF_REGS_H +#define _LINUX_PERF_REGS_H + +#ifdef CONFIG_HAVE_PERF_REGS +#include <asm/perf_regs.h> +u64 perf_reg_value(struct pt_regs *regs, int idx); +int perf_reg_validate(u64 mask); +u64 perf_reg_abi(struct task_struct *task); +#else +static inline u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + return 0; +} + +static inline int perf_reg_validate(u64 mask) +{ + return mask ? -ENOSYS : 0; +} + +static inline u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_NONE; +} +#endif /* CONFIG_HAVE_PERF_REGS */ +#endif /* _LINUX_PERF_REGS_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 115ead2b515..7c968e4f929 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -191,6 +191,21 @@ extern void rcu_idle_enter(void); extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); + +#ifdef CONFIG_RCU_USER_QS +extern void rcu_user_enter(void); +extern void rcu_user_exit(void); +extern void rcu_user_enter_after_irq(void); +extern void rcu_user_exit_after_irq(void); +extern void rcu_user_hooks_switch(struct task_struct *prev, + struct task_struct *next); +#else +static inline void rcu_user_enter(void) { } +static inline void rcu_user_exit(void) { } +static inline void rcu_user_enter_after_irq(void) { } +static inline void rcu_user_exit_after_irq(void) { } +#endif /* CONFIG_RCU_USER_QS */ + extern void exit_rcu(void); /** @@ -210,14 +225,12 @@ extern void exit_rcu(void); * to nest RCU_NONIDLE() wrappers, but the nesting level is currently * quite limited. If deeper nesting is required, it will be necessary * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. - * - * This macro may be used from process-level code only. */ #define RCU_NONIDLE(a) \ do { \ - rcu_idle_exit(); \ + rcu_irq_enter(); \ do { a; } while (0); \ - rcu_idle_enter(); \ + rcu_irq_exit(); \ } while (0) /* diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 7f7e00df3ad..e3bcc3f4dcb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -285,6 +285,7 @@ struct regmap_irq { * @ack_base: Base ack address. If zero then the chip is clear on read. * @wake_base: Base address for wake enables. If zero unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. + * @runtime_pm: Hold a runtime PM lock on the device when accessing it. * * @num_regs: Number of registers in each control bank. * @irqs: Descriptors for individual IRQs. Interrupt numbers are @@ -299,6 +300,8 @@ struct regmap_irq_chip { unsigned int ack_base; unsigned int wake_base; unsigned int irq_reg_stride; + unsigned int mask_invert; + bool runtime_pm; int num_regs; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index da339fd8c75..c43cd3556b1 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -177,6 +177,8 @@ int regulator_set_mode(struct regulator *regulator, unsigned int mode); unsigned int regulator_get_mode(struct regulator *regulator); int regulator_set_optimum_mode(struct regulator *regulator, int load_uA); +int regulator_allow_bypass(struct regulator *regulator, bool allow); + /* regulator notifier block */ int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb); @@ -328,6 +330,12 @@ static inline int regulator_set_optimum_mode(struct regulator *regulator, return REGULATOR_MODE_NORMAL; } +static inline int regulator_allow_bypass(struct regulator *regulator, + bool allow) +{ + return 0; +} + static inline int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { @@ -352,4 +360,11 @@ static inline void regulator_set_drvdata(struct regulator *regulator, #endif +static inline int regulator_set_voltage_tol(struct regulator *regulator, + int new_uV, int tol_uV) +{ + return regulator_set_voltage(regulator, + new_uV - tol_uV, new_uV + tol_uV); +} + #endif diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bac4c871f3b..7932a3bf21b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -32,6 +32,8 @@ enum regulator_status { REGULATOR_STATUS_NORMAL, REGULATOR_STATUS_IDLE, REGULATOR_STATUS_STANDBY, + /* The regulator is enabled but not regulating */ + REGULATOR_STATUS_BYPASS, /* in case that any other status doesn't apply */ REGULATOR_STATUS_UNDEFINED, }; @@ -58,6 +60,7 @@ enum regulator_status { * regulator_desc.n_voltages. Voltages may be reported in any order. * * @set_current_limit: Configure a limit for a current-limited regulator. + * The driver should select the current closest to max_uA. * @get_current_limit: Get the configured limit for a current-limited regulator. * * @set_mode: Set the configured operating mode for the regulator. @@ -67,6 +70,9 @@ enum regulator_status { * @get_optimum_mode: Get the most efficient operating mode for the regulator * when running with the specified parameters. * + * @set_bypass: Set the regulator in bypass mode. + * @get_bypass: Get the regulator bypass mode state. + * * @enable_time: Time taken for the regulator voltage output voltage to * stabilise after being enabled, in microseconds. * @set_ramp_delay: Set the ramp delay for the regulator. The driver should @@ -133,6 +139,10 @@ struct regulator_ops { unsigned int (*get_optimum_mode) (struct regulator_dev *, int input_uV, int output_uV, int load_uA); + /* control and report on bypass mode */ + int (*set_bypass)(struct regulator_dev *dev, bool enable); + int (*get_bypass)(struct regulator_dev *dev, bool *enable); + /* the operations below are for configuration of regulator state when * its parent PMIC enters a global STANDBY/HIBERNATE state */ @@ -205,6 +215,8 @@ struct regulator_desc { unsigned int vsel_mask; unsigned int enable_reg; unsigned int enable_mask; + unsigned int bypass_reg; + unsigned int bypass_mask; unsigned int enable_time; }; @@ -221,7 +233,8 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers + * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * insufficient. * @ena_gpio: GPIO controlling regulator enable. * @ena_gpio_invert: Sense for GPIO enable control. * @ena_gpio_flags: Flags to use when calling gpio_request_one() @@ -253,6 +266,7 @@ struct regulator_dev { int exclusive; u32 use_count; u32 open_count; + u32 bypass_count; /* lists we belong to */ struct list_head list; /* list of all regulators */ @@ -310,6 +324,8 @@ int regulator_disable_regmap(struct regulator_dev *rdev); int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int old_selector, unsigned int new_selector); +int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable); +int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable); void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data); diff --git a/include/linux/regulator/fan53555.h b/include/linux/regulator/fan53555.h new file mode 100644 index 00000000000..5c45c85d52c --- /dev/null +++ b/include/linux/regulator/fan53555.h @@ -0,0 +1,60 @@ +/* + * fan53555.h - Fairchild Regulator FAN53555 Driver + * + * Copyright (C) 2012 Marvell Technology Ltd. + * Yunfan Zhang <yfzhang@marvell.com> + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __FAN53555_H__ + +/* VSEL ID */ +enum { + FAN53555_VSEL_ID_0 = 0, + FAN53555_VSEL_ID_1, +}; + +/* Transition slew rate limiting from a low to high voltage. + * ----------------------- + * Bin |Slew Rate(mV/uS) + * ------|---------------- + * 000 | 64.00 + * ------|---------------- + * 001 | 32.00 + * ------|---------------- + * 010 | 16.00 + * ------|---------------- + * 011 | 8.00 + * ------|---------------- + * 100 | 4.00 + * ------|---------------- + * 101 | 2.00 + * ------|---------------- + * 110 | 1.00 + * ------|---------------- + * 111 | 0.50 + * ----------------------- + */ +enum { + FAN53555_SLEW_RATE_64MV = 0, + FAN53555_SLEW_RATE_32MV, + FAN53555_SLEW_RATE_16MV, + FAN53555_SLEW_RATE_8MV, + FAN53555_SLEW_RATE_4MV, + FAN53555_SLEW_RATE_2MV, + FAN53555_SLEW_RATE_1MV, + FAN53555_SLEW_RATE_0_5MV, +}; + +struct fan53555_platform_data { + struct regulator_init_data *regulator; + unsigned int slew_rate; + /* Sleep VSEL ID */ + unsigned int sleep_vsel_id; +}; + +#endif /* __FAN53555_H__ */ diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 40dd0a394cf..36adbc82de6 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -32,6 +32,7 @@ struct regulator; * board/machine. * STATUS: Regulator can be enabled and disabled. * DRMS: Dynamic Regulator Mode Switching is enabled for this regulator. + * BYPASS: Regulator can be put into bypass mode */ #define REGULATOR_CHANGE_VOLTAGE 0x1 @@ -39,6 +40,7 @@ struct regulator; #define REGULATOR_CHANGE_MODE 0x4 #define REGULATOR_CHANGE_STATUS 0x8 #define REGULATOR_CHANGE_DRMS 0x10 +#define REGULATOR_CHANGE_BYPASS 0x20 /** * struct regulator_state - regulator state during low power system states diff --git a/include/linux/sched.h b/include/linux/sched.h index 23bddac4bad..765dffbb085 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -273,11 +273,11 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) -extern void select_nohz_load_balancer(int stop_tick); +extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); #else -static inline void select_nohz_load_balancer(int stop_tick) { } +static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } #endif @@ -446,6 +446,9 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { @@ -678,11 +681,6 @@ struct signal_struct { * (notably. ptrace) */ }; -/* Context switch must be unlocked if interrupts are to be enabled */ -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW -# define __ARCH_WANT_UNLOCKED_CTXSW -#endif - /* * Bits in flags field of signal_struct. */ @@ -860,7 +858,6 @@ enum cpu_idle_type { #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */ #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ @@ -1885,6 +1882,14 @@ static inline void rcu_copy_process(struct task_struct *p) #endif +static inline void rcu_switch(struct task_struct *prev, + struct task_struct *next) +{ +#ifdef CONFIG_RCU_USER_QS + rcu_user_hooks_switch(prev, next); +#endif +} + static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { diff --git a/include/linux/security.h b/include/linux/security.h index 3dea6a9d568..d143b8e0195 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -118,6 +118,7 @@ void reset_security_ops(void); extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; #else +#define mmap_min_addr 0UL #define dac_mmap_min_addr 0UL #endif diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h new file mode 100644 index 00000000000..e0106d8581d --- /dev/null +++ b/include/linux/smpboot.h @@ -0,0 +1,43 @@ +#ifndef _LINUX_SMPBOOT_H +#define _LINUX_SMPBOOT_H + +#include <linux/types.h> + +struct task_struct; +/* Cookie handed to the thread_fn*/ +struct smpboot_thread_data; + +/** + * struct smp_hotplug_thread - CPU hotplug related thread descriptor + * @store: Pointer to per cpu storage for the task pointers + * @list: List head for core management + * @thread_should_run: Check whether the thread should run or not. Called with + * preemption disabled. + * @thread_fn: The associated thread function + * @setup: Optional setup function, called when the thread gets + * operational the first time + * @cleanup: Optional cleanup function, called when the thread + * should stop (module exit) + * @park: Optional park function, called when the thread is + * parked (cpu offline) + * @unpark: Optional unpark function, called when the thread is + * unparked (cpu online) + * @thread_comm: The base name of the thread + */ +struct smp_hotplug_thread { + struct task_struct __percpu **store; + struct list_head list; + int (*thread_should_run)(unsigned int cpu); + void (*thread_fn)(unsigned int cpu); + void (*setup)(unsigned int cpu); + void (*cleanup)(unsigned int cpu, bool online); + void (*park)(unsigned int cpu); + void (*unpark)(unsigned int cpu); + const char *thread_comm; +}; + +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); +void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); +int smpboot_thread_schedule(void); + +#endif diff --git a/include/linux/task_work.h b/include/linux/task_work.h index fb46b03b185..ca5a1cf27da 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -18,8 +18,7 @@ void task_work_run(void); static inline void exit_task_work(struct task_struct *task) { - if (unlikely(task->task_works)) - task_work_run(); + task_work_run(); } #endif /* _LINUX_TASK_WORK_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 6abd9138bed..8c5a197e158 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -49,147 +49,112 @@ extern struct tvec_base boot_tvec_bases; #endif /* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. Use the LSB to - * indicate whether the timer is deferrable. + * Note that all tvec_bases are at least 4 byte aligned and lower two bits + * of base in timer_list is guaranteed to be zero. Use them for flags. * * A deferrable timer will work normally when the system is busy, but * will not cause a CPU to come out of idle just to service it; instead, * the timer will be serviced when the CPU eventually wakes up with a * subsequent non-deferrable timer. + * + * An irqsafe timer is executed with IRQ disabled and it's safe to wait for + * the completion of the running instance from IRQ handlers, for example, + * by calling del_timer_sync(). + * + * Note: The irq disabled callback execution is a special case for + * workqueue locking issues. It's not meant for executing random crap + * with interrupts disabled. Abuse is monitored! */ -#define TBASE_DEFERRABLE_FLAG (0x1) +#define TIMER_DEFERRABLE 0x1LU +#define TIMER_IRQSAFE 0x2LU -#define TIMER_INITIALIZER(_function, _expires, _data) { \ +#define TIMER_FLAG_MASK 0x3LU + +#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = &boot_tvec_bases, \ + .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \ .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } -#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ - ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) +#define TIMER_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), 0) -#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ - .entry = { .prev = TIMER_ENTRY_STATIC }, \ - .function = (_function), \ - .expires = (_expires), \ - .data = (_data), \ - .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ - __TIMER_LOCKDEP_MAP_INITIALIZER( \ - __FILE__ ":" __stringify(__LINE__)) \ - } +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE) #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) -void init_timer_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key); -void init_timer_deferrable_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key); +void init_timer_key(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key); + +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS +extern void init_timer_on_stack_key(struct timer_list *timer, + unsigned int flags, const char *name, + struct lock_class_key *key); +extern void destroy_timer_on_stack(struct timer_list *timer); +#else +static inline void destroy_timer_on_stack(struct timer_list *timer) { } +static inline void init_timer_on_stack_key(struct timer_list *timer, + unsigned int flags, const char *name, + struct lock_class_key *key) +{ + init_timer_key(timer, flags, name, key); +} +#endif #ifdef CONFIG_LOCKDEP -#define init_timer(timer) \ +#define __init_timer(_timer, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_key((timer), #timer, &__key); \ + init_timer_key((_timer), (_flags), #_timer, &__key); \ } while (0) -#define init_timer_deferrable(timer) \ +#define __init_timer_on_stack(_timer, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_deferrable_key((timer), #timer, &__key); \ + init_timer_on_stack_key((_timer), (_flags), #_timer, &__key); \ } while (0) +#else +#define __init_timer(_timer, _flags) \ + init_timer_key((_timer), (_flags), NULL, NULL) +#define __init_timer_on_stack(_timer, _flags) \ + init_timer_on_stack_key((_timer), (_flags), NULL, NULL) +#endif +#define init_timer(timer) \ + __init_timer((timer), 0) +#define init_timer_deferrable(timer) \ + __init_timer((timer), TIMER_DEFERRABLE) #define init_timer_on_stack(timer) \ + __init_timer_on_stack((timer), 0) + +#define __setup_timer(_timer, _fn, _data, _flags) \ do { \ - static struct lock_class_key __key; \ - init_timer_on_stack_key((timer), #timer, &__key); \ + __init_timer((_timer), (_flags)); \ + (_timer)->function = (_fn); \ + (_timer)->data = (_data); \ } while (0) -#define setup_timer(timer, fn, data) \ +#define __setup_timer_on_stack(_timer, _fn, _data, _flags) \ do { \ - static struct lock_class_key __key; \ - setup_timer_key((timer), #timer, &__key, (fn), (data));\ + __init_timer_on_stack((_timer), (_flags)); \ + (_timer)->function = (_fn); \ + (_timer)->data = (_data); \ } while (0) +#define setup_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), 0) #define setup_timer_on_stack(timer, fn, data) \ - do { \ - static struct lock_class_key __key; \ - setup_timer_on_stack_key((timer), #timer, &__key, \ - (fn), (data)); \ - } while (0) + __setup_timer_on_stack((timer), (fn), (data), 0) #define setup_deferrable_timer_on_stack(timer, fn, data) \ - do { \ - static struct lock_class_key __key; \ - setup_deferrable_timer_on_stack_key((timer), #timer, \ - &__key, (fn), \ - (data)); \ - } while (0) -#else -#define init_timer(timer)\ - init_timer_key((timer), NULL, NULL) -#define init_timer_deferrable(timer)\ - init_timer_deferrable_key((timer), NULL, NULL) -#define init_timer_on_stack(timer)\ - init_timer_on_stack_key((timer), NULL, NULL) -#define setup_timer(timer, fn, data)\ - setup_timer_key((timer), NULL, NULL, (fn), (data)) -#define setup_timer_on_stack(timer, fn, data)\ - setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data)) -#define setup_deferrable_timer_on_stack(timer, fn, data)\ - setup_deferrable_timer_on_stack_key((timer), NULL, NULL, (fn), (data)) -#endif - -#ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void init_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key); -extern void destroy_timer_on_stack(struct timer_list *timer); -#else -static inline void destroy_timer_on_stack(struct timer_list *timer) { } -static inline void init_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key) -{ - init_timer_key(timer, name, key); -} -#endif - -static inline void setup_timer_key(struct timer_list * timer, - const char *name, - struct lock_class_key *key, - void (*function)(unsigned long), - unsigned long data) -{ - timer->function = function; - timer->data = data; - init_timer_key(timer, name, key); -} - -static inline void setup_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key, - void (*function)(unsigned long), - unsigned long data) -{ - timer->function = function; - timer->data = data; - init_timer_on_stack_key(timer, name, key); -} - -extern void setup_deferrable_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key, - void (*function)(unsigned long), - unsigned long data); + __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE) /** * timer_pending - is a timer pending? diff --git a/include/linux/topology.h b/include/linux/topology.h index fec12d66721..d3cf0d6e771 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -129,7 +129,6 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ @@ -160,7 +159,6 @@ int arch_update_cpu_topology(void); | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 802de56c41e..2f322c38bd4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -136,6 +136,22 @@ static inline void tracepoint_synchronize_unregister(void) postrcu; \ } while (0) +#ifndef MODULE +#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ + static inline void trace_##name##_rcuidle(proto) \ + { \ + if (static_key_false(&__tracepoint_##name.key)) \ + __DO_TRACE(&__tracepoint_##name, \ + TP_PROTO(data_proto), \ + TP_ARGS(data_args), \ + TP_CONDITION(cond), \ + rcu_idle_exit(), \ + rcu_idle_enter()); \ + } +#else +#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) +#endif + /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -151,16 +167,8 @@ static inline void tracepoint_synchronize_unregister(void) TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ } \ - static inline void trace_##name##_rcuidle(proto) \ - { \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ - TP_CONDITION(cond), \ - rcu_idle_exit(), \ - rcu_idle_enter()); \ - } \ + __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ + PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index efe4b3308c7..e6f0331e3d4 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -99,25 +99,27 @@ struct xol_area { struct uprobes_state { struct xol_area *xol_area; - atomic_t count; }; + extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); +extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern void __weak arch_uprobe_enable_step(struct arch_uprobe *arch); +extern void __weak arch_uprobe_disable_step(struct arch_uprobe *arch); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); -extern void uprobe_reset_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; @@ -138,6 +140,10 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void +uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) +{ +} static inline void uprobe_notify_resume(struct pt_regs *regs) { } @@ -158,8 +164,5 @@ static inline void uprobe_copy_process(struct task_struct *t) static inline void uprobe_clear_state(struct mm_struct *mm) { } -static inline void uprobe_reset_state(struct mm_struct *mm) -{ -} #endif /* !CONFIG_UPROBES */ #endif /* _LINUX_UPROBES_H */ diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 22e61fdf75a..28e493b5b94 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -84,6 +84,8 @@ struct xfrm_replay_state { __u32 bitmap; }; +#define XFRMA_REPLAY_ESN_MAX 4096 + struct xfrm_replay_state_esn { unsigned int bmp_len; __u32 oseq; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0fedbd8d747..9fc7114159e 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -111,9 +111,8 @@ struct rt6_info { struct inet6_dev *rt6i_idev; unsigned long _rt6i_peer; -#ifdef CONFIG_XFRM - u32 rt6i_flow_cache_genid; -#endif + u32 rt6i_genid; + /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ae1cd6c9ba5..fd87963a0ea 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -102,6 +102,7 @@ struct net { #endif struct netns_ipvs *ipvs; struct sock *diag_nlsk; + atomic_t rt_genid; }; @@ -300,5 +301,14 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header) } #endif +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->rt_genid); +} + +static inline void rt_genid_bump(struct net *net) +{ + atomic_inc(&net->rt_genid); +} #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1474dd65c66..eb24dbccd81 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,7 +65,6 @@ struct netns_ipv4 { unsigned int sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; - atomic_t rt_genid; atomic_t dev_addr_genid; #ifdef CONFIG_IP_MROUTE diff --git a/include/net/route.h b/include/net/route.h index 776a27f1ab7..da22243d276 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,7 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void rt_cache_flush(struct net *net, int how); +extern void rt_cache_flush(struct net *net); extern void rt_flush_dev(struct net_device *dev); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index b0b4eb24d59..1905ca8dd39 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -1,5 +1,5 @@ /* - * Trace files that want to automate creationg of all tracepoints defined + * Trace files that want to automate creation of all tracepoints defined * in their file should include this file. The following are macros that the * trace file may define: * diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 5f889f16b0c..08fa27244da 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -214,7 +214,7 @@ TRACE_EVENT(mm_page_alloc, TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", __entry->page, - page_to_pfn(__entry->page), + __entry->page ? page_to_pfn(__entry->page) : 0, __entry->order, __entry->migratetype, show_gfp_flags(__entry->gfp_flags)) @@ -240,7 +240,7 @@ DECLARE_EVENT_CLASS(mm_page, TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", __entry->page, - page_to_pfn(__entry->page), + __entry->page ? page_to_pfn(__entry->page) : 0, __entry->order, __entry->migratetype, __entry->order == 0) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 11e27c3af3c..f19fff8650e 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count, bool clear_pte); + struct gnttab_map_grant_ref *kunmap_ops, + struct page **pages, unsigned int count); #endif /* __ASM_GNTTAB_H__ */ diff --git a/init/Kconfig b/init/Kconfig index af6c7f8ba01..3466a6e017b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -267,6 +267,106 @@ config POSIX_MQUEUE_SYSCTL depends on SYSCTL default y +config FHANDLE + bool "open by fhandle syscalls" + select EXPORTFS + help + If you say Y here, a user level program will be able to map + file names to handle and then later use the handle for + different file system operations. This is useful in implementing + userspace file servers, which now track files using handles instead + of names. The handle would remain the same even if file names + get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) + syscalls. + +config AUDIT + bool "Auditing support" + depends on NET + help + Enable auditing infrastructure that can be used with another + kernel subsystem, such as SELinux (which requires this for + logging of avc messages output). Does not do system-call + auditing without CONFIG_AUDITSYSCALL. + +config AUDITSYSCALL + bool "Enable system-call auditing support" + depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT)) + default y if SECURITY_SELINUX + help + Enable low-overhead system-call auditing infrastructure that + can be used independently or with another kernel subsystem, + such as SELinux. + +config AUDIT_WATCH + def_bool y + depends on AUDITSYSCALL + select FSNOTIFY + +config AUDIT_TREE + def_bool y + depends on AUDITSYSCALL + select FSNOTIFY + +config AUDIT_LOGINUID_IMMUTABLE + bool "Make audit loginuid immutable" + depends on AUDIT + help + The config option toggles if a task setting its loginuid requires + CAP_SYS_AUDITCONTROL or if that task should require no special permissions + but should instead only allow setting its loginuid if it was never + previously set. On systems which use systemd or a similar central + process to restart login services this should be set to true. On older + systems in which an admin would typically have to directly stop and + start processes this should be set to false. Setting this to true allows + one to drop potentially dangerous capabilites from the login tasks, + but may not be backwards compatible with older init systems. + +source "kernel/irq/Kconfig" +source "kernel/time/Kconfig" + +menu "CPU/Task time and stats accounting" + +choice + prompt "Cputime accounting" + default TICK_CPU_ACCOUNTING if !PPC64 + default VIRT_CPU_ACCOUNTING if PPC64 + +# Kind of a stub config for the pure tick based cputime accounting +config TICK_CPU_ACCOUNTING + bool "Simple tick based cputime accounting" + depends on !S390 + help + This is the basic tick based cputime accounting that maintains + statistics about user, system and idle time spent on per jiffies + granularity. + + If unsure, say Y. + +config VIRT_CPU_ACCOUNTING + bool "Deterministic task and CPU time accounting" + depends on HAVE_VIRT_CPU_ACCOUNTING + help + Select this option to enable more accurate task and CPU time + accounting. This is done by reading a CPU counter on each + kernel entry and exit and on transitions within the kernel + between system, softirq and hardirq state, so there is a + small performance impact. In the case of s390 or IBM POWER > 5, + this also enables accounting of stolen time on logically-partitioned + systems. + +config IRQ_TIME_ACCOUNTING + bool "Fine granularity task level IRQ time accounting" + depends on HAVE_IRQ_TIME_ACCOUNTING + help + Select this option to enable fine granularity task irq time + accounting. This is done by reading a timestamp on each + transitions between softirq and hardirq state, so there can be a + small performance impact. + + If in doubt, say N here. + +endchoice + config BSD_PROCESS_ACCT bool "BSD Process Accounting" help @@ -292,18 +392,6 @@ config BSD_PROCESS_ACCT_V3 for processing it. A preliminary version of these tools is available at <http://www.gnu.org/software/acct/>. -config FHANDLE - bool "open by fhandle syscalls" - select EXPORTFS - help - If you say Y here, a user level program will be able to map - file names to handle and then later use the handle for - different file system operations. This is useful in implementing - userspace file servers, which now track files using handles instead - of names. The handle would remain the same even if file names - get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) - syscalls. - config TASKSTATS bool "Export task/process statistics through netlink (EXPERIMENTAL)" depends on NET @@ -346,50 +434,7 @@ config TASK_IO_ACCOUNTING Say N if unsure. -config AUDIT - bool "Auditing support" - depends on NET - help - Enable auditing infrastructure that can be used with another - kernel subsystem, such as SELinux (which requires this for - logging of avc messages output). Does not do system-call - auditing without CONFIG_AUDITSYSCALL. - -config AUDITSYSCALL - bool "Enable system-call auditing support" - depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT)) - default y if SECURITY_SELINUX - help - Enable low-overhead system-call auditing infrastructure that - can be used independently or with another kernel subsystem, - such as SELinux. - -config AUDIT_WATCH - def_bool y - depends on AUDITSYSCALL - select FSNOTIFY - -config AUDIT_TREE - def_bool y - depends on AUDITSYSCALL - select FSNOTIFY - -config AUDIT_LOGINUID_IMMUTABLE - bool "Make audit loginuid immutable" - depends on AUDIT - help - The config option toggles if a task setting its loginuid requires - CAP_SYS_AUDITCONTROL or if that task should require no special permissions - but should instead only allow setting its loginuid if it was never - previously set. On systems which use systemd or a similar central - process to restart login services this should be set to true. On older - systems in which an admin would typically have to directly stop and - start processes this should be set to false. Setting this to true allows - one to drop potentially dangerous capabilites from the login tasks, - but may not be backwards compatible with older init systems. - -source "kernel/irq/Kconfig" -source "kernel/time/Kconfig" +endmenu # "CPU/Task time and stats accounting" menu "RCU Subsystem" @@ -441,6 +486,24 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. +config RCU_USER_QS + bool "Consider userspace as in RCU extended quiescent state" + depends on HAVE_RCU_USER_QS && SMP + help + This option sets hooks on kernel / userspace boundaries and + puts RCU in extended quiescent state when the CPU runs in + userspace. It means that when a CPU runs in userspace, it is + excluded from the global RCU state machine and thus doesn't + to keep the timer tick on for RCU. + +config RCU_USER_QS_FORCE + bool "Force userspace extended QS by default" + depends on RCU_USER_QS + help + Set the hooks in user/kernel boundaries by default in order to + test this feature that treats userspace as an extended quiescent + state until we have a real user like a full adaptive nohz option. + config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 2251882daf5..44511d100ea 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -87,6 +87,9 @@ config ARCH_INLINE_WRITE_UNLOCK_IRQ config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE bool +config UNINLINE_SPIN_UNLOCK + bool + # # lock_* functions are inlined when: # - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y @@ -103,100 +106,120 @@ config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE # - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y # +if !DEBUG_SPINLOCK + config INLINE_SPIN_TRYLOCK - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK + def_bool y + depends on ARCH_INLINE_SPIN_TRYLOCK config INLINE_SPIN_TRYLOCK_BH - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH + def_bool y + depends on ARCH_INLINE_SPIN_TRYLOCK_BH config INLINE_SPIN_LOCK - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK config INLINE_SPIN_LOCK_BH - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_SPIN_LOCK_BH + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK_BH config INLINE_SPIN_LOCK_IRQ - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_SPIN_LOCK_IRQ + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK_IRQ config INLINE_SPIN_LOCK_IRQSAVE - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_SPIN_LOCK_IRQSAVE - -config UNINLINE_SPIN_UNLOCK - bool + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK_IRQSAVE config INLINE_SPIN_UNLOCK_BH - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH + def_bool y + depends on ARCH_INLINE_SPIN_UNLOCK_BH config INLINE_SPIN_UNLOCK_IRQ - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH) + def_bool y + depends on !PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH config INLINE_SPIN_UNLOCK_IRQRESTORE - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + def_bool y + depends on ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE config INLINE_READ_TRYLOCK - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK + def_bool y + depends on ARCH_INLINE_READ_TRYLOCK config INLINE_READ_LOCK - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK config INLINE_READ_LOCK_BH - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_READ_LOCK_BH + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK_BH config INLINE_READ_LOCK_IRQ - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_READ_LOCK_IRQ + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK_IRQ config INLINE_READ_LOCK_IRQSAVE - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_READ_LOCK_IRQSAVE + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK_IRQSAVE config INLINE_READ_UNLOCK - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK) + def_bool y + depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK config INLINE_READ_UNLOCK_BH - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH + def_bool y + depends on ARCH_INLINE_READ_UNLOCK_BH config INLINE_READ_UNLOCK_IRQ - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH) + def_bool y + depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK_BH config INLINE_READ_UNLOCK_IRQRESTORE - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE + def_bool y + depends on ARCH_INLINE_READ_UNLOCK_IRQRESTORE config INLINE_WRITE_TRYLOCK - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK + def_bool y + depends on ARCH_INLINE_WRITE_TRYLOCK config INLINE_WRITE_LOCK - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK config INLINE_WRITE_LOCK_BH - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_WRITE_LOCK_BH + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK_BH config INLINE_WRITE_LOCK_IRQ - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_WRITE_LOCK_IRQ + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK_IRQ config INLINE_WRITE_LOCK_IRQSAVE - def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ - ARCH_INLINE_WRITE_LOCK_IRQSAVE + def_bool y + depends on !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK_IRQSAVE config INLINE_WRITE_UNLOCK - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK) + def_bool y + depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK config INLINE_WRITE_UNLOCK_BH - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH + def_bool y + depends on ARCH_INLINE_WRITE_UNLOCK_BH config INLINE_WRITE_UNLOCK_IRQ - def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH) + def_bool y + depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH config INLINE_WRITE_UNLOCK_IRQRESTORE - def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + def_bool y + depends on ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + +endif config MUTEX_SPIN_ON_OWNER - def_bool SMP && !DEBUG_MUTEXES + def_bool y + depends on SMP && !DEBUG_MUTEXES diff --git a/kernel/Makefile b/kernel/Makefile index c0cc67ad764..5404911eaee 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o lglock.o + async.o range.o groups.o lglock.o smpboot.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files @@ -46,7 +46,6 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SMP) += smpboot.o ifneq ($(CONFIG_SMP),y) obj-y += up.o endif @@ -98,7 +97,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ -obj-$(CONFIG_X86_DS) += trace/ +obj-$(CONFIG_TRACE_CLOCK) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o diff --git a/kernel/cpu.c b/kernel/cpu.c index 14d32588ccc..f560598807c 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -280,12 +280,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __func__, cpu); goto out_release; } + smpboot_park_threads(cpu); err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ + smpboot_unpark_threads(cpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); - goto out_release; } BUG_ON(cpu_online(cpu)); @@ -354,6 +355,10 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out; } + ret = smpboot_create_threads(cpu); + if (ret) + goto out; + ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret) { nr_calls--; @@ -368,6 +373,9 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); + /* Wake the per cpu threads */ + smpboot_unpark_threads(cpu); + /* Now call notifier in preparation. */ cpu_notify(CPU_ONLINE | mod, hcpu); @@ -439,14 +447,6 @@ EXPORT_SYMBOL_GPL(cpu_up); #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -void __weak arch_disable_nonboot_cpus_begin(void) -{ -} - -void __weak arch_disable_nonboot_cpus_end(void) -{ -} - int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; @@ -458,7 +458,6 @@ int disable_nonboot_cpus(void) * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - arch_disable_nonboot_cpus_begin(); printk("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { @@ -474,8 +473,6 @@ int disable_nonboot_cpus(void) } } - arch_disable_nonboot_cpus_end(); - if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 98d4597f43d..c77206184b8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -159,6 +159,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) int rctx; struct perf_callchain_entry *entry; + int kernel = !event->attr.exclude_callchain_kernel; + int user = !event->attr.exclude_callchain_user; + + if (!kernel && !user) + return NULL; entry = get_callchain_entry(&rctx); if (rctx == -1) @@ -169,24 +174,29 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) entry->nr = 0; - if (!user_mode(regs)) { + if (kernel && !user_mode(regs)) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; } - if (regs) { - /* - * Disallow cross-task user callchains. - */ - if (event->ctx->task && event->ctx->task != current) - goto exit_put; - - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); + if (user) { + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + /* + * Disallow cross-task user callchains. + */ + if (event->ctx->task && event->ctx->task != current) + goto exit_put; + + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } } exit_put: diff --git a/kernel/events/core.c b/kernel/events/core.c index 7fee567153f..7b9df353ba1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,7 @@ #include <linux/perf_event.h> #include <linux/ftrace_event.h> #include <linux/hw_breakpoint.h> +#include <linux/mm_types.h> #include "internal.h" @@ -3764,6 +3765,132 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +static void +perf_output_sample_regs(struct perf_output_handle *handle, + struct pt_regs *regs, u64 mask) +{ + int bit; + + for_each_set_bit(bit, (const unsigned long *) &mask, + sizeof(mask) * BITS_PER_BYTE) { + u64 val; + + val = perf_reg_value(regs, bit); + perf_output_put(handle, val); + } +} + +static void perf_sample_regs_user(struct perf_regs_user *regs_user, + struct pt_regs *regs) +{ + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + regs_user->regs = regs; + regs_user->abi = perf_reg_abi(current); + } +} + +/* + * Get remaining task size from user stack pointer. + * + * It'd be better to take stack vma map and limit this more + * precisly, but there's no way to get it safely under interrupt, + * so using TASK_SIZE as limit. + */ +static u64 perf_ustack_task_size(struct pt_regs *regs) +{ + unsigned long addr = perf_user_stack_pointer(regs); + + if (!addr || addr >= TASK_SIZE) + return 0; + + return TASK_SIZE - addr; +} + +static u16 +perf_sample_ustack_size(u16 stack_size, u16 header_size, + struct pt_regs *regs) +{ + u64 task_size; + + /* No regs, no stack pointer, no dump. */ + if (!regs) + return 0; + + /* + * Check if we fit in with the requested stack size into the: + * - TASK_SIZE + * If we don't, we limit the size to the TASK_SIZE. + * + * - remaining sample size + * If we don't, we customize the stack size to + * fit in to the remaining sample size. + */ + + task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); + stack_size = min(stack_size, (u16) task_size); + + /* Current header size plus static size and dynamic size. */ + header_size += 2 * sizeof(u64); + + /* Do we fit in with the current stack dump size? */ + if ((u16) (header_size + stack_size) < header_size) { + /* + * If we overflow the maximum size for the sample, + * we customize the stack dump size to fit in. + */ + stack_size = USHRT_MAX - header_size - sizeof(u64); + stack_size = round_up(stack_size, sizeof(u64)); + } + + return stack_size; +} + +static void +perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, + struct pt_regs *regs) +{ + /* Case of a kernel thread, nothing to dump */ + if (!regs) { + u64 size = 0; + perf_output_put(handle, size); + } else { + unsigned long sp; + unsigned int rem; + u64 dyn_size; + + /* + * We dump: + * static size + * - the size requested by user or the best one we can fit + * in to the sample max size + * data + * - user stack dump data + * dynamic size + * - the actual dumped size + */ + + /* Static size. */ + perf_output_put(handle, dump_size); + + /* Data. */ + sp = perf_user_stack_pointer(regs); + rem = __output_copy_user(handle, (void *) sp, dump_size); + dyn_size = dump_size - rem; + + perf_output_skip(handle, rem); + + /* Dynamic size. */ + perf_output_put(handle, dyn_size); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4024,6 +4151,28 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, nr); } } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi = data->regs_user.abi; + + /* + * If there are no regs to dump, notice it through + * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). + */ + perf_output_put(handle, abi); + + if (abi) { + u64 mask = event->attr.sample_regs_user; + perf_output_sample_regs(handle, + data->regs_user.regs, + mask); + } + } + + if (sample_type & PERF_SAMPLE_STACK_USER) + perf_output_sample_ustack(handle, + data->stack_user_size, + data->regs_user.regs); } void perf_prepare_sample(struct perf_event_header *header, @@ -4075,6 +4224,49 @@ void perf_prepare_sample(struct perf_event_header *header, } header->size += size; } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + /* regs dump ABI info */ + int size = sizeof(u64); + + perf_sample_regs_user(&data->regs_user, regs); + + if (data->regs_user.regs) { + u64 mask = event->attr.sample_regs_user; + size += hweight64(mask) * sizeof(u64); + } + + header->size += size; + } + + if (sample_type & PERF_SAMPLE_STACK_USER) { + /* + * Either we need PERF_SAMPLE_STACK_USER bit to be allways + * processed as the last one or have additional check added + * in case new sample type is added, because we could eat + * up the rest of the sample size. + */ + struct perf_regs_user *uregs = &data->regs_user; + u16 stack_size = event->attr.sample_stack_user; + u16 size = sizeof(u64); + + if (!uregs->abi) + perf_sample_regs_user(uregs, regs); + + stack_size = perf_sample_ustack_size(stack_size, header->size, + uregs->regs); + + /* + * If there is something to dump, add space for the dump + * itself and for the field that tells the dynamic size, + * which is how many have been actually dumped. + */ + if (stack_size) + size += sizeof(u64) + stack_size; + + data->stack_user_size = stack_size; + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6151,6 +6343,28 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } } + + if (attr->sample_type & PERF_SAMPLE_REGS_USER) { + ret = perf_reg_validate(attr->sample_regs_user); + if (ret) + return ret; + } + + if (attr->sample_type & PERF_SAMPLE_STACK_USER) { + if (!arch_perf_have_user_stack_dump()) + return -ENOSYS; + + /* + * We have __u32 type for the size, but so far + * we can only use __u16 as maximum due to the + * __u16 sample size limit. + */ + if (attr->sample_stack_user >= USHRT_MAX) + ret = -EINVAL; + else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) + ret = -EINVAL; + } + out: return ret; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index a096c19f2c2..d56a64c99a8 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -2,6 +2,7 @@ #define _KERNEL_EVENTS_INTERNAL_H #include <linux/hardirq.h> +#include <linux/uaccess.h> /* Buffer handling */ @@ -76,30 +77,53 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } -static inline void -__output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) +#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ +static inline unsigned int \ +func_name(struct perf_output_handle *handle, \ + const void *buf, unsigned int len) \ +{ \ + unsigned long size, written; \ + \ + do { \ + size = min_t(unsigned long, handle->size, len); \ + \ + written = memcpy_func(handle->addr, buf, size); \ + \ + len -= written; \ + handle->addr += written; \ + buf += written; \ + handle->size -= written; \ + if (!handle->size) { \ + struct ring_buffer *rb = handle->rb; \ + \ + handle->page++; \ + handle->page &= rb->nr_pages - 1; \ + handle->addr = rb->data_pages[handle->page]; \ + handle->size = PAGE_SIZE << page_order(rb); \ + } \ + } while (len && written == size); \ + \ + return len; \ +} + +static inline int memcpy_common(void *dst, const void *src, size_t n) { - do { - unsigned long size = min_t(unsigned long, handle->size, len); - - memcpy(handle->addr, buf, size); - - len -= size; - handle->addr += size; - buf += size; - handle->size -= size; - if (!handle->size) { - struct ring_buffer *rb = handle->rb; - - handle->page++; - handle->page &= rb->nr_pages - 1; - handle->addr = rb->data_pages[handle->page]; - handle->size = PAGE_SIZE << page_order(rb); - } - } while (len); + memcpy(dst, src, n); + return n; } +DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) + +#define MEMCPY_SKIP(dst, src, n) (n) + +DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) + +#ifndef arch_perf_out_copy_user +#define arch_perf_out_copy_user __copy_from_user_inatomic +#endif + +DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) + /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); @@ -134,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx) recursion[rctx]--; } +#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP +static inline bool arch_perf_have_user_stack_dump(void) +{ + return true; +} + +#define perf_user_stack_pointer(regs) user_stack_pointer(regs) +#else +static inline bool arch_perf_have_user_stack_dump(void) +{ + return false; +} + +#define perf_user_stack_pointer(regs) 0 +#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ + #endif /* _KERNEL_EVENTS_INTERNAL_H */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6ddaba43fb7..23cb34ff397 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -182,10 +182,16 @@ out: return -ENOSPC; } -void perf_output_copy(struct perf_output_handle *handle, +unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) { - __output_copy(handle, buf, len); + return __output_copy(handle, buf, len); +} + +unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len) +{ + return __output_skip(handle, NULL, len); } void perf_output_end(struct perf_output_handle *handle) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c08a22d02f7..912ef48d28a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -280,12 +280,10 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ if (ret <= 0) return ret; - lock_page(page); vaddr_new = kmap_atomic(page); vaddr &= ~PAGE_MASK; memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); - unlock_page(page); put_page(page); @@ -334,7 +332,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned */ result = is_swbp_at_addr(mm, vaddr); if (result == 1) - return -EEXIST; + return 0; if (result) return result; @@ -347,24 +345,22 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. - * @verify: if true, verify existance of breakpoint instruction. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. */ int __weak -set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) +set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - if (verify) { - int result; + int result; + + result = is_swbp_at_addr(mm, vaddr); + if (!result) + return -EINVAL; - result = is_swbp_at_addr(mm, vaddr); - if (!result) - return -EINVAL; + if (result != 1) + return result; - if (result != 1) - return result; - } return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } @@ -415,11 +411,10 @@ static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { struct uprobe *uprobe; - unsigned long flags; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); uprobe = __find_uprobe(inode, offset); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); return uprobe; } @@ -466,12 +461,11 @@ static struct uprobe *__insert_uprobe(struct uprobe *uprobe) */ static struct uprobe *insert_uprobe(struct uprobe *uprobe) { - unsigned long flags; struct uprobe *u; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); u = __insert_uprobe(uprobe); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); /* For now assume that the instruction need not be single-stepped */ uprobe->flags |= UPROBE_SKIP_SSTEP; @@ -649,6 +643,7 @@ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { + bool first_uprobe; int ret; /* @@ -659,7 +654,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, * Hence behave as if probe already existed. */ if (!uprobe->consumers) - return -EEXIST; + return 0; if (!(uprobe->flags & UPROBE_COPY_INSN)) { ret = copy_insn(uprobe, vma->vm_file); @@ -681,17 +676,18 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, } /* - * Ideally, should be updating the probe count after the breakpoint - * has been successfully inserted. However a thread could hit the - * breakpoint we just inserted even before the probe count is - * incremented. If this is the first breakpoint placed, breakpoint - * notifier might ignore uprobes and pass the trap to the thread. - * Hence increment before and decrement on failure. + * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), + * the task can hit this breakpoint right after __replace_page(). */ - atomic_inc(&mm->uprobes_state.count); + first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); + if (first_uprobe) + set_bit(MMF_HAS_UPROBES, &mm->flags); + ret = set_swbp(&uprobe->arch, mm, vaddr); - if (ret) - atomic_dec(&mm->uprobes_state.count); + if (!ret) + clear_bit(MMF_RECALC_UPROBES, &mm->flags); + else if (first_uprobe) + clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } @@ -699,8 +695,12 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) - atomic_dec(&mm->uprobes_state.count); + /* can happen if uprobe_register() fails */ + if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) + return; + + set_bit(MMF_RECALC_UPROBES, &mm->flags); + set_orig_insn(&uprobe->arch, mm, vaddr); } /* @@ -710,11 +710,9 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad */ static void delete_uprobe(struct uprobe *uprobe) { - unsigned long flags; - - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); iput(uprobe->inode); put_uprobe(uprobe); atomic_dec(&uprobe_events); @@ -831,17 +829,11 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; - if (is_register) { + if (is_register) err = install_breakpoint(uprobe, mm, vma, info->vaddr); - /* - * We can race against uprobe_mmap(), see the - * comment near uprobe_hash(). - */ - if (err == -EEXIST) - err = 0; - } else { + else remove_breakpoint(uprobe, mm, info->vaddr); - } + unlock: up_write(&mm->mmap_sem); free: @@ -908,7 +900,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * } mutex_unlock(uprobes_hash(inode)); - put_uprobe(uprobe); + if (uprobe) + put_uprobe(uprobe); return ret; } @@ -978,7 +971,6 @@ static void build_probe_list(struct inode *inode, struct list_head *head) { loff_t min, max; - unsigned long flags; struct rb_node *n, *t; struct uprobe *u; @@ -986,7 +978,7 @@ static void build_probe_list(struct inode *inode, min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; - spin_lock_irqsave(&uprobes_treelock, flags); + spin_lock(&uprobes_treelock); n = find_node_in_range(inode, min, max); if (n) { for (t = n; t; t = rb_prev(t)) { @@ -1004,27 +996,20 @@ static void build_probe_list(struct inode *inode, atomic_inc(&u->ref); } } - spin_unlock_irqrestore(&uprobes_treelock, flags); + spin_unlock(&uprobes_treelock); } /* - * Called from mmap_region. - * called with mm->mmap_sem acquired. + * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. * - * Return -ve no if we fail to insert probes and we cannot - * bail-out. - * Return 0 otherwise. i.e: - * - * - successful insertion of probes - * - (or) no possible probes to be inserted. - * - (or) insertion of probes failed but we can bail-out. + * Currently we ignore all errors and always return 0, the callers + * can't handle the failure anyway. */ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; - int ret, count; if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) return 0; @@ -1036,44 +1021,35 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); - ret = 0; - count = 0; - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - if (!ret) { + if (!fatal_signal_pending(current)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - - ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - /* - * We can race against uprobe_register(), see the - * comment near uprobe_hash(). - */ - if (ret == -EEXIST) { - ret = 0; - - if (!is_swbp_at_addr(vma->vm_mm, vaddr)) - continue; - - /* - * Unable to insert a breakpoint, but - * breakpoint lies underneath. Increment the - * probe count. - */ - atomic_inc(&vma->vm_mm->uprobes_state.count); - } - - if (!ret) - count++; + install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } - mutex_unlock(uprobes_mmap_hash(inode)); - if (ret) - atomic_sub(count, &vma->vm_mm->uprobes_state.count); + return 0; +} - return ret; +static bool +vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + loff_t min, max; + struct inode *inode; + struct rb_node *n; + + inode = vma->vm_file->f_mapping->host; + + min = vaddr_to_offset(vma, start); + max = min + (end - start) - 1; + + spin_lock(&uprobes_treelock); + n = find_node_in_range(inode, min, max); + spin_unlock(&uprobes_treelock); + + return !!n; } /* @@ -1081,37 +1057,18 @@ int uprobe_mmap(struct vm_area_struct *vma) */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct list_head tmp_list; - struct uprobe *uprobe, *u; - struct inode *inode; - if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ return; - if (!atomic_read(&vma->vm_mm->uprobes_state.count)) - return; - - inode = vma->vm_file->f_mapping->host; - if (!inode) + if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) || + test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags)) return; - mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, vma, start, end, &tmp_list); - - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - put_uprobe(uprobe); - } - mutex_unlock(uprobes_mmap_hash(inode)); + if (vma_has_uprobes(vma, start, end)) + set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } /* Slot allocation for XOL */ @@ -1213,13 +1170,15 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } -/* - * uprobe_reset_state - Free the area allocated for slots. - */ -void uprobe_reset_state(struct mm_struct *mm) +void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { - mm->uprobes_state.xol_area = NULL; - atomic_set(&mm->uprobes_state.count, 0); + newmm->uprobes_state.xol_area = NULL; + + if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) { + set_bit(MMF_HAS_UPROBES, &newmm->flags); + /* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */ + set_bit(MMF_RECALC_UPROBES, &newmm->flags); + } } /* @@ -1437,6 +1396,25 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) return false; } +static void mmf_recalc_uprobes(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!valid_vma(vma, false)) + continue; + /* + * This is not strictly accurate, we can race with + * uprobe_unregister() and see the already removed + * uprobe if delete_uprobe() was not yet called. + */ + if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) + return; + } + + clear_bit(MMF_HAS_UPROBES, &mm->flags); +} + static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) { struct mm_struct *mm = current->mm; @@ -1458,11 +1436,24 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) } else { *is_swbp = -EFAULT; } + + if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags)) + mmf_recalc_uprobes(mm); up_read(&mm->mmap_sem); return uprobe; } +void __weak arch_uprobe_enable_step(struct arch_uprobe *arch) +{ + user_enable_single_step(current); +} + +void __weak arch_uprobe_disable_step(struct arch_uprobe *arch) +{ + user_disable_single_step(current); +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. @@ -1509,7 +1500,7 @@ static void handle_swbp(struct pt_regs *regs) utask->state = UTASK_SSTEP; if (!pre_ssout(uprobe, regs, bp_vaddr)) { - user_enable_single_step(current); + arch_uprobe_enable_step(&uprobe->arch); return; } @@ -1518,17 +1509,15 @@ cleanup_ret: utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; } - if (uprobe) { - if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) + if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. - */ - instruction_pointer_set(regs, bp_vaddr); + /* + * cannot singlestep; cannot skip instruction; + * re-execute the instruction. + */ + instruction_pointer_set(regs, bp_vaddr); - put_uprobe(uprobe); - } + put_uprobe(uprobe); } /* @@ -1547,10 +1536,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) else WARN_ON_ONCE(1); + arch_uprobe_disable_step(&uprobe->arch); put_uprobe(uprobe); utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; - user_disable_single_step(current); xol_free_insn_slot(current); spin_lock_irq(¤t->sighand->siglock); @@ -1589,8 +1578,7 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask; - if (!current->mm || !atomic_read(¤t->mm->uprobes_state.count)) - /* task is currently not uprobed */ + if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags)) return 0; utask = current->utask; diff --git a/kernel/fork.c b/kernel/fork.c index 2c8857e1285..5a0e74d89a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -353,6 +353,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); + uprobe_dup_mmap(oldmm, mm); /* * Not linked in yet - no deadlock potential: */ @@ -454,9 +455,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; - - if (file) - uprobe_mmap(tmp); } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); @@ -839,8 +837,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif - uprobe_reset_state(mm); - if (!mm_init(mm, tsk)) goto fail_nomem; @@ -1280,11 +1276,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - p->hardirqs_enabled = 1; -#else p->hardirqs_enabled = 0; -#endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index eebd6d5cfb4..57d86d07221 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -671,6 +671,7 @@ irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, irq_set_chip(irq, chip); __irq_set_handler(irq, handle, 0, name); } +EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index b5fcd96c710..988dc58e884 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -6,6 +6,7 @@ */ #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/export.h> #include "internals.h" @@ -57,3 +58,4 @@ struct irq_chip dummy_irq_chip = { .irq_mask = noop, .irq_unmask = noop, }; +EXPORT_SYMBOL_GPL(dummy_irq_chip); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c62b8546cc9..098f396aa40 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) { LIST_HEAD(free_list); + mutex_lock(&kprobe_mutex); /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); - mutex_lock(&kprobe_mutex); /* * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) @@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) /* Step 4: Free cleaned kprobes after quiesence period */ do_free_cleaned_kprobes(&free_list); - mutex_unlock(&kprobe_mutex); mutex_unlock(&module_mutex); + mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) @@ -759,20 +759,32 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) struct kprobe *ap; struct optimized_kprobe *op; + /* Impossible to optimize ftrace-based kprobe */ + if (kprobe_ftrace(p)) + return; + + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + mutex_lock(&text_mutex); + ap = alloc_aggr_kprobe(p); if (!ap) - return; + goto out; op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { /* If failed to setup optimizing, fallback to kprobe */ arch_remove_optimized_kprobe(op); kfree(op); - return; + goto out; } init_aggr_kprobe(ap, p); - optimize_kprobe(ap); + optimize_kprobe(ap); /* This just kicks optimizer thread */ + +out: + mutex_unlock(&text_mutex); + jump_label_unlock(); } #ifdef CONFIG_SYSCTL @@ -907,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) } #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { + .func = kprobe_ftrace_handler, + .flags = FTRACE_OPS_FL_SAVE_REGS, +}; +static int kprobe_ftrace_enabled; + +/* Must ensure p->addr is really on ftrace */ +static int __kprobes prepare_kprobe(struct kprobe *p) +{ + if (!kprobe_ftrace(p)) + return arch_prepare_kprobe(p); + + return arch_prepare_kprobe_ftrace(p); +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes arm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 0, 0); + WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret); + kprobe_ftrace_enabled++; + if (kprobe_ftrace_enabled == 1) { + ret = register_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + kprobe_ftrace_enabled--; + if (kprobe_ftrace_enabled == 0) { + ret = unregister_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 1, 0); + WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); +} +#else /* !KPROBES_CAN_USE_FTRACE */ +#define prepare_kprobe(p) arch_prepare_kprobe(p) +#define arm_kprobe_ftrace(p) do {} while (0) +#define disarm_kprobe_ftrace(p) do {} while (0) +#endif + /* Arm a kprobe with text_mutex */ static void __kprobes arm_kprobe(struct kprobe *kp) { + if (unlikely(kprobe_ftrace(kp))) { + arm_kprobe_ftrace(kp); + return; + } /* * Here, since __arm_kprobe() doesn't use stop_machine(), * this doesn't cause deadlock on text_mutex. So, we don't @@ -921,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp) } /* Disarm a kprobe with text_mutex */ -static void __kprobes disarm_kprobe(struct kprobe *kp) +static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) { + if (unlikely(kprobe_ftrace(kp))) { + disarm_kprobe_ftrace(kp); + return; + } /* Ditto */ mutex_lock(&text_mutex); - __disarm_kprobe(kp, true); + __disarm_kprobe(kp, reopt); mutex_unlock(&text_mutex); } @@ -1144,12 +1215,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) if (p->post_handler && !ap->post_handler) ap->post_handler = aggr_post_handler; - if (kprobe_disabled(ap) && !kprobe_disabled(p)) { - ap->flags &= ~KPROBE_FLAG_DISABLED; - if (!kprobes_all_disarmed) - /* Arm the breakpoint again. */ - __arm_kprobe(ap); - } return 0; } @@ -1189,11 +1254,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, int ret = 0; struct kprobe *ap = orig_p; + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + /* + * Get online CPUs to avoid text_mutex deadlock.with stop machine, + * which is invoked by unoptimize_kprobe() in add_new_kprobe() + */ + get_online_cpus(); + mutex_lock(&text_mutex); + if (!kprobe_aggrprobe(orig_p)) { /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ ap = alloc_aggr_kprobe(orig_p); - if (!ap) - return -ENOMEM; + if (!ap) { + ret = -ENOMEM; + goto out; + } init_aggr_kprobe(ap, orig_p); } else if (kprobe_unused(ap)) /* This probe is going to die. Rescue it */ @@ -1213,7 +1289,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, * free aggr_probe. It will be used next time, or * freed by unregister_kprobe. */ - return ret; + goto out; /* Prepare optimized instructions if possible. */ prepare_optimized_kprobe(ap); @@ -1228,7 +1304,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, /* Copy ap's insn slot to p */ copy_kprobe(ap, p); - return add_new_kprobe(ap, p); + ret = add_new_kprobe(ap, p); + +out: + mutex_unlock(&text_mutex); + put_online_cpus(); + jump_label_unlock(); + + if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { + ap->flags &= ~KPROBE_FLAG_DISABLED; + if (!kprobes_all_disarmed) + /* Arm the breakpoint again. */ + arm_kprobe(ap); + } + return ret; } static int __kprobes in_kprobes_functions(unsigned long addr) @@ -1313,71 +1402,96 @@ static inline int check_kprobe_rereg(struct kprobe *p) return ret; } -int __kprobes register_kprobe(struct kprobe *p) +static __kprobes int check_kprobe_address_safe(struct kprobe *p, + struct module **probed_mod) { int ret = 0; - struct kprobe *old_p; - struct module *probed_mod; - kprobe_opcode_t *addr; - - addr = kprobe_addr(p); - if (IS_ERR(addr)) - return PTR_ERR(addr); - p->addr = addr; + unsigned long ftrace_addr; - ret = check_kprobe_rereg(p); - if (ret) - return ret; + /* + * If the address is located on a ftrace nop, set the + * breakpoint to the following instruction. + */ + ftrace_addr = ftrace_location((unsigned long)p->addr); + if (ftrace_addr) { +#ifdef KPROBES_CAN_USE_FTRACE + /* Given address is not on the instruction boundary */ + if ((unsigned long)p->addr != ftrace_addr) + return -EILSEQ; + p->flags |= KPROBE_FLAG_FTRACE; +#else /* !KPROBES_CAN_USE_FTRACE */ + return -EINVAL; +#endif + } jump_label_lock(); preempt_disable(); + + /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; - goto cannot_probe; + goto out; } - /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ - p->flags &= KPROBE_FLAG_DISABLED; - - /* - * Check if are we probing a module. - */ - probed_mod = __module_text_address((unsigned long) p->addr); - if (probed_mod) { - /* Return -ENOENT if fail. */ - ret = -ENOENT; + /* Check if are we probing a module */ + *probed_mod = __module_text_address((unsigned long) p->addr); + if (*probed_mod) { /* * We must hold a refcount of the probed module while updating * its code to prohibit unexpected unloading. */ - if (unlikely(!try_module_get(probed_mod))) - goto cannot_probe; + if (unlikely(!try_module_get(*probed_mod))) { + ret = -ENOENT; + goto out; + } /* * If the module freed .init.text, we couldn't insert * kprobes in there. */ - if (within_module_init((unsigned long)p->addr, probed_mod) && - probed_mod->state != MODULE_STATE_COMING) { - module_put(probed_mod); - goto cannot_probe; + if (within_module_init((unsigned long)p->addr, *probed_mod) && + (*probed_mod)->state != MODULE_STATE_COMING) { + module_put(*probed_mod); + *probed_mod = NULL; + ret = -ENOENT; } - /* ret will be updated by following code */ } +out: preempt_enable(); jump_label_unlock(); + return ret; +} + +int __kprobes register_kprobe(struct kprobe *p) +{ + int ret; + struct kprobe *old_p; + struct module *probed_mod; + kprobe_opcode_t *addr; + + /* Adjust probe address from symbol */ + addr = kprobe_addr(p); + if (IS_ERR(addr)) + return PTR_ERR(addr); + p->addr = addr; + + ret = check_kprobe_rereg(p); + if (ret) + return ret; + + /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ + p->flags &= KPROBE_FLAG_DISABLED; p->nmissed = 0; INIT_LIST_HEAD(&p->list); - mutex_lock(&kprobe_mutex); - jump_label_lock(); /* needed to call jump_label_text_reserved() */ + ret = check_kprobe_address_safe(p, &probed_mod); + if (ret) + return ret; - get_online_cpus(); /* For avoiding text_mutex deadlock. */ - mutex_lock(&text_mutex); + mutex_lock(&kprobe_mutex); old_p = get_kprobe(p->addr); if (old_p) { @@ -1386,7 +1500,9 @@ int __kprobes register_kprobe(struct kprobe *p) goto out; } - ret = arch_prepare_kprobe(p); + mutex_lock(&text_mutex); /* Avoiding text modification */ + ret = prepare_kprobe(p); + mutex_unlock(&text_mutex); if (ret) goto out; @@ -1395,26 +1511,18 @@ int __kprobes register_kprobe(struct kprobe *p) &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); if (!kprobes_all_disarmed && !kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); /* Try to optimize kprobe */ try_to_optimize_kprobe(p); out: - mutex_unlock(&text_mutex); - put_online_cpus(); - jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) module_put(probed_mod); return ret; - -cannot_probe: - preempt_enable(); - jump_label_unlock(); - return ret; } EXPORT_SYMBOL_GPL(register_kprobe); @@ -1451,7 +1559,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { - disarm_kprobe(orig_p); + disarm_kprobe(orig_p, true); orig_p->flags |= KPROBE_FLAG_DISABLED; } } @@ -2049,10 +2157,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, if (!pp) pp = p; - seq_printf(pi, "%s%s%s\n", + seq_printf(pi, "%s%s%s%s\n", (kprobe_gone(p) ? "[GONE]" : ""), ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), - (kprobe_optimized(pp) ? "[OPTIMIZED]" : "")); + (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""), + (kprobe_ftrace(pp) ? "[FTRACE]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -2131,14 +2240,12 @@ static void __kprobes arm_all_kprobes(void) goto already_enabled; /* Arming kprobes doesn't optimize kprobe itself */ - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) if (!kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); } - mutex_unlock(&text_mutex); kprobes_all_disarmed = false; printk(KERN_INFO "Kprobes globally enabled\n"); @@ -2166,15 +2273,13 @@ static void __kprobes disarm_all_kprobes(void) kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) - __disarm_kprobe(p, false); + disarm_kprobe(p, false); } } - mutex_unlock(&text_mutex); mutex_unlock(&kprobe_mutex); /* Wait for disarming all kprobes by optimizer */ diff --git a/kernel/kthread.c b/kernel/kthread.c index b579af57ea1..146a6fa9682 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -37,11 +37,20 @@ struct kthread_create_info }; struct kthread { - int should_stop; + unsigned long flags; + unsigned int cpu; void *data; + struct completion parked; struct completion exited; }; +enum KTHREAD_BITS { + KTHREAD_IS_PER_CPU = 0, + KTHREAD_SHOULD_STOP, + KTHREAD_SHOULD_PARK, + KTHREAD_IS_PARKED, +}; + #define to_kthread(tsk) \ container_of((tsk)->vfork_done, struct kthread, exited) @@ -52,13 +61,29 @@ struct kthread { * and this will return true. You should then return, and your return * value will be passed through to kthread_stop(). */ -int kthread_should_stop(void) +bool kthread_should_stop(void) { - return to_kthread(current)->should_stop; + return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags); } EXPORT_SYMBOL(kthread_should_stop); /** + * kthread_should_park - should this kthread park now? + * + * When someone calls kthread_park() on your kthread, it will be woken + * and this will return true. You should then do the necessary + * cleanup and call kthread_parkme() + * + * Similar to kthread_should_stop(), but this keeps the thread alive + * and in a park position. kthread_unpark() "restarts" the thread and + * calls the thread function again. + */ +bool kthread_should_park(void) +{ + return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags); +} + +/** * kthread_freezable_should_stop - should this freezable kthread return now? * @was_frozen: optional out parameter, indicates whether %current was frozen * @@ -96,6 +121,24 @@ void *kthread_data(struct task_struct *task) return to_kthread(task)->data; } +static void __kthread_parkme(struct kthread *self) +{ + __set_current_state(TASK_INTERRUPTIBLE); + while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { + if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) + complete(&self->parked); + schedule(); + __set_current_state(TASK_INTERRUPTIBLE); + } + clear_bit(KTHREAD_IS_PARKED, &self->flags); + __set_current_state(TASK_RUNNING); +} + +void kthread_parkme(void) +{ + __kthread_parkme(to_kthread(current)); +} + static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -105,9 +148,10 @@ static int kthread(void *_create) struct kthread self; int ret; - self.should_stop = 0; + self.flags = 0; self.data = data; init_completion(&self.exited); + init_completion(&self.parked); current->vfork_done = &self.exited; /* OK, tell user we're spawned, wait for stop or wakeup */ @@ -117,9 +161,11 @@ static int kthread(void *_create) schedule(); ret = -EINTR; - if (!self.should_stop) - ret = threadfn(data); + if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { + __kthread_parkme(&self); + ret = threadfn(data); + } /* we can't just return, we must preserve "self" on stack */ do_exit(ret); } @@ -172,8 +218,7 @@ static void create_kthread(struct kthread_create_info *create) * Returns a task_struct or ERR_PTR(-ENOMEM). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), - void *data, - int node, + void *data, int node, const char namefmt[], ...) { @@ -210,6 +255,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), } EXPORT_SYMBOL(kthread_create_on_node); +static void __kthread_bind(struct task_struct *p, unsigned int cpu) +{ + /* It's safe because the task is inactive. */ + do_set_cpus_allowed(p, cpumask_of(cpu)); + p->flags |= PF_THREAD_BOUND; +} + /** * kthread_bind - bind a just-created kthread to a cpu. * @p: thread created by kthread_create(). @@ -226,14 +278,112 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) WARN_ON(1); return; } - - /* It's safe because the task is inactive. */ - do_set_cpus_allowed(p, cpumask_of(cpu)); - p->flags |= PF_THREAD_BOUND; + __kthread_bind(p, cpu); } EXPORT_SYMBOL(kthread_bind); /** + * kthread_create_on_cpu - Create a cpu bound kthread + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @cpu: The cpu on which the thread should be bound, + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: This helper function creates and names a kernel thread + * The thread will be woken and put into park mode. + */ +struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), + void *data, unsigned int cpu, + const char *namefmt) +{ + struct task_struct *p; + + p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt, + cpu); + if (IS_ERR(p)) + return p; + set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); + to_kthread(p)->cpu = cpu; + /* Park the thread to get it out of TASK_UNINTERRUPTIBLE state */ + kthread_park(p); + return p; +} + +static struct kthread *task_get_live_kthread(struct task_struct *k) +{ + struct kthread *kthread; + + get_task_struct(k); + kthread = to_kthread(k); + /* It might have exited */ + barrier(); + if (k->vfork_done != NULL) + return kthread; + return NULL; +} + +/** + * kthread_unpark - unpark a thread created by kthread_create(). + * @k: thread created by kthread_create(). + * + * Sets kthread_should_park() for @k to return false, wakes it, and + * waits for it to return. If the thread is marked percpu then its + * bound to the cpu again. + */ +void kthread_unpark(struct task_struct *k) +{ + struct kthread *kthread = task_get_live_kthread(k); + + if (kthread) { + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * We clear the IS_PARKED bit here as we don't wait + * until the task has left the park code. So if we'd + * park before that happens we'd see the IS_PARKED bit + * which might be about to be cleared. + */ + if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu); + wake_up_process(k); + } + } + put_task_struct(k); +} + +/** + * kthread_park - park a thread created by kthread_create(). + * @k: thread created by kthread_create(). + * + * Sets kthread_should_park() for @k to return true, wakes it, and + * waits for it to return. This can also be called after kthread_create() + * instead of calling wake_up_process(): the thread will park without + * calling threadfn(). + * + * Returns 0 if the thread is parked, -ENOSYS if the thread exited. + * If called by the kthread itself just the park bit is set. + */ +int kthread_park(struct task_struct *k) +{ + struct kthread *kthread = task_get_live_kthread(k); + int ret = -ENOSYS; + + if (kthread) { + if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { + set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + if (k != current) { + wake_up_process(k); + wait_for_completion(&kthread->parked); + } + } + ret = 0; + } + put_task_struct(k); + return ret; +} + +/** * kthread_stop - stop a thread created by kthread_create(). * @k: thread created by kthread_create(). * @@ -250,16 +400,13 @@ EXPORT_SYMBOL(kthread_bind); */ int kthread_stop(struct task_struct *k) { - struct kthread *kthread; + struct kthread *kthread = task_get_live_kthread(k); int ret; trace_sched_kthread_stop(k); - get_task_struct(k); - - kthread = to_kthread(k); - barrier(); /* it might have exited */ - if (k->vfork_done != NULL) { - kthread->should_stop = 1; + if (kthread) { + set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); wake_up_process(k); wait_for_completion(&kthread->exited); } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index ea9ee4518c3..7981e5b2350 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2998,6 +2998,42 @@ EXPORT_SYMBOL_GPL(lockdep_init_map); struct lock_class_key __lockdep_no_validate__; +static int +print_lock_nested_lock_not_held(struct task_struct *curr, + struct held_lock *hlock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("==================================\n"); + printk("[ BUG: Nested lock was not taken ]\n"); + print_kernel_ident(); + printk("----------------------------------\n"); + + printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); + print_lock(hlock); + + printk("\nbut this task is not holding:\n"); + printk("%s\n", hlock->nest_lock->name); + + printk("\nstack backtrace:\n"); + dump_stack(); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int __lock_is_held(struct lockdep_map *lock); + /* * This gets called for every mutex_lock*()/spin_lock*() operation. * We maintain the dependency maps and validate the locking attempt: @@ -3139,6 +3175,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, } chain_key = iterate_chain_key(chain_key, id); + if (nest_lock && !__lock_is_held(nest_lock)) + return print_lock_nested_lock_not_held(curr, hlock, ip); + if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4e6a61b15e8..29ca1c6da59 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include <linux/mutex.h> #include <linux/export.h> #include <linux/hardirq.h> +#include <linux/delay.h> #define CREATE_TRACE_POINTS #include <trace/events/rcu.h> @@ -81,6 +82,9 @@ void __rcu_read_unlock(void) } else { barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; +#ifdef CONFIG_PROVE_RCU_DELAY + udelay(10); /* Make preemption more probable. */ +#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 547b1fe5b05..e4c6a598d6f 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -56,25 +56,28 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ -static void rcu_idle_enter_common(long long oldval) +static void rcu_idle_enter_common(long long newval) { - if (rcu_dynticks_nesting) { + if (newval) { RCU_TRACE(trace_rcu_dyntick("--=", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); + rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", - oldval, rcu_dynticks_nesting)); + rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + barrier(); + rcu_dynticks_nesting = newval; } /* @@ -84,17 +87,16 @@ static void rcu_idle_enter_common(long long oldval) void rcu_idle_enter(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) - rcu_dynticks_nesting = 0; + newval = 0; else - rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; + rcu_idle_enter_common(newval); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -105,15 +107,15 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); void rcu_irq_exit(void) { unsigned long flags; - long long oldval; + long long newval; local_irq_save(flags); - oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting--; - WARN_ON_ONCE(rcu_dynticks_nesting < 0); - rcu_idle_enter_common(oldval); + newval = rcu_dynticks_nesting - 1; + WARN_ON_ONCE(newval < 0); + rcu_idle_enter_common(newval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_exit); /* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ static void rcu_idle_exit_common(long long oldval) @@ -171,6 +173,7 @@ void rcu_irq_enter(void) rcu_idle_exit_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_irq_enter); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 918fd1e8509..3d019028220 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -278,7 +278,7 @@ static int rcu_boost(void) rcu_preempt_ctrlblk.exp_tasks == NULL) return 0; /* Nothing to boost. */ - raw_local_irq_save(flags); + local_irq_save(flags); /* * Recheck with irqs disabled: all tasks in need of boosting @@ -287,7 +287,7 @@ static int rcu_boost(void) */ if (rcu_preempt_ctrlblk.boost_tasks == NULL && rcu_preempt_ctrlblk.exp_tasks == NULL) { - raw_local_irq_restore(flags); + local_irq_restore(flags); return 0; } @@ -317,7 +317,7 @@ static int rcu_boost(void) t = container_of(tb, struct task_struct, rcu_node_entry); rt_mutex_init_proxy_locked(&mtx, t); t->rcu_boost_mutex = &mtx; - raw_local_irq_restore(flags); + local_irq_restore(flags); rt_mutex_lock(&mtx); rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ @@ -991,9 +991,9 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) { unsigned long flags; - raw_local_irq_save(flags); + local_irq_save(flags); rcp->qlen -= n; - raw_local_irq_restore(flags); + local_irq_restore(flags); } /* diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 25b15033c61..aaa7b9f3532 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -53,10 +53,11 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@fre static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */ static int nfakewriters = 4; /* # fake writer threads */ -static int stat_interval; /* Interval between stats, in seconds. */ - /* Defaults to "only at end of test". */ +static int stat_interval = 60; /* Interval between stats, in seconds. */ + /* Zero means "only at end of test". */ static bool verbose; /* Print more debug info. */ -static bool test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ +static bool test_no_idle_hz = true; + /* Test RCU support for tickless idle CPUs. */ static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ static int stutter = 5; /* Start/stop testing interval (in sec) */ static int irqreader = 1; /* RCU readers from irq (timers). */ @@ -119,11 +120,11 @@ MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); #define TORTURE_FLAG "-torture:" #define PRINTK_STRING(s) \ - do { printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_STRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) #define VERBOSE_PRINTK_ERRSTRING(s) \ - do { if (verbose) printk(KERN_ALERT "%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) static char printk_buf[4096]; @@ -176,8 +177,14 @@ static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static long n_offline_attempts; static long n_offline_successes; +static unsigned long sum_offline; +static int min_offline = -1; +static int max_offline; static long n_online_attempts; static long n_online_successes; +static unsigned long sum_online; +static int min_online = -1; +static int max_online; static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; @@ -235,7 +242,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, if (fullstop == FULLSTOP_DONTSTOP) fullstop = FULLSTOP_SHUTDOWN; else - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); return NOTIFY_DONE; @@ -248,7 +255,7 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, static void rcutorture_shutdown_absorb(char *title) { if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - printk(KERN_NOTICE + pr_notice( "rcutorture thread %s parking due to system shutdown\n", title); schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); @@ -1214,11 +1221,13 @@ rcu_torture_printk(char *page) n_rcu_torture_boost_failure, n_rcu_torture_boosts, n_rcu_torture_timers); - cnt += sprintf(&page[cnt], "onoff: %ld/%ld:%ld/%ld ", - n_online_successes, - n_online_attempts, - n_offline_successes, - n_offline_attempts); + cnt += sprintf(&page[cnt], + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); cnt += sprintf(&page[cnt], "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, @@ -1267,7 +1276,7 @@ rcu_torture_stats_print(void) int cnt; cnt = rcu_torture_printk(printk_buf); - printk(KERN_ALERT "%s", printk_buf); + pr_alert("%s", printk_buf); } /* @@ -1380,20 +1389,20 @@ rcu_torture_stutter(void *arg) static inline void rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) { - printk(KERN_ALERT "%s" TORTURE_FLAG - "--- %s: nreaders=%d nfakewriters=%d " - "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " - "test_boost=%d/%d test_boost_interval=%d " - "test_boost_duration=%d shutdown_secs=%d " - "onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, nrealreaders, nfakewriters, - stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, - test_boost, cur_ops->can_boost, - test_boost_interval, test_boost_duration, shutdown_secs, - onoff_interval, onoff_holdoff); + pr_alert("%s" TORTURE_FLAG + "--- %s: nreaders=%d nfakewriters=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d shutdown_secs=%d " + "onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, nrealreaders, nfakewriters, + stat_interval, verbose, test_no_idle_hz, shuffle_interval, + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, + test_boost, cur_ops->can_boost, + test_boost_interval, test_boost_duration, shutdown_secs, + onoff_interval, onoff_holdoff); } static struct notifier_block rcutorture_shutdown_nb = { @@ -1460,9 +1469,9 @@ rcu_torture_shutdown(void *arg) !kthread_should_stop()) { delta = shutdown_time - jiffies_snap; if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu jiffies remaining\n", - torture_type, delta); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_shutdown task: %lu jiffies remaining\n", + torture_type, delta); schedule_timeout_interruptible(delta); jiffies_snap = ACCESS_ONCE(jiffies); } @@ -1490,8 +1499,10 @@ static int __cpuinit rcu_torture_onoff(void *arg) { int cpu; + unsigned long delta; int maxcpu = -1; DEFINE_RCU_RANDOM(rand); + unsigned long starttime; VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); for_each_online_cpu(cpu) @@ -1506,29 +1517,51 @@ rcu_torture_onoff(void *arg) cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlining %d\n", + torture_type, cpu); + starttime = jiffies; n_offline_attempts++; if (cpu_down(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: offlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlined %d\n", + torture_type, cpu); n_offline_successes++; + delta = jiffies - starttime; + sum_offline += delta; + if (min_offline < 0) { + min_offline = delta; + max_offline = delta; + } + if (min_offline > delta) + min_offline = delta; + if (max_offline < delta) + max_offline = delta; } } else if (cpu_is_hotpluggable(cpu)) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlining %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlining %d\n", + torture_type, cpu); + starttime = jiffies; n_online_attempts++; if (cpu_up(cpu) == 0) { if (verbose) - printk(KERN_ALERT "%s" TORTURE_FLAG - "rcu_torture_onoff task: onlined %d\n", - torture_type, cpu); + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlined %d\n", + torture_type, cpu); n_online_successes++; + delta = jiffies - starttime; + sum_online += delta; + if (min_online < 0) { + min_online = delta; + max_online = delta; + } + if (min_online > delta) + min_online = delta; + if (max_online < delta) + max_online = delta; } } schedule_timeout_interruptible(onoff_interval * HZ); @@ -1593,14 +1626,14 @@ static int __cpuinit rcu_torture_stall(void *args) if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; /* RCU CPU stall is expected behavior in following code. */ - printk(KERN_ALERT "rcu_torture_stall start.\n"); + pr_alert("rcu_torture_stall start.\n"); rcu_read_lock(); preempt_disable(); while (ULONG_CMP_LT(get_seconds(), stop_at)) continue; /* Induce RCU CPU stall warning. */ preempt_enable(); rcu_read_unlock(); - printk(KERN_ALERT "rcu_torture_stall end.\n"); + pr_alert("rcu_torture_stall end.\n"); } rcutorture_shutdown_absorb("rcu_torture_stall"); while (!kthread_should_stop()) @@ -1716,12 +1749,12 @@ static int rcu_torture_barrier_init(void) if (n_barrier_cbs == 0) return 0; if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { - printk(KERN_ALERT "%s" TORTURE_FLAG - " Call or barrier ops missing for %s,\n", - torture_type, cur_ops->name); - printk(KERN_ALERT "%s" TORTURE_FLAG - " RCU barrier testing omitted from run.\n", - torture_type); + pr_alert("%s" TORTURE_FLAG + " Call or barrier ops missing for %s,\n", + torture_type, cur_ops->name); + pr_alert("%s" TORTURE_FLAG + " RCU barrier testing omitted from run.\n", + torture_type); return 0; } atomic_set(&barrier_cbs_count, 0); @@ -1814,7 +1847,7 @@ rcu_torture_cleanup(void) mutex_lock(&fullstop_mutex); rcutorture_record_test_transition(); if (fullstop == FULLSTOP_SHUTDOWN) { - printk(KERN_WARNING /* but going down anyway, so... */ + pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); schedule_timeout_uninterruptible(10); @@ -1938,17 +1971,17 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", - torture_type); - printk(KERN_ALERT "rcu-torture types:"); + pr_alert("rcu-torture: invalid torture type: \"%s\"\n", + torture_type); + pr_alert("rcu-torture types:"); for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - printk(KERN_ALERT " %s", torture_ops[i]->name); - printk(KERN_ALERT "\n"); + pr_alert(" %s", torture_ops[i]->name); + pr_alert("\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { - printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); + pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } if (cur_ops->init) @@ -1996,14 +2029,15 @@ rcu_torture_init(void) /* Start up the kthreads. */ VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); - writer_task = kthread_run(rcu_torture_writer, NULL, - "rcu_torture_writer"); + writer_task = kthread_create(rcu_torture_writer, NULL, + "rcu_torture_writer"); if (IS_ERR(writer_task)) { firsterr = PTR_ERR(writer_task); VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); writer_task = NULL; goto unwind; } + wake_up_process(writer_task); fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { @@ -2118,14 +2152,15 @@ rcu_torture_init(void) } if (shutdown_secs > 0) { shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_run(rcu_torture_shutdown, NULL, - "rcu_torture_shutdown"); + shutdown_task = kthread_create(rcu_torture_shutdown, NULL, + "rcu_torture_shutdown"); if (IS_ERR(shutdown_task)) { firsterr = PTR_ERR(shutdown_task); VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); shutdown_task = NULL; goto unwind; } + wake_up_process(shutdown_task); } i = rcu_torture_onoff_init(); if (i != 0) { diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f280e542e3e..4fb2376ddf0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -52,6 +52,7 @@ #include <linux/prefetch.h> #include <linux/delay.h> #include <linux/stop_machine.h> +#include <linux/random.h> #include "rcutree.h" #include <trace/events/rcu.h> @@ -61,6 +62,7 @@ /* Data structures. */ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; +static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; #define RCU_STATE_INITIALIZER(sname, cr) { \ .level = { &sname##_state.node[0] }, \ @@ -72,7 +74,6 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; .orphan_nxttail = &sname##_state.orphan_nxtlist, \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.fqslock), \ .name = #sname, \ } @@ -88,7 +89,7 @@ LIST_HEAD(rcu_struct_flavors); /* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; -module_param(rcu_fanout_leaf, int, 0); +module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ NUM_RCU_LVL_0, @@ -133,13 +134,12 @@ static int rcu_scheduler_fully_active __read_mostly; */ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); -DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); #endif /* #ifdef CONFIG_RCU_BOOST */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); @@ -175,8 +175,6 @@ void rcu_sched_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -186,8 +184,6 @@ void rcu_bh_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -210,15 +206,18 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), +#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE) + .ignore_user_qs = true, +#endif }; static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static int qhimark = 10000; /* If this many pending, ignore blimit. */ static int qlowmark = 100; /* Once only this many pending, use blimit. */ -module_param(blimit, int, 0); -module_param(qhimark, int, 0); -module_param(qlowmark, int, 0); +module_param(blimit, int, 0444); +module_param(qhimark, int, 0444); +module_param(qlowmark, int, 0444); int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; @@ -226,7 +225,14 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644); -static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; +static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; + +module_param(jiffies_till_first_fqs, ulong, 0644); +module_param(jiffies_till_next_fqs, ulong, 0644); + +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)); +static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(int cpu); /* @@ -252,7 +258,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); */ void rcu_bh_force_quiescent_state(void) { - force_quiescent_state(&rcu_bh_state, 0); + force_quiescent_state(&rcu_bh_state); } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); @@ -286,7 +292,7 @@ EXPORT_SYMBOL_GPL(rcutorture_record_progress); */ void rcu_sched_force_quiescent_state(void) { - force_quiescent_state(&rcu_sched_state, 0); + force_quiescent_state(&rcu_sched_state); } EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); @@ -305,7 +311,9 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) static int cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) { - return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); + return *rdp->nxttail[RCU_DONE_TAIL + + ACCESS_ONCE(rsp->completed) != rdp->completed] && + !rcu_gp_in_progress(rsp); } /* @@ -317,45 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) } /* - * If the specified CPU is offline, tell the caller that it is in - * a quiescent state. Otherwise, whack it with a reschedule IPI. - * Grace periods can end up waiting on an offline CPU when that - * CPU is in the process of coming online -- it will be added to the - * rcu_node bitmasks before it actually makes it online. The same thing - * can happen while a CPU is in the process of coming online. Because this - * race is quite rare, we check for it after detecting that the grace - * period has been delayed rather than checking each and every CPU - * each and every time we start a new grace period. - */ -static int rcu_implicit_offline_qs(struct rcu_data *rdp) -{ - /* - * If the CPU is offline for more than a jiffy, it is in a quiescent - * state. We can trust its state not to change because interrupts - * are disabled. The reason for the jiffy's worth of slack is to - * handle CPUs initializing on the way up and finding their way - * to the idle loop on the way down. - */ - if (cpu_is_offline(rdp->cpu) && - ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { - trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); - rdp->offline_fqs++; - return 1; - } - return 0; -} - -/* - * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle + * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state * * If the new value of the ->dynticks_nesting counter now is zero, * we really have entered idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) +static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, + bool user) { trace_rcu_dyntick("Start", oldval, 0); - if (!is_idle_task(current)) { + if (!user && !is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on entry: not idle task", oldval, 0); @@ -372,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); /* - * The idle task is not permitted to enter the idle loop while + * It is illegal to enter an extended quiescent state while * in an RCU read-side critical section. */ rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), @@ -383,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) "Illegal idle entry in RCU-sched read-side critical section."); } +/* + * Enter an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. + */ +static void rcu_eqs_enter(bool user) +{ + long long oldval; + struct rcu_dynticks *rdtp; + + rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; + WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting = 0; + else + rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; + rcu_eqs_enter_common(rdtp, oldval, user); +} + /** * rcu_idle_enter - inform RCU that current CPU is entering idle * @@ -398,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) void rcu_idle_enter(void) { unsigned long flags; - long long oldval; + + local_irq_save(flags); + rcu_eqs_enter(false); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(rcu_idle_enter); + +#ifdef CONFIG_RCU_USER_QS +/** + * rcu_user_enter - inform RCU that we are resuming userspace. + * + * Enter RCU idle mode right before resuming userspace. No use of RCU + * is permitted between this call and rcu_user_exit(). This way the + * CPU doesn't need to maintain the tick for RCU maintenance purposes + * when the CPU runs in userspace. + */ +void rcu_user_enter(void) +{ + unsigned long flags; struct rcu_dynticks *rdtp; + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + WARN_ON_ONCE(!current->mm); + local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); - if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) - rdtp->dynticks_nesting = 0; - else - rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(rdtp, oldval); + if (!rdtp->ignore_user_qs && !rdtp->in_user) { + rdtp->in_user = true; + rcu_eqs_enter(true); + } local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(rcu_idle_enter); + +/** + * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace + * after the current irq returns. + * + * This is similar to rcu_user_enter() but in the context of a non-nesting + * irq. After this call, RCU enters into idle mode when the interrupt + * returns. + */ +void rcu_user_enter_after_irq(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + /* Ensure this irq is interrupting a non-idle RCU state. */ + WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK)); + rdtp->dynticks_nesting = 1; + local_irq_restore(flags); +} +#endif /* CONFIG_RCU_USER_QS */ /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle @@ -444,18 +492,19 @@ void rcu_irq_exit(void) if (rdtp->dynticks_nesting) trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); else - rcu_idle_enter_common(rdtp, oldval); + rcu_eqs_enter_common(rdtp, oldval, true); local_irq_restore(flags); } /* - * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle + * rcu_eqs_exit_common - current CPU moving away from extended quiescent state * * If the new value of the ->dynticks_nesting counter was previously zero, * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) +static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, + int user) { smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); @@ -464,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); - if (!is_idle_task(current)) { + if (!user && !is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); trace_rcu_dyntick("Error on exit: not idle task", @@ -476,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) } } +/* + * Exit an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. + */ +static void rcu_eqs_exit(bool user) +{ + struct rcu_dynticks *rdtp; + long long oldval; + + rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; + WARN_ON_ONCE(oldval < 0); + if (oldval & DYNTICK_TASK_NEST_MASK) + rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + else + rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rcu_eqs_exit_common(rdtp, oldval, user); +} + /** * rcu_idle_exit - inform RCU that current CPU is leaving idle * @@ -490,21 +558,67 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) void rcu_idle_exit(void) { unsigned long flags; + + local_irq_save(flags); + rcu_eqs_exit(false); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(rcu_idle_exit); + +#ifdef CONFIG_RCU_USER_QS +/** + * rcu_user_exit - inform RCU that we are exiting userspace. + * + * Exit RCU idle mode while entering the kernel because it can + * run a RCU read side critical section anytime. + */ +void rcu_user_exit(void) +{ + unsigned long flags; struct rcu_dynticks *rdtp; - long long oldval; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval < 0); - if (oldval & DYNTICK_TASK_NEST_MASK) - rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; - else - rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_idle_exit_common(rdtp, oldval); + if (rdtp->in_user) { + rdtp->in_user = false; + rcu_eqs_exit(true); + } local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(rcu_idle_exit); + +/** + * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace + * idle mode after the current non-nesting irq returns. + * + * This is similar to rcu_user_exit() but in the context of an irq. + * This is called when the irq has interrupted a userspace RCU idle mode + * context. When the current non-nesting interrupt returns after this call, + * the CPU won't restore the RCU idle mode. + */ +void rcu_user_exit_after_irq(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + /* Ensure we are interrupting an RCU idle mode. */ + WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK); + rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE; + local_irq_restore(flags); +} +#endif /* CONFIG_RCU_USER_QS */ /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle @@ -539,7 +653,7 @@ void rcu_irq_enter(void) if (oldval) trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); else - rcu_idle_exit_common(rdtp, oldval); + rcu_eqs_exit_common(rdtp, oldval, true); local_irq_restore(flags); } @@ -603,6 +717,21 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); +#ifdef CONFIG_RCU_USER_QS +void rcu_user_hooks_switch(struct task_struct *prev, + struct task_struct *next) +{ + struct rcu_dynticks *rdtp; + + /* Interrupts are disabled in context switch */ + rdtp = &__get_cpu_var(rcu_dynticks); + if (!rdtp->ignore_user_qs) { + clear_tsk_thread_flag(prev, TIF_NOHZ); + set_tsk_thread_flag(next, TIF_NOHZ); + } +} +#endif /* #ifdef CONFIG_RCU_USER_QS */ + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* @@ -673,7 +802,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU. + * for this same CPU, or by virtue of having been offline. */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { @@ -697,8 +826,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 1; } - /* Go check for the CPU being offline. */ - return rcu_implicit_offline_qs(rdp); + /* + * Check for the CPU being offline, but only if the grace period + * is old enough. We don't need to worry about the CPU changing + * state: If we see it offline even once, it has been through a + * quiescent state. + * + * The reason for insisting that the grace period be at least + * one jiffy old is that CPUs that are not quite online and that + * have just gone offline can still execute RCU read-side critical + * sections. + */ + if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) + return 0; /* Grace period is not old enough. */ + barrier(); + if (cpu_is_offline(rdp->cpu)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); + rdp->offline_fqs++; + return 1; + } + return 0; } static int jiffies_till_stall_check(void) @@ -755,14 +902,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp) rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); ndetected += rcu_print_task_stall(rnp); + if (rnp->qsmask != 0) { + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) + if (rnp->qsmask & (1UL << cpu)) { + print_cpu_stall_info(rsp, + rnp->grplo + cpu); + ndetected++; + } + } raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (rnp->qsmask == 0) - continue; - for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) - if (rnp->qsmask & (1UL << cpu)) { - print_cpu_stall_info(rsp, rnp->grplo + cpu); - ndetected++; - } } /* @@ -782,11 +930,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) else if (!trigger_all_cpu_backtrace()) dump_stack(); - /* If so configured, complain about tasks blocking the grace period. */ + /* Complain about tasks blocking the grace period. */ rcu_print_detail_task_stall(rsp); - force_quiescent_state(rsp, 0); /* Kick them all. */ + force_quiescent_state(rsp); /* Kick them all. */ } static void print_cpu_stall(struct rcu_state *rsp) @@ -827,7 +975,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) j = ACCESS_ONCE(jiffies); js = ACCESS_ONCE(rsp->jiffies_stall); rnp = rdp->mynode; - if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + if (rcu_gp_in_progress(rsp) && + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -889,12 +1038,8 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct */ rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); - if (rnp->qsmask & rdp->grpmask) { - rdp->qs_pending = 1; - rdp->passed_quiesce = 0; - } else { - rdp->qs_pending = 0; - } + rdp->passed_quiesce = 0; + rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); } } @@ -974,10 +1119,13 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat * our behalf. Catch up with this state to avoid noting * spurious new grace periods. If another grace period * has started, then rnp->gpnum will have advanced, so - * we will detect this later on. + * we will detect this later on. Of course, any quiescent + * states we found for the old GP are now invalid. */ - if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) + if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) { rdp->gpnum = rdp->completed; + rdp->passed_quiesce = 0; + } /* * If RCU does not need a quiescent state from this CPU, @@ -1021,97 +1169,56 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat /* Prior grace period ended, so advance callbacks for current CPU. */ __rcu_process_gp_end(rsp, rnp, rdp); - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - /* Set state so that this CPU will detect the next quiescent state. */ __note_new_gpnum(rsp, rnp, rdp); } /* - * Start a new RCU grace period if warranted, re-initializing the hierarchy - * in preparation for detecting the next grace period. The caller must hold - * the root node's ->lock, which is released before return. Hard irqs must - * be disabled. - * - * Note that it is legal for a dying CPU (which is marked as offline) to - * invoke this function. This can happen when the dying CPU reports its - * quiescent state. + * Initialize a new grace period. */ -static void -rcu_start_gp(struct rcu_state *rsp, unsigned long flags) - __releases(rcu_get_root(rsp)->lock) +static int rcu_gp_init(struct rcu_state *rsp) { - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(rsp); - if (!rcu_scheduler_fully_active || - !cpu_needs_another_gp(rsp, rdp)) { - /* - * Either the scheduler hasn't yet spawned the first - * non-idle task or this CPU does not need another - * grace period. Either way, don't start a new grace - * period. - */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } + raw_spin_lock_irq(&rnp->lock); + rsp->gp_flags = 0; /* Clear all flags: New grace period. */ - if (rsp->fqs_active) { - /* - * This CPU needs a grace period, but force_quiescent_state() - * is running. Tell it to start one on this CPU's behalf. - */ - rsp->fqs_need_gp = 1; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; + if (rcu_gp_in_progress(rsp)) { + /* Grace period already in progress, don't start another. */ + raw_spin_unlock_irq(&rnp->lock); + return 0; } /* Advance to a new grace period and initialize state. */ rsp->gpnum++; trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); - WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); - rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); - raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ + raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ - raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ + get_online_cpus(); /* * Set the quiescent-state-needed bits in all the rcu_node - * structures for all currently online CPUs in breadth-first - * order, starting from the root rcu_node structure. This - * operation relies on the layout of the hierarchy within the - * rsp->node[] array. Note that other CPUs will access only - * the leaves of the hierarchy, which still indicate that no + * structures for all currently online CPUs in breadth-first order, + * starting from the root rcu_node structure, relying on the layout + * of the tree within the rsp->node[] array. Note that other CPUs + * will access only the leaves of the hierarchy, thus seeing that no * grace period is in progress, at least until the corresponding * leaf node has been initialized. In addition, we have excluded * CPU-hotplug operations. * - * Note that the grace period cannot complete until we finish - * the initialization process, as there will be at least one - * qsmask bit set in the root node until that time, namely the - * one corresponding to this CPU, due to the fact that we have - * irqs disabled. + * The grace period cannot complete until the initialization + * process finishes, because this kthread handles both. */ rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock_irq(&rnp->lock); + rdp = this_cpu_ptr(rsp->rda); rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + WARN_ON_ONCE(rnp->completed != rsp->completed); rnp->completed = rsp->completed; if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); @@ -1119,37 +1226,54 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) trace_rcu_grace_period_init(rsp->name, rnp->gpnum, rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irq(&rnp->lock); +#ifdef CONFIG_PROVE_RCU_DELAY + if ((random32() % (rcu_num_nodes * 8)) == 0) + schedule_timeout_uninterruptible(2); +#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ + cond_resched(); } - rnp = rcu_get_root(rsp); - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + put_online_cpus(); + return 1; } /* - * Report a full set of quiescent states to the specified rcu_state - * data structure. This involves cleaning up after the prior grace - * period and letting rcu_start_gp() start up the next grace period - * if one is needed. Note that the caller must hold rnp->lock, as - * required by rcu_start_gp(), which will release it. + * Do one round of quiescent-state forcing. */ -static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) - __releases(rcu_get_root(rsp)->lock) +int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { - unsigned long gp_duration; + int fqs_state = fqs_state_in; struct rcu_node *rnp = rcu_get_root(rsp); - struct rcu_data *rdp = this_cpu_ptr(rsp->rda); - WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + rsp->n_force_qs++; + if (fqs_state == RCU_SAVE_DYNTICK) { + /* Collect dyntick-idle snapshots. */ + force_qs_rnp(rsp, dyntick_save_progress_counter); + fqs_state = RCU_FORCE_QS; + } else { + /* Handle dyntick-idle and offline CPUs. */ + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); + } + /* Clear flag to prevent immediate re-entry. */ + if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + raw_spin_lock_irq(&rnp->lock); + rsp->gp_flags &= ~RCU_GP_FLAG_FQS; + raw_spin_unlock_irq(&rnp->lock); + } + return fqs_state; +} - /* - * Ensure that all grace-period and pre-grace-period activity - * is seen before the assignment to rsp->completed. - */ - smp_mb(); /* See above block comment. */ +/* + * Clean up after the old grace period. + */ +static void rcu_gp_cleanup(struct rcu_state *rsp) +{ + unsigned long gp_duration; + struct rcu_data *rdp; + struct rcu_node *rnp = rcu_get_root(rsp); + + raw_spin_lock_irq(&rnp->lock); gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; @@ -1161,35 +1285,149 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) * they can do to advance the grace period. It is therefore * safe for us to drop the lock in order to mark the grace * period as completed in all of the rcu_node structures. - * - * But if this CPU needs another grace period, it will take - * care of this while initializing the next grace period. - * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL - * because the callbacks have not yet been advanced: Those - * callbacks are waiting on the grace period that just now - * completed. */ - if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irq(&rnp->lock); - /* - * Propagate new ->completed value to rcu_node structures - * so that other CPUs don't have to wait until the start - * of the next grace period to process their callbacks. - */ - rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rnp->completed = rsp->gpnum; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - } - rnp = rcu_get_root(rsp); - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + /* + * Propagate new ->completed value to rcu_node structures so + * that other CPUs don't have to wait until the start of the next + * grace period to process their callbacks. This also avoids + * some nasty RCU grace-period initialization races by forcing + * the end of the current grace period to be completely recorded in + * all of the rcu_node structures before the beginning of the next + * grace period is recorded in any of the rcu_node structures. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + raw_spin_lock_irq(&rnp->lock); + rnp->completed = rsp->gpnum; + raw_spin_unlock_irq(&rnp->lock); + cond_resched(); } + rnp = rcu_get_root(rsp); + raw_spin_lock_irq(&rnp->lock); - rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ + rsp->completed = rsp->gpnum; /* Declare grace period done. */ trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->fqs_state = RCU_GP_IDLE; - rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ + rdp = this_cpu_ptr(rsp->rda); + if (cpu_needs_another_gp(rsp, rdp)) + rsp->gp_flags = 1; + raw_spin_unlock_irq(&rnp->lock); +} + +/* + * Body of kthread that handles grace periods. + */ +static int __noreturn rcu_gp_kthread(void *arg) +{ + int fqs_state; + unsigned long j; + int ret; + struct rcu_state *rsp = arg; + struct rcu_node *rnp = rcu_get_root(rsp); + + for (;;) { + + /* Handle grace-period start. */ + for (;;) { + wait_event_interruptible(rsp->gp_wq, + rsp->gp_flags & + RCU_GP_FLAG_INIT); + if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && + rcu_gp_init(rsp)) + break; + cond_resched(); + flush_signals(current); + } + + /* Handle quiescent-state forcing. */ + fqs_state = RCU_SAVE_DYNTICK; + j = jiffies_till_first_fqs; + if (j > HZ) { + j = HZ; + jiffies_till_first_fqs = HZ; + } + for (;;) { + rsp->jiffies_force_qs = jiffies + j; + ret = wait_event_interruptible_timeout(rsp->gp_wq, + (rsp->gp_flags & RCU_GP_FLAG_FQS) || + (!ACCESS_ONCE(rnp->qsmask) && + !rcu_preempt_blocked_readers_cgp(rnp)), + j); + /* If grace period done, leave loop. */ + if (!ACCESS_ONCE(rnp->qsmask) && + !rcu_preempt_blocked_readers_cgp(rnp)) + break; + /* If time for quiescent-state forcing, do it. */ + if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { + fqs_state = rcu_gp_fqs(rsp, fqs_state); + cond_resched(); + } else { + /* Deal with stray signal. */ + cond_resched(); + flush_signals(current); + } + j = jiffies_till_next_fqs; + if (j > HZ) { + j = HZ; + jiffies_till_next_fqs = HZ; + } else if (j < 1) { + j = 1; + jiffies_till_next_fqs = 1; + } + } + + /* Handle grace-period end. */ + rcu_gp_cleanup(rsp); + } +} + +/* + * Start a new RCU grace period if warranted, re-initializing the hierarchy + * in preparation for detecting the next grace period. The caller must hold + * the root node's ->lock, which is released before return. Hard irqs must + * be disabled. + * + * Note that it is legal for a dying CPU (which is marked as offline) to + * invoke this function. This can happen when the dying CPU reports its + * quiescent state. + */ +static void +rcu_start_gp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); + struct rcu_node *rnp = rcu_get_root(rsp); + + if (!rsp->gp_kthread || + !cpu_needs_another_gp(rsp, rdp)) { + /* + * Either we have not yet spawned the grace-period + * task or this CPU does not need another grace period. + * Either way, don't start a new grace period. + */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + rsp->gp_flags = RCU_GP_FLAG_INIT; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + wake_up(&rsp->gp_wq); +} + +/* + * Report a full set of quiescent states to the specified rcu_state + * data structure. This involves cleaning up after the prior grace + * period and letting rcu_start_gp() start up the next grace period + * if one is needed. Note that the caller must hold rnp->lock, as + * required by rcu_start_gp(), which will release it. + */ +static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); + wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } /* @@ -1258,7 +1496,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, * based on quiescent states detected in an earlier grace period! */ static void -rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; unsigned long mask; @@ -1266,7 +1504,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); - if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { + if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || + rnp->completed == rnp->gpnum) { /* * The grace period in which this quiescent state was @@ -1325,7 +1564,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Tell RCU we are done (but rcu_report_qs_rdp() will be the * judge of that). */ - rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); + rcu_report_qs_rdp(rdp->cpu, rsp, rdp); } #ifdef CONFIG_HOTPLUG_CPU @@ -1390,17 +1629,6 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) int i; struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); - /* - * If there is an rcu_barrier() operation in progress, then - * only the task doing that operation is permitted to adopt - * callbacks. To do otherwise breaks rcu_barrier() and friends - * by causing them to fail to wait for the callbacks in the - * orphanage. - */ - if (rsp->rcu_barrier_in_progress && - rsp->rcu_barrier_in_progress != current) - return; - /* Do the accounting first. */ rdp->qlen_lazy += rsp->qlen_lazy; rdp->qlen += rsp->qlen; @@ -1455,9 +1683,8 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) * The CPU has been completely removed, and some other CPU is reporting * this fact from process context. Do the remainder of the cleanup, * including orphaning the outgoing CPU's RCU callbacks, and also - * adopting them, if there is no _rcu_barrier() instance running. - * There can only be one CPU hotplug operation at a time, so no other - * CPU can be attempting to update rcu_cpu_kthread_task. + * adopting them. There can only be one CPU hotplug operation at a time, + * so no other CPU can be attempting to update rcu_cpu_kthread_task. */ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { @@ -1468,8 +1695,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ /* Adjust any no-longer-needed kthreads. */ - rcu_stop_cpu_kthread(cpu); - rcu_node_kthread_setaffinity(rnp, -1); + rcu_boost_kthread_setaffinity(rnp, -1); /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ @@ -1515,14 +1741,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", cpu, rdp->qlen, rdp->nxtlist); + init_callback_list(rdp); + /* Disallow further callbacks on this CPU. */ + rdp->nxttail[RCU_NEXT_TAIL] = NULL; } #else /* #ifdef CONFIG_HOTPLUG_CPU */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) -{ -} - static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { } @@ -1687,6 +1912,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { + cond_resched(); mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); if (!rcu_gp_in_progress(rsp)) { @@ -1723,72 +1949,39 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) * Force quiescent states on reluctant CPUs, and also detect which * CPUs are in dyntick-idle mode. */ -static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +static void force_quiescent_state(struct rcu_state *rsp) { unsigned long flags; - struct rcu_node *rnp = rcu_get_root(rsp); - - trace_rcu_utilization("Start fqs"); - if (!rcu_gp_in_progress(rsp)) { - trace_rcu_utilization("End fqs"); - return; /* No grace period in progress, nothing to force. */ - } - if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { - rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ - trace_rcu_utilization("End fqs"); - return; /* Someone else is already on the job. */ - } - if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) - goto unlock_fqs_ret; /* no emergency and done recently. */ - rsp->n_force_qs++; - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - if(!rcu_gp_in_progress(rsp)) { - rsp->n_force_qs_ngp++; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - goto unlock_fqs_ret; /* no GP in progress, time updated. */ - } - rsp->fqs_active = 1; - switch (rsp->fqs_state) { - case RCU_GP_IDLE: - case RCU_GP_INIT: - - break; /* grace period idle or initializing, ignore. */ - - case RCU_SAVE_DYNTICK: - - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - - /* Record dyntick-idle state. */ - force_qs_rnp(rsp, dyntick_save_progress_counter); - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - if (rcu_gp_in_progress(rsp)) - rsp->fqs_state = RCU_FORCE_QS; - break; - - case RCU_FORCE_QS: - - /* Check dyntick-idle state, send IPI to laggarts. */ - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - force_qs_rnp(rsp, rcu_implicit_dynticks_qs); - - /* Leave state in case more forcing is required. */ - - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - break; + bool ret; + struct rcu_node *rnp; + struct rcu_node *rnp_old = NULL; + + /* Funnel through hierarchy to reduce memory contention. */ + rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode; + for (; rnp != NULL; rnp = rnp->parent) { + ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || + !raw_spin_trylock(&rnp->fqslock); + if (rnp_old != NULL) + raw_spin_unlock(&rnp_old->fqslock); + if (ret) { + rsp->n_force_qs_lh++; + return; + } + rnp_old = rnp; } - rsp->fqs_active = 0; - if (rsp->fqs_need_gp) { - raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ - rsp->fqs_need_gp = 0; - rcu_start_gp(rsp, flags); /* releases rnp->lock */ - trace_rcu_utilization("End fqs"); - return; + /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ + + /* Reached the root of the rcu_node tree, acquire lock. */ + raw_spin_lock_irqsave(&rnp_old->lock, flags); + raw_spin_unlock(&rnp_old->fqslock); + if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { + rsp->n_force_qs_lh++; + raw_spin_unlock_irqrestore(&rnp_old->lock, flags); + return; /* Someone beat us to it. */ } - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ -unlock_fqs_ret: - raw_spin_unlock_irqrestore(&rsp->fqslock, flags); - trace_rcu_utilization("End fqs"); + rsp->gp_flags |= RCU_GP_FLAG_FQS; + raw_spin_unlock_irqrestore(&rnp_old->lock, flags); + wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ } /* @@ -1805,13 +1998,6 @@ __rcu_process_callbacks(struct rcu_state *rsp) WARN_ON_ONCE(rdp->beenonline == 0); /* - * If an RCU GP has gone long enough, go check for dyntick - * idle CPUs and, if needed, send resched IPIs. - */ - if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) - force_quiescent_state(rsp, 1); - - /* * Advance callbacks in response to end of earlier grace * period that some other CPU ended. */ @@ -1838,6 +2024,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) { struct rcu_state *rsp; + if (cpu_is_offline(smp_processor_id())) + return; trace_rcu_utilization("Start RCU core"); for_each_rcu_flavor(rsp) __rcu_process_callbacks(rsp); @@ -1909,12 +2097,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, rdp->blimit = LONG_MAX; if (rsp->n_force_qs == rdp->n_force_qs_snap && *rdp->nxttail[RCU_DONE_TAIL] != head) - force_quiescent_state(rsp, 0); + force_quiescent_state(rsp); rdp->n_force_qs_snap = rsp->n_force_qs; rdp->qlen_last_fqs_check = rdp->qlen; } - } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) - force_quiescent_state(rsp, 1); + } } static void @@ -1929,8 +2116,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), head->func = func; head->next = NULL; - smp_mb(); /* Ensure RCU update seen before callback registry. */ - /* * Opportunistically note grace-period endings and beginnings. * Note that we might see a beginning right after we see an @@ -1941,6 +2126,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ + if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) { + /* _call_rcu() is illegal on offline CPU; leak the callback. */ + WARN_ON_ONCE(1); + local_irq_restore(flags); + return; + } ACCESS_ONCE(rdp->qlen)++; if (lazy) rdp->qlen_lazy++; @@ -2195,17 +2386,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && rdp->qs_pending && !rdp->passed_quiesce) { - - /* - * If force_quiescent_state() coming soon and this CPU - * needs a quiescent state, and this is either RCU-sched - * or RCU-bh, force a local reschedule. - */ rdp->n_rp_qs_pending++; - if (!rdp->preemptible && - ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, - jiffies)) - set_need_resched(); } else if (rdp->qs_pending && rdp->passed_quiesce) { rdp->n_rp_report_qs++; return 1; @@ -2235,13 +2416,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) return 1; } - /* Has an RCU GP gone long enough to send resched IPIs &c? */ - if (rcu_gp_in_progress(rsp) && - ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { - rdp->n_rp_need_fqs++; - return 1; - } - /* nothing to do */ rdp->n_rp_need_nothing++; return 0; @@ -2326,13 +2500,10 @@ static void rcu_barrier_func(void *type) static void _rcu_barrier(struct rcu_state *rsp) { int cpu; - unsigned long flags; struct rcu_data *rdp; - struct rcu_data rd; unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); unsigned long snap_done; - init_rcu_head_on_stack(&rd.barrier_head); _rcu_barrier_trace(rsp, "Begin", -1, snap); /* Take mutex to serialize concurrent rcu_barrier() requests. */ @@ -2372,70 +2543,30 @@ static void _rcu_barrier(struct rcu_state *rsp) /* * Initialize the count to one rather than to zero in order to * avoid a too-soon return to zero in case of a short grace period - * (or preemption of this task). Also flag this task as doing - * an rcu_barrier(). This will prevent anyone else from adopting - * orphaned callbacks, which could cause otherwise failure if a - * CPU went offline and quickly came back online. To see this, - * consider the following sequence of events: - * - * 1. We cause CPU 0 to post an rcu_barrier_callback() callback. - * 2. CPU 1 goes offline, orphaning its callbacks. - * 3. CPU 0 adopts CPU 1's orphaned callbacks. - * 4. CPU 1 comes back online. - * 5. We cause CPU 1 to post an rcu_barrier_callback() callback. - * 6. Both rcu_barrier_callback() callbacks are invoked, awakening - * us -- but before CPU 1's orphaned callbacks are invoked!!! + * (or preemption of this task). Exclude CPU-hotplug operations + * to ensure that no offline CPU has callbacks queued. */ init_completion(&rsp->barrier_completion); atomic_set(&rsp->barrier_cpu_count, 1); - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rsp->rcu_barrier_in_progress = current; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + get_online_cpus(); /* - * Force every CPU with callbacks to register a new callback - * that will tell us when all the preceding callbacks have - * been invoked. If an offline CPU has callbacks, wait for - * it to either come back online or to finish orphaning those - * callbacks. + * Force each CPU with callbacks to register a new callback. + * When that callback is invoked, we will know that all of the + * corresponding CPU's preceding callbacks have been invoked. */ - for_each_possible_cpu(cpu) { - preempt_disable(); + for_each_online_cpu(cpu) { rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_is_offline(cpu)) { - _rcu_barrier_trace(rsp, "Offline", cpu, - rsp->n_barrier_done); - preempt_enable(); - while (cpu_is_offline(cpu) && ACCESS_ONCE(rdp->qlen)) - schedule_timeout_interruptible(1); - } else if (ACCESS_ONCE(rdp->qlen)) { + if (ACCESS_ONCE(rdp->qlen)) { _rcu_barrier_trace(rsp, "OnlineQ", cpu, rsp->n_barrier_done); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); - preempt_enable(); } else { _rcu_barrier_trace(rsp, "OnlineNQ", cpu, rsp->n_barrier_done); - preempt_enable(); } } - - /* - * Now that all online CPUs have rcu_barrier_callback() callbacks - * posted, we can adopt all of the orphaned callbacks and place - * an rcu_barrier_callback() callback after them. When that is done, - * we are guaranteed to have an rcu_barrier_callback() callback - * following every callback that could possibly have been - * registered before _rcu_barrier() was called. - */ - raw_spin_lock_irqsave(&rsp->onofflock, flags); - rcu_adopt_orphan_cbs(rsp); - rsp->rcu_barrier_in_progress = NULL; - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); - atomic_inc(&rsp->barrier_cpu_count); - smp_mb__after_atomic_inc(); /* Ensure atomic_inc() before callback. */ - rd.rsp = rsp; - rsp->call(&rd.barrier_head, rcu_barrier_callback); + put_online_cpus(); /* * Now that we have an rcu_barrier_callback() callback on each @@ -2456,8 +2587,6 @@ static void _rcu_barrier(struct rcu_state *rsp) /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rsp->barrier_mutex); - - destroy_rcu_head_on_stack(&rd.barrier_head); } /** @@ -2497,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); +#ifdef CONFIG_RCU_USER_QS + WARN_ON_ONCE(rdp->dynticks->in_user); +#endif rdp->cpu = cpu; rdp->rsp = rsp; raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -2523,6 +2655,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; + init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); @@ -2555,7 +2688,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->completed = rnp->completed; rdp->passed_quiesce = 0; rdp->qs_pending = 0; - rdp->passed_quiesce_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ @@ -2594,12 +2726,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, break; case CPU_ONLINE: case CPU_DOWN_FAILED: - rcu_node_kthread_setaffinity(rnp, -1); - rcu_cpu_kthread_setrt(cpu, 1); + rcu_boost_kthread_setaffinity(rnp, -1); break; case CPU_DOWN_PREPARE: - rcu_node_kthread_setaffinity(rnp, cpu); - rcu_cpu_kthread_setrt(cpu, 0); + rcu_boost_kthread_setaffinity(rnp, cpu); break; case CPU_DYING: case CPU_DYING_FROZEN: @@ -2627,6 +2757,28 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, } /* + * Spawn the kthread that handles this RCU flavor's grace periods. + */ +static int __init rcu_spawn_gp_kthread(void) +{ + unsigned long flags; + struct rcu_node *rnp; + struct rcu_state *rsp; + struct task_struct *t; + + for_each_rcu_flavor(rsp) { + t = kthread_run(rcu_gp_kthread, rsp, rsp->name); + BUG_ON(IS_ERR(t)); + rnp = rcu_get_root(rsp); + raw_spin_lock_irqsave(&rnp->lock, flags); + rsp->gp_kthread = t; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } + return 0; +} +early_initcall(rcu_spawn_gp_kthread); + +/* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain * RCU read-side critical sections (during which time, this idle @@ -2661,7 +2813,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) int cprv; int i; - cprv = NR_CPUS; + cprv = nr_cpu_ids; for (i = rcu_num_lvls - 1; i >= 0; i--) { ccur = rsp->levelcnt[i]; rsp->levelspread[i] = (cprv + ccur - 1) / ccur; @@ -2676,10 +2828,14 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) static void __init rcu_init_one(struct rcu_state *rsp, struct rcu_data __percpu *rda) { - static char *buf[] = { "rcu_node_level_0", - "rcu_node_level_1", - "rcu_node_level_2", - "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ + static char *buf[] = { "rcu_node_0", + "rcu_node_1", + "rcu_node_2", + "rcu_node_3" }; /* Match MAX_RCU_LVLS */ + static char *fqs[] = { "rcu_node_fqs_0", + "rcu_node_fqs_1", + "rcu_node_fqs_2", + "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ int cpustride = 1; int i; int j; @@ -2704,7 +2860,11 @@ static void __init rcu_init_one(struct rcu_state *rsp, raw_spin_lock_init(&rnp->lock); lockdep_set_class_and_name(&rnp->lock, &rcu_node_class[i], buf[i]); - rnp->gpnum = 0; + raw_spin_lock_init(&rnp->fqslock); + lockdep_set_class_and_name(&rnp->fqslock, + &rcu_fqs_class[i], fqs[i]); + rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rnp->qsmask = 0; rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; @@ -2727,6 +2887,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, } rsp->rda = rda; + init_waitqueue_head(&rsp->gp_wq); rnp = rsp->level[rcu_num_lvls - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) @@ -2750,7 +2911,8 @@ static void __init rcu_init_geometry(void) int rcu_capacity[MAX_RCU_LVLS + 1]; /* If the compile-time values are accurate, just leave. */ - if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF) + if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && + nr_cpu_ids == NR_CPUS) return; /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4d29169f212..5faf05d6832 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -102,6 +102,10 @@ struct rcu_dynticks { /* idle-period nonlazy_posted snapshot. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ +#ifdef CONFIG_RCU_USER_QS + bool ignore_user_qs; /* Treat userspace as extended QS or not */ + bool in_user; /* Is the CPU in userland from RCU POV? */ +#endif }; /* RCU's kthread states for tracing. */ @@ -196,12 +200,7 @@ struct rcu_node { /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. */ #endif /* #ifdef CONFIG_RCU_BOOST */ - struct task_struct *node_kthread_task; - /* kthread that takes care of this rcu_node */ - /* structure, for example, awakening the */ - /* per-CPU kthreads as needed. */ - unsigned int node_kthread_status; - /* State of node_kthread_task for tracing. */ + raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp; /* @@ -245,8 +244,6 @@ struct rcu_data { /* in order to detect GP end. */ unsigned long gpnum; /* Highest gp number that this CPU */ /* is aware of having started. */ - unsigned long passed_quiesce_gpnum; - /* gpnum at time of quiescent state. */ bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ @@ -312,11 +309,13 @@ struct rcu_data { unsigned long n_rp_cpu_needs_gp; unsigned long n_rp_gp_completed; unsigned long n_rp_gp_started; - unsigned long n_rp_need_fqs; unsigned long n_rp_need_nothing; - /* 6) _rcu_barrier() callback. */ + /* 6) _rcu_barrier() and OOM callbacks. */ struct rcu_head barrier_head; +#ifdef CONFIG_RCU_FAST_NO_HZ + struct rcu_head oom_head; +#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ int cpu; struct rcu_state *rsp; @@ -375,20 +374,17 @@ struct rcu_state { u8 fqs_state ____cacheline_internodealigned_in_smp; /* Force QS state. */ - u8 fqs_active; /* force_quiescent_state() */ - /* is running. */ - u8 fqs_need_gp; /* A CPU was prevented from */ - /* starting a new grace */ - /* period because */ - /* force_quiescent_state() */ - /* was running. */ u8 boost; /* Subject to priority boost. */ unsigned long gpnum; /* Current gp number. */ unsigned long completed; /* # of last completed gp. */ + struct task_struct *gp_kthread; /* Task for grace periods. */ + wait_queue_head_t gp_wq; /* Where GP task waits. */ + int gp_flags; /* Commands for GP task. */ /* End of fields guarded by root rcu_node's lock. */ - raw_spinlock_t onofflock; /* exclude on/offline and */ + raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp; + /* exclude on/offline and */ /* starting new GP. */ struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ /* need a grace period. */ @@ -398,16 +394,11 @@ struct rcu_state { struct rcu_head **orphan_donetail; /* Tail of above. */ long qlen_lazy; /* Number of lazy callbacks. */ long qlen; /* Total number of callbacks. */ - struct task_struct *rcu_barrier_in_progress; - /* Task doing rcu_barrier(), */ - /* or NULL if no barrier. */ struct mutex barrier_mutex; /* Guards barrier fields. */ atomic_t barrier_cpu_count; /* # CPUs waiting on. */ struct completion barrier_completion; /* Wake at barrier end. */ unsigned long n_barrier_done; /* ++ at start and end of */ /* _rcu_barrier(). */ - raw_spinlock_t fqslock; /* Only one task forcing */ - /* quiescent states. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ @@ -426,6 +417,10 @@ struct rcu_state { struct list_head flavors; /* List of RCU flavors. */ }; +/* Values for rcu_state structure's gp_flags field. */ +#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ +#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ + extern struct list_head rcu_struct_flavors; #define for_each_rcu_flavor(rsp) \ list_for_each_entry((rsp), &rcu_struct_flavors, flavors) @@ -468,7 +463,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags); -static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); @@ -491,15 +485,9 @@ static void invoke_rcu_callbacks_kthread(void); static bool rcu_is_callbacks_kthread(void); #ifdef CONFIG_RCU_BOOST static void rcu_preempt_do_callbacks(void); -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm); static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index); -static void invoke_rcu_node_kthread(struct rcu_node *rnp); -static void rcu_yield(void (*f)(unsigned long), unsigned long arg); + struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt); static void __cpuinit rcu_prepare_kthreads(int cpu); static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7f3244c0df0..f9211548818 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -25,6 +25,8 @@ */ #include <linux/delay.h> +#include <linux/oom.h> +#include <linux/smpboot.h> #define RCU_KTHREAD_PRIO 1 @@ -118,7 +120,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); */ void rcu_force_quiescent_state(void) { - force_quiescent_state(&rcu_preempt_state, 0); + force_quiescent_state(&rcu_preempt_state); } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); @@ -136,8 +138,6 @@ static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesce_gpnum = rdp->gpnum; - barrier(); if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); rdp->passed_quiesce = 1; @@ -422,9 +422,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) unsigned long flags; struct task_struct *t; - if (!rcu_preempt_blocked_readers_cgp(rnp)) - return; raw_spin_lock_irqsave(&rnp->lock, flags); + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } t = list_entry(rnp->gp_tasks, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) @@ -584,17 +586,23 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ } + rnp->gp_tasks = NULL; + rnp->exp_tasks = NULL; #ifdef CONFIG_RCU_BOOST - /* In case root is being boosted and leaf is not. */ + rnp->boost_tasks = NULL; + /* + * In case root is being boosted and leaf was not. Make sure + * that we boost the tasks blocking the current grace period + * in this case. + */ raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ if (rnp_root->boost_tasks != NULL && - rnp_root->boost_tasks != rnp_root->gp_tasks) + rnp_root->boost_tasks != rnp_root->gp_tasks && + rnp_root->boost_tasks != rnp_root->exp_tasks) rnp_root->boost_tasks = rnp_root->gp_tasks; raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ #endif /* #ifdef CONFIG_RCU_BOOST */ - rnp->gp_tasks = NULL; - rnp->exp_tasks = NULL; return retval; } @@ -676,7 +684,7 @@ void synchronize_rcu(void) EXPORT_SYMBOL_GPL(synchronize_rcu); static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); -static long sync_rcu_preempt_exp_count; +static unsigned long sync_rcu_preempt_exp_count; static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); /* @@ -791,7 +799,7 @@ void synchronize_rcu_expedited(void) unsigned long flags; struct rcu_node *rnp; struct rcu_state *rsp = &rcu_preempt_state; - long snap; + unsigned long snap; int trycount = 0; smp_mb(); /* Caller's modifications seen first by other CPUs. */ @@ -799,33 +807,47 @@ void synchronize_rcu_expedited(void) smp_mb(); /* Above access cannot bleed into critical section. */ /* + * Block CPU-hotplug operations. This means that any CPU-hotplug + * operation that finds an rcu_node structure with tasks in the + * process of being boosted will know that all tasks blocking + * this expedited grace period will already be in the process of + * being boosted. This simplifies the process of moving tasks + * from leaf to root rcu_node structures. + */ + get_online_cpus(); + + /* * Acquire lock, falling back to synchronize_rcu() if too many * lock-acquisition failures. Of course, if someone does the * expedited grace period for us, just leave. */ while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { + if (ULONG_CMP_LT(snap, + ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + put_online_cpus(); + goto mb_ret; /* Others did our work for us. */ + } if (trycount++ < 10) { udelay(trycount * num_online_cpus()); } else { + put_online_cpus(); synchronize_rcu(); return; } - if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) - goto mb_ret; /* Others did our work for us. */ } - if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { + put_online_cpus(); goto unlock_mb_ret; /* Others did our work for us. */ + } /* force all RCU readers onto ->blkd_tasks lists. */ synchronize_sched_expedited(); - raw_spin_lock_irqsave(&rsp->onofflock, flags); - /* Initialize ->expmask for all non-leaf rcu_node structures. */ rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock_irqsave(&rnp->lock, flags); rnp->expmask = rnp->qsmaskinit; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* Snapshot current state of ->blkd_tasks lists. */ @@ -834,7 +856,7 @@ void synchronize_rcu_expedited(void) if (NUM_RCU_NODES > 1) sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); - raw_spin_unlock_irqrestore(&rsp->onofflock, flags); + put_online_cpus(); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); @@ -1069,6 +1091,16 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) #endif /* #else #ifdef CONFIG_RCU_TRACE */ +static void rcu_wake_cond(struct task_struct *t, int status) +{ + /* + * If the thread is yielding, only wake it when this + * is invoked from idle + */ + if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) + wake_up_process(t); +} + /* * Carry out RCU priority boosting on the task indicated by ->exp_tasks * or ->boost_tasks, advancing the pointer to the next task in the @@ -1141,17 +1173,6 @@ static int rcu_boost(struct rcu_node *rnp) } /* - * Timer handler to initiate waking up of boost kthreads that - * have yielded the CPU due to excessive numbers of tasks to - * boost. We wake up the per-rcu_node kthread, which in turn - * will wake up the booster kthread. - */ -static void rcu_boost_kthread_timer(unsigned long arg) -{ - invoke_rcu_node_kthread((struct rcu_node *)arg); -} - -/* * Priority-boosting kthread. One per leaf rcu_node and one for the * root rcu_node. */ @@ -1174,8 +1195,9 @@ static int rcu_boost_kthread(void *arg) else spincnt = 0; if (spincnt > 10) { + rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; trace_rcu_utilization("End boost kthread@rcu_yield"); - rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); + schedule_timeout_interruptible(2); trace_rcu_utilization("Start boost kthread@rcu_yield"); spincnt = 0; } @@ -1191,9 +1213,9 @@ static int rcu_boost_kthread(void *arg) * kthread to start boosting them. If there is an expedited grace * period in progress, it is always time to boost. * - * The caller must hold rnp->lock, which this function releases, - * but irqs remain disabled. The ->boost_kthread_task is immortal, - * so we don't need to worry about it going away. + * The caller must hold rnp->lock, which this function releases. + * The ->boost_kthread_task is immortal, so we don't need to worry + * about it going away. */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) { @@ -1213,8 +1235,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) rnp->boost_tasks = rnp->gp_tasks; raw_spin_unlock_irqrestore(&rnp->lock, flags); t = rnp->boost_kthread_task; - if (t != NULL) - wake_up_process(t); + if (t) + rcu_wake_cond(t, rnp->boost_kthread_status); } else { rcu_initiate_boost_trace(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -1231,8 +1253,10 @@ static void invoke_rcu_callbacks_kthread(void) local_irq_save(flags); __this_cpu_write(rcu_cpu_has_work, 1); if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && - current != __this_cpu_read(rcu_cpu_kthread_task)) - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); + current != __this_cpu_read(rcu_cpu_kthread_task)) { + rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), + __this_cpu_read(rcu_cpu_kthread_status)); + } local_irq_restore(flags); } @@ -1245,21 +1269,6 @@ static bool rcu_is_callbacks_kthread(void) return __get_cpu_var(rcu_cpu_kthread_task) == current; } -/* - * Set the affinity of the boost kthread. The CPU-hotplug locks are - * held, so no one should be messing with the existence of the boost - * kthread. - */ -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm) -{ - struct task_struct *t; - - t = rnp->boost_kthread_task; - if (t != NULL) - set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); -} - #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) /* @@ -1276,15 +1285,19 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) * Returns zero if all is well, a negated errno otherwise. */ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index) + struct rcu_node *rnp) { + int rnp_index = rnp - &rsp->node[0]; unsigned long flags; struct sched_param sp; struct task_struct *t; if (&rcu_preempt_state != rsp) return 0; + + if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) + return 0; + rsp->boost = 1; if (rnp->boost_kthread_task != NULL) return 0; @@ -1301,25 +1314,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Stop the RCU's per-CPU kthread when its CPU goes offline,. - */ -static void rcu_stop_cpu_kthread(int cpu) -{ - struct task_struct *t; - - /* Stop the CPU's kthread. */ - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t != NULL) { - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; - kthread_stop(t); - } -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - static void rcu_kthread_do_work(void) { rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1327,112 +1321,22 @@ static void rcu_kthread_do_work(void) rcu_preempt_do_callbacks(); } -/* - * Wake up the specified per-rcu_node-structure kthread. - * Because the per-rcu_node kthreads are immortal, we don't need - * to do anything to keep them alive. - */ -static void invoke_rcu_node_kthread(struct rcu_node *rnp) -{ - struct task_struct *t; - - t = rnp->node_kthread_task; - if (t != NULL) - wake_up_process(t); -} - -/* - * Set the specified CPU's kthread to run RT or not, as specified by - * the to_rt argument. The CPU-hotplug locks are held, so the task - * is not going away. - */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +static void rcu_cpu_kthread_setup(unsigned int cpu) { - int policy; struct sched_param sp; - struct task_struct *t; - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t == NULL) - return; - if (to_rt) { - policy = SCHED_FIFO; - sp.sched_priority = RCU_KTHREAD_PRIO; - } else { - policy = SCHED_NORMAL; - sp.sched_priority = 0; - } - sched_setscheduler_nocheck(t, policy, &sp); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); } -/* - * Timer handler to initiate the waking up of per-CPU kthreads that - * have yielded the CPU due to excess numbers of RCU callbacks. - * We wake up the per-rcu_node kthread, which in turn will wake up - * the booster kthread. - */ -static void rcu_cpu_kthread_timer(unsigned long arg) +static void rcu_cpu_kthread_park(unsigned int cpu) { - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); - struct rcu_node *rnp = rdp->mynode; - - atomic_or(rdp->grpmask, &rnp->wakemask); - invoke_rcu_node_kthread(rnp); + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; } -/* - * Drop to non-real-time priority and yield, but only after posting a - * timer that will cause us to regain our real-time priority if we - * remain preempted. Either way, we restore our real-time priority - * before returning. - */ -static void rcu_yield(void (*f)(unsigned long), unsigned long arg) +static int rcu_cpu_kthread_should_run(unsigned int cpu) { - struct sched_param sp; - struct timer_list yield_timer; - int prio = current->rt_priority; - - setup_timer_on_stack(&yield_timer, f, arg); - mod_timer(&yield_timer, jiffies + 2); - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); - set_user_nice(current, 19); - schedule(); - set_user_nice(current, 0); - sp.sched_priority = prio; - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); - del_timer(&yield_timer); -} - -/* - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. - * This can happen while the corresponding CPU is either coming online - * or going offline. We cannot wait until the CPU is fully online - * before starting the kthread, because the various notifier functions - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until - * the corresponding CPU is online. - * - * Return 1 if the kthread needs to stop, 0 otherwise. - * - * Caller must disable bh. This function can momentarily enable it. - */ -static int rcu_cpu_kthread_should_stop(int cpu) -{ - while (cpu_is_offline(cpu) || - !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || - smp_processor_id() != cpu) { - if (kthread_should_stop()) - return 1; - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); - local_bh_enable(); - schedule_timeout_uninterruptible(1); - if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - local_bh_disable(); - } - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - return 0; + return __get_cpu_var(rcu_cpu_has_work); } /* @@ -1440,138 +1344,35 @@ static int rcu_cpu_kthread_should_stop(int cpu) * RCU softirq used in flavors and configurations of RCU that do not * support RCU priority boosting. */ -static int rcu_cpu_kthread(void *arg) +static void rcu_cpu_kthread(unsigned int cpu) { - int cpu = (int)(long)arg; - unsigned long flags; - int spincnt = 0; - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - char work; - char *workp = &per_cpu(rcu_cpu_has_work, cpu); + unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); + char work, *workp = &__get_cpu_var(rcu_cpu_has_work); + int spincnt; - trace_rcu_utilization("Start CPU kthread@init"); - for (;;) { - *statusp = RCU_KTHREAD_WAITING; - trace_rcu_utilization("End CPU kthread@rcu_wait"); - rcu_wait(*workp != 0 || kthread_should_stop()); + for (spincnt = 0; spincnt < 10; spincnt++) { trace_rcu_utilization("Start CPU kthread@rcu_wait"); local_bh_disable(); - if (rcu_cpu_kthread_should_stop(cpu)) { - local_bh_enable(); - break; - } *statusp = RCU_KTHREAD_RUNNING; - per_cpu(rcu_cpu_kthread_loops, cpu)++; - local_irq_save(flags); + this_cpu_inc(rcu_cpu_kthread_loops); + local_irq_disable(); work = *workp; *workp = 0; - local_irq_restore(flags); + local_irq_enable(); if (work) rcu_kthread_do_work(); local_bh_enable(); - if (*workp != 0) - spincnt++; - else - spincnt = 0; - if (spincnt > 10) { - *statusp = RCU_KTHREAD_YIELDING; - trace_rcu_utilization("End CPU kthread@rcu_yield"); - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); - trace_rcu_utilization("Start CPU kthread@rcu_yield"); - spincnt = 0; - } - } - *statusp = RCU_KTHREAD_STOPPED; - trace_rcu_utilization("End CPU kthread@term"); - return 0; -} - -/* - * Spawn a per-CPU kthread, setting up affinity and priority. - * Because the CPU hotplug lock is held, no other CPU will be attempting - * to manipulate rcu_cpu_kthread_task. There might be another CPU - * attempting to access it during boot, but the locking in kthread_bind() - * will enforce sufficient ordering. - * - * Please note that we cannot simply refuse to wake up the per-CPU - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, - * which can result in softlockup complaints if the task ends up being - * idle for more than a couple of minutes. - * - * However, please note also that we cannot bind the per-CPU kthread to its - * CPU until that CPU is fully online. We also cannot wait until the - * CPU is fully online before we create its per-CPU kthread, as this would - * deadlock the system when CPU notifiers tried waiting for grace - * periods. So we bind the per-CPU kthread to its CPU only if the CPU - * is online. If its CPU is not yet fully online, then the code in - * rcu_cpu_kthread() will wait until it is fully online, and then do - * the binding. - */ -static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) -{ - struct sched_param sp; - struct task_struct *t; - - if (!rcu_scheduler_fully_active || - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) - return 0; - t = kthread_create_on_node(rcu_cpu_kthread, - (void *)(long)cpu, - cpu_to_node(cpu), - "rcuc/%d", cpu); - if (IS_ERR(t)) - return PTR_ERR(t); - if (cpu_online(cpu)) - kthread_bind(t, cpu); - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ - return 0; -} - -/* - * Per-rcu_node kthread, which is in charge of waking up the per-CPU - * kthreads when needed. We ignore requests to wake up kthreads - * for offline CPUs, which is OK because force_quiescent_state() - * takes care of this case. - */ -static int rcu_node_kthread(void *arg) -{ - int cpu; - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp = (struct rcu_node *)arg; - struct sched_param sp; - struct task_struct *t; - - for (;;) { - rnp->node_kthread_status = RCU_KTHREAD_WAITING; - rcu_wait(atomic_read(&rnp->wakemask) != 0); - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; - raw_spin_lock_irqsave(&rnp->lock, flags); - mask = atomic_xchg(&rnp->wakemask, 0); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { - if ((mask & 0x1) == 0) - continue; - preempt_disable(); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (!cpu_online(cpu) || t == NULL) { - preempt_enable(); - continue; - } - per_cpu(rcu_cpu_has_work, cpu) = 1; - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - preempt_enable(); + if (*workp == 0) { + trace_rcu_utilization("End CPU kthread@rcu_wait"); + *statusp = RCU_KTHREAD_WAITING; + return; } } - /* NOTREACHED */ - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; - return 0; + *statusp = RCU_KTHREAD_YIELDING; + trace_rcu_utilization("Start CPU kthread@rcu_yield"); + schedule_timeout_interruptible(2); + trace_rcu_utilization("End CPU kthread@rcu_yield"); + *statusp = RCU_KTHREAD_WAITING; } /* @@ -1583,17 +1384,17 @@ static int rcu_node_kthread(void *arg) * no outgoing CPU. If there are no CPUs left in the affinity set, * this function allows the kthread to execute on any CPU. */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { + struct task_struct *t = rnp->boost_kthread_task; + unsigned long mask = rnp->qsmaskinit; cpumask_var_t cm; int cpu; - unsigned long mask = rnp->qsmaskinit; - if (rnp->node_kthread_task == NULL) + if (!t) return; - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; - cpumask_clear(cm); for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) if ((mask & 0x1) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); @@ -1603,62 +1404,36 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpumask_clear_cpu(cpu, cm); WARN_ON_ONCE(cpumask_weight(cm) == 0); } - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); - rcu_boost_kthread_setaffinity(rnp, cm); + set_cpus_allowed_ptr(t, cm); free_cpumask_var(cm); } -/* - * Spawn a per-rcu_node kthread, setting priority and affinity. - * Called during boot before online/offline can happen, or, if - * during runtime, with the main CPU-hotplug locks held. So only - * one of these can be executing at a time. - */ -static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - unsigned long flags; - int rnp_index = rnp - &rsp->node[0]; - struct sched_param sp; - struct task_struct *t; - - if (!rcu_scheduler_fully_active || - rnp->qsmaskinit == 0) - return 0; - if (rnp->node_kthread_task == NULL) { - t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun/%d", rnp_index); - if (IS_ERR(t)) - return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->node_kthread_task = t; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = 99; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ - } - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); -} +static struct smp_hotplug_thread rcu_cpu_thread_spec = { + .store = &rcu_cpu_kthread_task, + .thread_should_run = rcu_cpu_kthread_should_run, + .thread_fn = rcu_cpu_kthread, + .thread_comm = "rcuc/%u", + .setup = rcu_cpu_kthread_setup, + .park = rcu_cpu_kthread_park, +}; /* * Spawn all kthreads -- called as soon as the scheduler is running. */ static int __init rcu_spawn_kthreads(void) { - int cpu; struct rcu_node *rnp; + int cpu; rcu_scheduler_fully_active = 1; - for_each_possible_cpu(cpu) { + for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) - (void)rcu_spawn_one_cpu_kthread(cpu); - } + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); if (NUM_RCU_NODES > 1) { rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); } return 0; } @@ -1670,11 +1445,8 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) struct rcu_node *rnp = rdp->mynode; /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ - if (rcu_scheduler_fully_active) { - (void)rcu_spawn_one_cpu_kthread(cpu); - if (rnp->node_kthread_task == NULL) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } + if (rcu_scheduler_fully_active) + (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); } #else /* #ifdef CONFIG_RCU_BOOST */ @@ -1698,19 +1470,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -#ifdef CONFIG_HOTPLUG_CPU - -static void rcu_stop_cpu_kthread(int cpu) -{ -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ -} - -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { } @@ -1997,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; + /* Adaptive-tick mode, where usermode execution is idle to RCU. */ + if (!is_idle_task(current)) { + rdtp->dyntick_holdoff = jiffies - 1; + if (rcu_cpu_has_nonlazy_callbacks(cpu)) { + trace_rcu_prep_idle("User dyntick with callbacks"); + rdtp->idle_gp_timer_expires = + round_up(jiffies + RCU_IDLE_GP_DELAY, + RCU_IDLE_GP_DELAY); + } else if (rcu_cpu_has_callbacks(cpu)) { + rdtp->idle_gp_timer_expires = + round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); + trace_rcu_prep_idle("User dyntick with lazy callbacks"); + } else { + return; + } + tp = &rdtp->idle_gp_timer; + mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + return; + } + /* * If this is an idle re-entry, for example, due to use of * RCU_NONIDLE() or the new idle-loop tracing API within the idle @@ -2075,16 +1855,16 @@ static void rcu_prepare_for_idle(int cpu) #ifdef CONFIG_TREE_PREEMPT_RCU if (per_cpu(rcu_preempt_data, cpu).nxtlist) { rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state, 0); + force_quiescent_state(&rcu_preempt_state); } #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ if (per_cpu(rcu_sched_data, cpu).nxtlist) { rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state, 0); + force_quiescent_state(&rcu_sched_state); } if (per_cpu(rcu_bh_data, cpu).nxtlist) { rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state, 0); + force_quiescent_state(&rcu_bh_state); } /* @@ -2112,6 +1892,88 @@ static void rcu_idle_count_callbacks_posted(void) __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); } +/* + * Data for flushing lazy RCU callbacks at OOM time. + */ +static atomic_t oom_callback_count; +static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); + +/* + * RCU OOM callback -- decrement the outstanding count and deliver the + * wake-up if we are the last one. + */ +static void rcu_oom_callback(struct rcu_head *rhp) +{ + if (atomic_dec_and_test(&oom_callback_count)) + wake_up(&oom_callback_wq); +} + +/* + * Post an rcu_oom_notify callback on the current CPU if it has at + * least one lazy callback. This will unnecessarily post callbacks + * to CPUs that already have a non-lazy callback at the end of their + * callback list, but this is an infrequent operation, so accept some + * extra overhead to keep things simple. + */ +static void rcu_oom_notify_cpu(void *unused) +{ + struct rcu_state *rsp; + struct rcu_data *rdp; + + for_each_rcu_flavor(rsp) { + rdp = __this_cpu_ptr(rsp->rda); + if (rdp->qlen_lazy != 0) { + atomic_inc(&oom_callback_count); + rsp->call(&rdp->oom_head, rcu_oom_callback); + } + } +} + +/* + * If low on memory, ensure that each CPU has a non-lazy callback. + * This will wake up CPUs that have only lazy callbacks, in turn + * ensuring that they free up the corresponding memory in a timely manner. + * Because an uncertain amount of memory will be freed in some uncertain + * timeframe, we do not claim to have freed anything. + */ +static int rcu_oom_notify(struct notifier_block *self, + unsigned long notused, void *nfreed) +{ + int cpu; + + /* Wait for callbacks from earlier instance to complete. */ + wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0); + + /* + * Prevent premature wakeup: ensure that all increments happen + * before there is a chance of the counter reaching zero. + */ + atomic_set(&oom_callback_count, 1); + + get_online_cpus(); + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); + cond_resched(); + } + put_online_cpus(); + + /* Unconditionally decrement: no need to wake ourselves up. */ + atomic_dec(&oom_callback_count); + + return NOTIFY_OK; +} + +static struct notifier_block rcu_oom_nb = { + .notifier_call = rcu_oom_notify +}; + +static int __init rcu_register_oom_notifier(void) +{ + register_oom_notifier(&rcu_oom_nb); + return 0; +} +early_initcall(rcu_register_oom_notifier); + #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #ifdef CONFIG_RCU_CPU_STALL_INFO @@ -2122,11 +1984,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct timer_list *tltp = &rdtp->idle_gp_timer; + char c; - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, - rdtp->dyntick_holdoff == jiffies ? 'H' : '.', - timer_pending(tltp) ? tltp->expires - jiffies : -1); + c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; + if (timer_pending(tltp)) + sprintf(cp, "drain=%d %c timer=%lu", + rdtp->dyntick_drain, c, tltp->expires - jiffies); + else + sprintf(cp, "drain=%d %c timer not pending", + rdtp->dyntick_drain, c); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ @@ -2194,11 +2060,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) /* Increment ->ticks_this_gp for all flavors of RCU. */ static void increment_cpu_stall_ticks(void) { - __get_cpu_var(rcu_sched_data).ticks_this_gp++; - __get_cpu_var(rcu_bh_data).ticks_this_gp++; -#ifdef CONFIG_TREE_PREEMPT_RCU - __get_cpu_var(rcu_preempt_data).ticks_this_gp++; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + __this_cpu_ptr(rsp->rda)->ticks_this_gp++; } #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index abffb486e94..693513bc50e 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -51,8 +51,8 @@ static int show_rcubarrier(struct seq_file *m, void *unused) struct rcu_state *rsp; for_each_rcu_flavor(rsp) - seq_printf(m, "%s: %c bcc: %d nbd: %lu\n", - rsp->name, rsp->rcu_barrier_in_progress ? 'B' : '.', + seq_printf(m, "%s: bcc: %d nbd: %lu\n", + rsp->name, atomic_read(&rsp->barrier_cpu_count), rsp->n_barrier_done); return 0; @@ -86,12 +86,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", + seq_printf(m, "%3d%cc=%lu g=%lu pq=%d qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->completed, rdp->gpnum, - rdp->passed_quiesce, rdp->passed_quiesce_gpnum, - rdp->qs_pending); + rdp->passed_quiesce, rdp->qs_pending); seq_printf(m, " dt=%d/%llx/%d df=%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, @@ -108,11 +107,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->nxttail[RCU_WAIT_TAIL]], ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); #ifdef CONFIG_RCU_BOOST - seq_printf(m, " kt=%d/%c/%d ktl=%x", + seq_printf(m, " kt=%d/%c ktl=%x", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, rdp->cpu)), - per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); #endif /* #ifdef CONFIG_RCU_BOOST */ seq_printf(m, " b=%ld", rdp->blimit); @@ -150,12 +148,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%d,%s,%lu,%lu,%d,%lu,%d", + seq_printf(m, "%d,%s,%lu,%lu,%d,%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", rdp->completed, rdp->gpnum, - rdp->passed_quiesce, rdp->passed_quiesce_gpnum, - rdp->qs_pending); + rdp->passed_quiesce, rdp->qs_pending); seq_printf(m, ",%d,%llx,%d,%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, @@ -186,7 +183,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) int cpu; struct rcu_state *rsp; - seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); #ifdef CONFIG_RCU_BOOST @@ -386,10 +383,9 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp); - seq_printf(m, "gpc=%ld gps=%ld nf=%ld nn=%ld\n", + seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n", rdp->n_rp_gp_completed, rdp->n_rp_gp_started, - rdp->n_rp_need_fqs, rdp->n_rp_need_nothing); } diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 173ea52f3af..f06d249e103 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer endif -obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o +obj-y += core.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o obj-$(CONFIG_SMP) += cpupri.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 649c9f876cb..c1774723643 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -740,126 +740,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -#ifdef CONFIG_IRQ_TIME_ACCOUNTING - -/* - * There are no locks covering percpu hardirq/softirq time. - * They are only modified in account_system_vtime, on corresponding CPU - * with interrupts disabled. So, writes are safe. - * They are read and saved off onto struct rq in update_rq_clock(). - * This may result in other CPU reading this CPU's irq time and can - * race with irq/account_system_vtime on this CPU. We would either get old - * or new value with a side effect of accounting a slice of irq time to wrong - * task when irq is in progress while we read rq->clock. That is a worthy - * compromise in place of having locks on each irq in account_system_time. - */ -static DEFINE_PER_CPU(u64, cpu_hardirq_time); -static DEFINE_PER_CPU(u64, cpu_softirq_time); - -static DEFINE_PER_CPU(u64, irq_start_time); -static int sched_clock_irqtime; - -void enable_sched_clock_irqtime(void) -{ - sched_clock_irqtime = 1; -} - -void disable_sched_clock_irqtime(void) -{ - sched_clock_irqtime = 0; -} - -#ifndef CONFIG_64BIT -static DEFINE_PER_CPU(seqcount_t, irq_time_seq); - -static inline void irq_time_write_begin(void) -{ - __this_cpu_inc(irq_time_seq.sequence); - smp_wmb(); -} - -static inline void irq_time_write_end(void) -{ - smp_wmb(); - __this_cpu_inc(irq_time_seq.sequence); -} - -static inline u64 irq_time_read(int cpu) -{ - u64 irq_time; - unsigned seq; - - do { - seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); - irq_time = per_cpu(cpu_softirq_time, cpu) + - per_cpu(cpu_hardirq_time, cpu); - } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); - - return irq_time; -} -#else /* CONFIG_64BIT */ -static inline void irq_time_write_begin(void) -{ -} - -static inline void irq_time_write_end(void) -{ -} - -static inline u64 irq_time_read(int cpu) -{ - return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); -} -#endif /* CONFIG_64BIT */ - -/* - * Called before incrementing preempt_count on {soft,}irq_enter - * and before decrementing preempt_count on {soft,}irq_exit. - */ -void account_system_vtime(struct task_struct *curr) -{ - unsigned long flags; - s64 delta; - int cpu; - - if (!sched_clock_irqtime) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); - __this_cpu_add(irq_start_time, delta); - - irq_time_write_begin(); - /* - * We do not account for softirq time from ksoftirqd here. - * We want to continue accounting softirq time to ksoftirqd thread - * in that case, so as not to confuse scheduler with a special task - * that do not consume any time, but still wants to run. - */ - if (hardirq_count()) - __this_cpu_add(cpu_hardirq_time, delta); - else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) - __this_cpu_add(cpu_softirq_time, delta); - - irq_time_write_end(); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(account_system_vtime); - -#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ - -#ifdef CONFIG_PARAVIRT -static inline u64 steal_ticks(u64 steal) -{ - if (unlikely(steal > NSEC_PER_SEC)) - return div_u64(steal, TICK_NSEC); - - return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -} -#endif - static void update_rq_clock_task(struct rq *rq, s64 delta) { /* @@ -920,43 +800,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #endif } -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -static int irqtime_account_hi_update(void) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - unsigned long flags; - u64 latest_ns; - int ret = 0; - - local_irq_save(flags); - latest_ns = this_cpu_read(cpu_hardirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) - ret = 1; - local_irq_restore(flags); - return ret; -} - -static int irqtime_account_si_update(void) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - unsigned long flags; - u64 latest_ns; - int ret = 0; - - local_irq_save(flags); - latest_ns = this_cpu_read(cpu_softirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) - ret = 1; - local_irq_restore(flags); - return ret; -} - -#else /* CONFIG_IRQ_TIME_ACCOUNTING */ - -#define sched_clock_irqtime (0) - -#endif - void sched_set_stop_task(int cpu, struct task_struct *stop) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -1518,25 +1361,6 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu) smp_send_reschedule(cpu); } -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW -static int ttwu_activate_remote(struct task_struct *p, int wake_flags) -{ - struct rq *rq; - int ret = 0; - - rq = __task_rq_lock(p); - if (p->on_cpu) { - ttwu_activate(rq, p, ENQUEUE_WAKEUP); - ttwu_do_wakeup(rq, p, wake_flags); - ret = 1; - } - __task_rq_unlock(rq); - - return ret; - -} -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ - bool cpus_share_cache(int this_cpu, int that_cpu) { return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); @@ -1597,21 +1421,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * If the owning (remote) cpu is still in the middle of schedule() with * this task as prev, wait until its done referencing the task. */ - while (p->on_cpu) { -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - /* - * In case the architecture enables interrupts in - * context_switch(), we cannot busy wait, since that - * would lead to deadlocks when an interrupt hits and - * tries to wake up @prev. So bail and do a complete - * remote wakeup. - */ - if (ttwu_activate_remote(p, wake_flags)) - goto stat; -#else + while (p->on_cpu) cpu_relax(); -#endif - } /* * Pairs with the smp_wmb() in finish_lock_switch(). */ @@ -1953,14 +1764,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) * Manfred Spraul <manfred@colorfullife.com> */ prev_state = prev->state; + vtime_task_switch(prev); finish_arch_switch(prev); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_disable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ perf_event_task_sched_in(prev, current); -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - local_irq_enable(); -#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); @@ -2081,6 +1887,7 @@ context_switch(struct rq *rq, struct task_struct *prev, #endif /* Here we just switch the register state and the stack. */ + rcu_switch(prev, next); switch_to(prev, next, prev); barrier(); @@ -2809,404 +2616,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_CGROUP_CPUACCT -struct cgroup_subsys cpuacct_subsys; -struct cpuacct root_cpuacct; -#endif - -static inline void task_group_account_field(struct task_struct *p, int index, - u64 tmp) -{ -#ifdef CONFIG_CGROUP_CPUACCT - struct kernel_cpustat *kcpustat; - struct cpuacct *ca; -#endif - /* - * Since all updates are sure to touch the root cgroup, we - * get ourselves ahead and touch it first. If the root cgroup - * is the only cgroup, then nothing else should be necessary. - * - */ - __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; - -#ifdef CONFIG_CGROUP_CPUACCT - if (unlikely(!cpuacct_subsys.active)) - return; - - rcu_read_lock(); - ca = task_ca(p); - while (ca && (ca != &root_cpuacct)) { - kcpustat = this_cpu_ptr(ca->cpustat); - kcpustat->cpustat[index] += tmp; - ca = parent_ca(ca); - } - rcu_read_unlock(); -#endif -} - - -/* - * Account user cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in user space since the last update - * @cputime_scaled: cputime scaled by cpu frequency - */ -void account_user_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled) -{ - int index; - - /* Add user time to process. */ - p->utime += cputime; - p->utimescaled += cputime_scaled; - account_group_user_time(p, cputime); - - index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, (__force u64) cputime); - - /* Account for user time used */ - acct_update_integrals(p); -} - -/* - * Account guest cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in virtual machine since the last update - * @cputime_scaled: cputime scaled by cpu frequency - */ -static void account_guest_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - - /* Add guest time to process. */ - p->utime += cputime; - p->utimescaled += cputime_scaled; - account_group_user_time(p, cputime); - p->gtime += cputime; - - /* Add guest time to cpustat. */ - if (TASK_NICE(p) > 0) { - cpustat[CPUTIME_NICE] += (__force u64) cputime; - cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; - } else { - cpustat[CPUTIME_USER] += (__force u64) cputime; - cpustat[CPUTIME_GUEST] += (__force u64) cputime; - } -} - -/* - * Account system cpu time to a process and desired cpustat field - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in kernel space since the last update - * @cputime_scaled: cputime scaled by cpu frequency - * @target_cputime64: pointer to cpustat field that has to be updated - */ -static inline -void __account_system_time(struct task_struct *p, cputime_t cputime, - cputime_t cputime_scaled, int index) -{ - /* Add system time to process. */ - p->stime += cputime; - p->stimescaled += cputime_scaled; - account_group_system_time(p, cputime); - - /* Add system time to cpustat. */ - task_group_account_field(p, index, (__force u64) cputime); - - /* Account for system time used */ - acct_update_integrals(p); -} - -/* - * Account system cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @hardirq_offset: the offset to subtract from hardirq_count() - * @cputime: the cpu time spent in kernel space since the last update - * @cputime_scaled: cputime scaled by cpu frequency - */ -void account_system_time(struct task_struct *p, int hardirq_offset, - cputime_t cputime, cputime_t cputime_scaled) -{ - int index; - - if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { - account_guest_time(p, cputime, cputime_scaled); - return; - } - - if (hardirq_count() - hardirq_offset) - index = CPUTIME_IRQ; - else if (in_serving_softirq()) - index = CPUTIME_SOFTIRQ; - else - index = CPUTIME_SYSTEM; - - __account_system_time(p, cputime, cputime_scaled, index); -} - -/* - * Account for involuntary wait time. - * @cputime: the cpu time spent in involuntary wait - */ -void account_steal_time(cputime_t cputime) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - - cpustat[CPUTIME_STEAL] += (__force u64) cputime; -} - -/* - * Account for idle time. - * @cputime: the cpu time spent in idle wait - */ -void account_idle_time(cputime_t cputime) -{ - u64 *cpustat = kcpustat_this_cpu->cpustat; - struct rq *rq = this_rq(); - - if (atomic_read(&rq->nr_iowait) > 0) - cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; - else - cpustat[CPUTIME_IDLE] += (__force u64) cputime; -} - -static __always_inline bool steal_account_process_tick(void) -{ -#ifdef CONFIG_PARAVIRT - if (static_key_false(¶virt_steal_enabled)) { - u64 steal, st = 0; - - steal = paravirt_steal_clock(smp_processor_id()); - steal -= this_rq()->prev_steal_time; - - st = steal_ticks(steal); - this_rq()->prev_steal_time += st * TICK_NSEC; - - account_steal_time(st); - return st; - } -#endif - return false; -} - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * Account a tick to a process and cpustat - * @p: the process that the cpu time gets accounted to - * @user_tick: is the tick from userspace - * @rq: the pointer to rq - * - * Tick demultiplexing follows the order - * - pending hardirq update - * - pending softirq update - * - user_time - * - idle_time - * - system time - * - check for guest_time - * - else account as system_time - * - * Check for hardirq is done both for system and user time as there is - * no timer going off while we are on hardirq and hence we may never get an - * opportunity to update it solely in system time. - * p->stime and friends are only updated on system time and not on irq - * softirq as those do not count in task exec_runtime any more. - */ -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) -{ - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); - u64 *cpustat = kcpustat_this_cpu->cpustat; - - if (steal_account_process_tick()) - return; - - if (irqtime_account_hi_update()) { - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; - } else if (irqtime_account_si_update()) { - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; - } else if (this_cpu_ksoftirqd() == p) { - /* - * ksoftirqd time do not get accounted in cpu_softirq_time. - * So, we have to handle it separately here. - * Also, p->stime needs to be updated for ksoftirqd. - */ - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SOFTIRQ); - } else if (user_tick) { - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); - } else if (p == rq->idle) { - account_idle_time(cputime_one_jiffy); - } else if (p->flags & PF_VCPU) { /* System time or guest time */ - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); - } else { - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SYSTEM); - } -} - -static void irqtime_account_idle_ticks(int ticks) -{ - int i; - struct rq *rq = this_rq(); - - for (i = 0; i < ticks; i++) - irqtime_account_process_tick(current, 0, rq); -} -#else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static void irqtime_account_idle_ticks(int ticks) {} -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) {} -#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ - -/* - * Account a single tick of cpu time. - * @p: the process that the cpu time gets accounted to - * @user_tick: indicates if the tick is a user or a system tick - */ -void account_process_tick(struct task_struct *p, int user_tick) -{ - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); - struct rq *rq = this_rq(); - - if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); - return; - } - - if (steal_account_process_tick()) - return; - - if (user_tick) - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); - else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, - one_jiffy_scaled); - else - account_idle_time(cputime_one_jiffy); -} - -/* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* - * Account multiple ticks of idle time. - * @ticks: number of stolen ticks - */ -void account_idle_ticks(unsigned long ticks) -{ - - if (sched_clock_irqtime) { - irqtime_account_idle_ticks(ticks); - return; - } - - account_idle_time(jiffies_to_cputime(ticks)); -} - -#endif - -/* - * Use precise platform statistics if available: - */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - *ut = p->utime; - *st = p->stime; -} - -void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - struct task_cputime cputime; - - thread_group_cputime(p, &cputime); - - *ut = cputime.utime; - *st = cputime.stime; -} -#else - -#ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -#endif - -static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) -{ - u64 temp = (__force u64) rtime; - - temp *= (__force u64) utime; - - if (sizeof(cputime_t) == 4) - temp = div_u64(temp, (__force u32) total); - else - temp = div64_u64(temp, (__force u64) total); - - return (__force cputime_t) temp; -} - -void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - cputime_t rtime, utime = p->utime, total = utime + p->stime; - - /* - * Use CFS's precise accounting: - */ - rtime = nsecs_to_cputime(p->se.sum_exec_runtime); - - if (total) - utime = scale_utime(utime, rtime, total); - else - utime = rtime; - - /* - * Compare with previous values, to keep monotonicity: - */ - p->prev_utime = max(p->prev_utime, utime); - p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); - - *ut = p->prev_utime; - *st = p->prev_stime; -} - -/* - * Must be called with siglock held. - */ -void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) -{ - struct signal_struct *sig = p->signal; - struct task_cputime cputime; - cputime_t rtime, utime, total; - - thread_group_cputime(p, &cputime); - - total = cputime.utime + cputime.stime; - rtime = nsecs_to_cputime(cputime.sum_exec_runtime); - - if (total) - utime = scale_utime(cputime.utime, rtime, total); - else - utime = rtime; - - sig->prev_utime = max(sig->prev_utime, utime); - sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); - - *ut = sig->prev_utime; - *st = sig->prev_stime; -} -#endif - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3367,6 +2776,40 @@ pick_next_task(struct rq *rq) /* * __schedule() is the main scheduler function. + * + * The main means of driving the scheduler and thus entering this function are: + * + * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. + * + * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return + * paths. For example, see arch/x86/entry_64.S. + * + * To drive preemption between tasks, the scheduler sets the flag in timer + * interrupt handler scheduler_tick(). + * + * 3. Wakeups don't really cause entry into schedule(). They add a + * task to the run-queue and that's it. + * + * Now, if the new task added to the run-queue preempts the current + * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets + * called on the nearest possible occasion: + * + * - If the kernel is preemptible (CONFIG_PREEMPT=y): + * + * - in syscall or exception context, at the next outmost + * preempt_enable(). (this might be as soon as the wake_up()'s + * spin_unlock()!) + * + * - in IRQ context, return from interrupt-handler to + * preemptible context + * + * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) + * then at the next: + * + * - cond_resched() call + * - explicit schedule() call + * - return from syscall or exception to user-space + * - return from interrupt-handler to user-space */ static void __sched __schedule(void) { @@ -3468,6 +2911,21 @@ asmlinkage void __sched schedule(void) } EXPORT_SYMBOL(schedule); +#ifdef CONFIG_RCU_USER_QS +asmlinkage void __sched schedule_user(void) +{ + /* + * If we come here after a random call to set_need_resched(), + * or we have been woken up remotely but the IPI has not yet arrived, + * we haven't yet exited the RCU idle mode. Do it here manually until + * we find a better solution. + */ + rcu_user_exit(); + schedule(); + rcu_user_enter(); +} +#endif + /** * schedule_preempt_disabled - called with preemption disabled * @@ -3569,6 +3027,7 @@ asmlinkage void __sched preempt_schedule_irq(void) /* Catch callers which need to be fixed */ BUG_ON(ti->preempt_count || !irqs_disabled()); + rcu_user_exit(); do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); @@ -4868,13 +4327,6 @@ again: */ if (preempt && rq != p_rq) resched_task(p_rq->curr); - } else { - /* - * We might have set it in task_yield_fair(), but are - * not going to schedule(), so don't want to skip - * the next update. - */ - rq->skip_clock_update = 0; } out: @@ -5416,16 +4868,25 @@ static void sd_free_ctl_entry(struct ctl_table **tablep) *tablep = NULL; } +static int min_load_idx = 0; +static int max_load_idx = CPU_LOAD_IDX_MAX; + static void set_table_entry(struct ctl_table *entry, const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler) + umode_t mode, proc_handler *proc_handler, + bool load_idx) { entry->procname = procname; entry->data = data; entry->maxlen = maxlen; entry->mode = mode; entry->proc_handler = proc_handler; + + if (load_idx) { + entry->extra1 = &min_load_idx; + entry->extra2 = &max_load_idx; + } } static struct ctl_table * @@ -5437,30 +4898,30 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) return NULL; set_table_entry(&table[0], "min_interval", &sd->min_interval, - sizeof(long), 0644, proc_doulongvec_minmax); + sizeof(long), 0644, proc_doulongvec_minmax, false); set_table_entry(&table[1], "max_interval", &sd->max_interval, - sizeof(long), 0644, proc_doulongvec_minmax); + sizeof(long), 0644, proc_doulongvec_minmax, false); set_table_entry(&table[2], "busy_idx", &sd->busy_idx, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, true); set_table_entry(&table[3], "idle_idx", &sd->idle_idx, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, true); set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, true); set_table_entry(&table[5], "wake_idx", &sd->wake_idx, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, true); set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, true); set_table_entry(&table[7], "busy_factor", &sd->busy_factor, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, false); set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, false); set_table_entry(&table[9], "cache_nice_tries", &sd->cache_nice_tries, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, false); set_table_entry(&table[10], "flags", &sd->flags, - sizeof(int), 0644, proc_dointvec_minmax); + sizeof(int), 0644, proc_dointvec_minmax, false); set_table_entry(&table[11], "name", sd->name, - CORENAME_MAX_SIZE, 0444, proc_dostring); + CORENAME_MAX_SIZE, 0444, proc_dostring, false); /* &table[12] is terminator */ return table; @@ -5604,7 +5065,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) migrate_tasks(cpu); BUG_ON(rq->nr_running != 1); /* the migration thread */ raw_spin_unlock_irqrestore(&rq->lock, flags); + break; + case CPU_DEAD: calc_load_migrate(rq); break; #endif @@ -6537,7 +6000,6 @@ sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | 0*SD_BALANCE_FORK | 0*SD_BALANCE_WAKE | 0*SD_WAKE_AFFINE - | 0*SD_PREFER_LOCAL | 0*SD_SHARE_CPUPOWER | 0*SD_SHARE_PKG_RESOURCES | 1*SD_SERIALIZE @@ -8335,6 +7797,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { * (balbir@in.ibm.com). */ +struct cpuacct root_cpuacct; + /* create a new cpu accounting group */ static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) { diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c new file mode 100644 index 00000000000..81b763ba58a --- /dev/null +++ b/kernel/sched/cputime.c @@ -0,0 +1,530 @@ +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/tsacct_kern.h> +#include <linux/kernel_stat.h> +#include <linux/static_key.h> +#include "sched.h" + + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +/* + * There are no locks covering percpu hardirq/softirq time. + * They are only modified in vtime_account, on corresponding CPU + * with interrupts disabled. So, writes are safe. + * They are read and saved off onto struct rq in update_rq_clock(). + * This may result in other CPU reading this CPU's irq time and can + * race with irq/vtime_account on this CPU. We would either get old + * or new value with a side effect of accounting a slice of irq time to wrong + * task when irq is in progress while we read rq->clock. That is a worthy + * compromise in place of having locks on each irq in account_system_time. + */ +DEFINE_PER_CPU(u64, cpu_hardirq_time); +DEFINE_PER_CPU(u64, cpu_softirq_time); + +static DEFINE_PER_CPU(u64, irq_start_time); +static int sched_clock_irqtime; + +void enable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 1; +} + +void disable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 0; +} + +#ifndef CONFIG_64BIT +DEFINE_PER_CPU(seqcount_t, irq_time_seq); +#endif /* CONFIG_64BIT */ + +/* + * Called before incrementing preempt_count on {soft,}irq_enter + * and before decrementing preempt_count on {soft,}irq_exit. + */ +void vtime_account(struct task_struct *curr) +{ + unsigned long flags; + s64 delta; + int cpu; + + if (!sched_clock_irqtime) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); + __this_cpu_add(irq_start_time, delta); + + irq_time_write_begin(); + /* + * We do not account for softirq time from ksoftirqd here. + * We want to continue accounting softirq time to ksoftirqd thread + * in that case, so as not to confuse scheduler with a special task + * that do not consume any time, but still wants to run. + */ + if (hardirq_count()) + __this_cpu_add(cpu_hardirq_time, delta); + else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) + __this_cpu_add(cpu_softirq_time, delta); + + irq_time_write_end(); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(vtime_account); + +static int irqtime_account_hi_update(void) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + unsigned long flags; + u64 latest_ns; + int ret = 0; + + local_irq_save(flags); + latest_ns = this_cpu_read(cpu_hardirq_time); + if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) + ret = 1; + local_irq_restore(flags); + return ret; +} + +static int irqtime_account_si_update(void) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + unsigned long flags; + u64 latest_ns; + int ret = 0; + + local_irq_save(flags); + latest_ns = this_cpu_read(cpu_softirq_time); + if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) + ret = 1; + local_irq_restore(flags); + return ret; +} + +#else /* CONFIG_IRQ_TIME_ACCOUNTING */ + +#define sched_clock_irqtime (0) + +#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ + +static inline void task_group_account_field(struct task_struct *p, int index, + u64 tmp) +{ +#ifdef CONFIG_CGROUP_CPUACCT + struct kernel_cpustat *kcpustat; + struct cpuacct *ca; +#endif + /* + * Since all updates are sure to touch the root cgroup, we + * get ourselves ahead and touch it first. If the root cgroup + * is the only cgroup, then nothing else should be necessary. + * + */ + __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; + +#ifdef CONFIG_CGROUP_CPUACCT + if (unlikely(!cpuacct_subsys.active)) + return; + + rcu_read_lock(); + ca = task_ca(p); + while (ca && (ca != &root_cpuacct)) { + kcpustat = this_cpu_ptr(ca->cpustat); + kcpustat->cpustat[index] += tmp; + ca = parent_ca(ca); + } + rcu_read_unlock(); +#endif +} + +/* + * Account user cpu time to a process. + * @p: the process that the cpu time gets accounted to + * @cputime: the cpu time spent in user space since the last update + * @cputime_scaled: cputime scaled by cpu frequency + */ +void account_user_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) +{ + int index; + + /* Add user time to process. */ + p->utime += cputime; + p->utimescaled += cputime_scaled; + account_group_user_time(p, cputime); + + index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + + /* Add user time to cpustat. */ + task_group_account_field(p, index, (__force u64) cputime); + + /* Account for user time used */ + acct_update_integrals(p); +} + +/* + * Account guest cpu time to a process. + * @p: the process that the cpu time gets accounted to + * @cputime: the cpu time spent in virtual machine since the last update + * @cputime_scaled: cputime scaled by cpu frequency + */ +static void account_guest_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + + /* Add guest time to process. */ + p->utime += cputime; + p->utimescaled += cputime_scaled; + account_group_user_time(p, cputime); + p->gtime += cputime; + + /* Add guest time to cpustat. */ + if (TASK_NICE(p) > 0) { + cpustat[CPUTIME_NICE] += (__force u64) cputime; + cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; + } else { + cpustat[CPUTIME_USER] += (__force u64) cputime; + cpustat[CPUTIME_GUEST] += (__force u64) cputime; + } +} + +/* + * Account system cpu time to a process and desired cpustat field + * @p: the process that the cpu time gets accounted to + * @cputime: the cpu time spent in kernel space since the last update + * @cputime_scaled: cputime scaled by cpu frequency + * @target_cputime64: pointer to cpustat field that has to be updated + */ +static inline +void __account_system_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled, int index) +{ + /* Add system time to process. */ + p->stime += cputime; + p->stimescaled += cputime_scaled; + account_group_system_time(p, cputime); + + /* Add system time to cpustat. */ + task_group_account_field(p, index, (__force u64) cputime); + + /* Account for system time used */ + acct_update_integrals(p); +} + +/* + * Account system cpu time to a process. + * @p: the process that the cpu time gets accounted to + * @hardirq_offset: the offset to subtract from hardirq_count() + * @cputime: the cpu time spent in kernel space since the last update + * @cputime_scaled: cputime scaled by cpu frequency + */ +void account_system_time(struct task_struct *p, int hardirq_offset, + cputime_t cputime, cputime_t cputime_scaled) +{ + int index; + + if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { + account_guest_time(p, cputime, cputime_scaled); + return; + } + + if (hardirq_count() - hardirq_offset) + index = CPUTIME_IRQ; + else if (in_serving_softirq()) + index = CPUTIME_SOFTIRQ; + else + index = CPUTIME_SYSTEM; + + __account_system_time(p, cputime, cputime_scaled, index); +} + +/* + * Account for involuntary wait time. + * @cputime: the cpu time spent in involuntary wait + */ +void account_steal_time(cputime_t cputime) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + + cpustat[CPUTIME_STEAL] += (__force u64) cputime; +} + +/* + * Account for idle time. + * @cputime: the cpu time spent in idle wait + */ +void account_idle_time(cputime_t cputime) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + struct rq *rq = this_rq(); + + if (atomic_read(&rq->nr_iowait) > 0) + cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; + else + cpustat[CPUTIME_IDLE] += (__force u64) cputime; +} + +static __always_inline bool steal_account_process_tick(void) +{ +#ifdef CONFIG_PARAVIRT + if (static_key_false(¶virt_steal_enabled)) { + u64 steal, st = 0; + + steal = paravirt_steal_clock(smp_processor_id()); + steal -= this_rq()->prev_steal_time; + + st = steal_ticks(steal); + this_rq()->prev_steal_time += st * TICK_NSEC; + + account_steal_time(st); + return st; + } +#endif + return false; +} + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * Account a tick to a process and cpustat + * @p: the process that the cpu time gets accounted to + * @user_tick: is the tick from userspace + * @rq: the pointer to rq + * + * Tick demultiplexing follows the order + * - pending hardirq update + * - pending softirq update + * - user_time + * - idle_time + * - system time + * - check for guest_time + * - else account as system_time + * + * Check for hardirq is done both for system and user time as there is + * no timer going off while we are on hardirq and hence we may never get an + * opportunity to update it solely in system time. + * p->stime and friends are only updated on system time and not on irq + * softirq as those do not count in task exec_runtime any more. + */ +static void irqtime_account_process_tick(struct task_struct *p, int user_tick, + struct rq *rq) +{ + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + u64 *cpustat = kcpustat_this_cpu->cpustat; + + if (steal_account_process_tick()) + return; + + if (irqtime_account_hi_update()) { + cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; + } else if (irqtime_account_si_update()) { + cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; + } else if (this_cpu_ksoftirqd() == p) { + /* + * ksoftirqd time do not get accounted in cpu_softirq_time. + * So, we have to handle it separately here. + * Also, p->stime needs to be updated for ksoftirqd. + */ + __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, + CPUTIME_SOFTIRQ); + } else if (user_tick) { + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + } else if (p == rq->idle) { + account_idle_time(cputime_one_jiffy); + } else if (p->flags & PF_VCPU) { /* System time or guest time */ + account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); + } else { + __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, + CPUTIME_SYSTEM); + } +} + +static void irqtime_account_idle_ticks(int ticks) +{ + int i; + struct rq *rq = this_rq(); + + for (i = 0; i < ticks; i++) + irqtime_account_process_tick(current, 0, rq); +} +#else /* CONFIG_IRQ_TIME_ACCOUNTING */ +static void irqtime_account_idle_ticks(int ticks) {} +static void irqtime_account_process_tick(struct task_struct *p, int user_tick, + struct rq *rq) {} +#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + +/* + * Account a single tick of cpu time. + * @p: the process that the cpu time gets accounted to + * @user_tick: indicates if the tick is a user or a system tick + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + struct rq *rq = this_rq(); + + if (sched_clock_irqtime) { + irqtime_account_process_tick(p, user_tick, rq); + return; + } + + if (steal_account_process_tick()) + return; + + if (user_tick) + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, + one_jiffy_scaled); + else + account_idle_time(cputime_one_jiffy); +} + +/* + * Account multiple ticks of steal time. + * @p: the process from which the cpu time has been stolen + * @ticks: number of stolen ticks + */ +void account_steal_ticks(unsigned long ticks) +{ + account_steal_time(jiffies_to_cputime(ticks)); +} + +/* + * Account multiple ticks of idle time. + * @ticks: number of stolen ticks + */ +void account_idle_ticks(unsigned long ticks) +{ + + if (sched_clock_irqtime) { + irqtime_account_idle_ticks(ticks); + return; + } + + account_idle_time(jiffies_to_cputime(ticks)); +} + +#endif + +/* + * Use precise platform statistics if available: + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + *ut = p->utime; + *st = p->stime; +} + +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; +} + +/* + * Archs that account the whole time spent in the idle task + * (outside irq) as idle time can rely on this and just implement + * vtime_account_system() and vtime_account_idle(). Archs that + * have other meaning of the idle time (s390 only includes the + * time spent by the CPU when it's in low power mode) must override + * vtime_account(). + */ +#ifndef __ARCH_HAS_VTIME_ACCOUNT +void vtime_account(struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + + if (in_interrupt() || !is_idle_task(tsk)) + vtime_account_system(tsk); + else + vtime_account_idle(tsk); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(vtime_account); +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ + +#else + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) +#endif + +static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +{ + u64 temp = (__force u64) rtime; + + temp *= (__force u64) utime; + + if (sizeof(cputime_t) == 4) + temp = div_u64(temp, (__force u32) total); + else + temp = div64_u64(temp, (__force u64) total); + + return (__force cputime_t) temp; +} + +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + cputime_t rtime, utime = p->utime, total = utime + p->stime; + + /* + * Use CFS's precise accounting: + */ + rtime = nsecs_to_cputime(p->se.sum_exec_runtime); + + if (total) + utime = scale_utime(utime, rtime, total); + else + utime = rtime; + + /* + * Compare with previous values, to keep monotonicity: + */ + p->prev_utime = max(p->prev_utime, utime); + p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); + + *ut = p->prev_utime; + *st = p->prev_stime; +} + +/* + * Must be called with siglock held. + */ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct signal_struct *sig = p->signal; + struct task_cputime cputime; + cputime_t rtime, utime, total; + + thread_group_cputime(p, &cputime); + + total = cputime.utime + cputime.stime; + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + + if (total) + utime = scale_utime(cputime.utime, rtime, total); + else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); + sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); + + *ut = sig->prev_utime; + *st = sig->prev_stime; +} +#endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 96e2b18b628..6b800a14b99 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se) /* * The idea is to set a period in which each task runs once. * - * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch + * When there are too many tasks (sched_nr_latency) we have to stretch * this period because otherwise the slices get too small. * * p = (nr <= nl) ? l : l*nr/nl @@ -2700,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int prev_cpu = task_cpu(p); int new_cpu = cpu; int want_affine = 0; - int want_sd = 1; int sync = wake_flags & WF_SYNC; if (p->nr_cpus_allowed == 1) @@ -2718,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) continue; /* - * If power savings logic is enabled for a domain, see if we - * are not overloaded, if so, don't balance wider. - */ - if (tmp->flags & (SD_PREFER_LOCAL)) { - unsigned long power = 0; - unsigned long nr_running = 0; - unsigned long capacity; - int i; - - for_each_cpu(i, sched_domain_span(tmp)) { - power += power_of(i); - nr_running += cpu_rq(i)->cfs.nr_running; - } - - capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); - - if (nr_running < capacity) - want_sd = 0; - } - - /* * If both cpu and prev_cpu are part of this domain, * cpu is a valid SD_WAKE_AFFINE target. */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { affine_sd = tmp; - want_affine = 0; - } - - if (!want_sd && !want_affine) break; + } - if (!(tmp->flags & sd_flag)) - continue; - - if (want_sd) + if (tmp->flags & sd_flag) sd = tmp; } if (affine_sd) { - if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) + if (cpu != prev_cpu && wake_affine(affine_sd, p, sync)) prev_cpu = cpu; new_cpu = select_idle_sibling(p, prev_cpu); @@ -4295,7 +4267,7 @@ redo: goto out_balanced; } - BUG_ON(busiest == this_rq); + BUG_ON(busiest == env.dst_rq); schedstat_add(sd, lb_imbalance[idle], env.imbalance); @@ -4316,7 +4288,7 @@ redo: update_h_load(env.src_cpu); more_balance: local_irq_save(flags); - double_rq_lock(this_rq, busiest); + double_rq_lock(env.dst_rq, busiest); /* * cur_ld_moved - load moved in current iteration @@ -4324,7 +4296,7 @@ more_balance: */ cur_ld_moved = move_tasks(&env); ld_moved += cur_ld_moved; - double_rq_unlock(this_rq, busiest); + double_rq_unlock(env.dst_rq, busiest); local_irq_restore(flags); if (env.flags & LBF_NEED_BREAK) { @@ -4360,8 +4332,7 @@ more_balance: if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 && lb_iterations++ < max_lb_iterations) { - this_rq = cpu_rq(env.new_dst_cpu); - env.dst_rq = this_rq; + env.dst_rq = cpu_rq(env.new_dst_cpu); env.dst_cpu = env.new_dst_cpu; env.flags &= ~LBF_SOME_PINNED; env.loop = 0; @@ -4646,7 +4617,7 @@ static void nohz_balancer_kick(int cpu) return; } -static inline void clear_nohz_tick_stopped(int cpu) +static inline void nohz_balance_exit_idle(int cpu) { if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); @@ -4686,28 +4657,23 @@ void set_cpu_sd_state_idle(void) } /* - * This routine will record that this cpu is going idle with tick stopped. + * This routine will record that the cpu is going idle with tick stopped. * This info will be used in performing idle load balancing in the future. */ -void select_nohz_load_balancer(int stop_tick) +void nohz_balance_enter_idle(int cpu) { - int cpu = smp_processor_id(); - /* * If this cpu is going down, then nothing needs to be done. */ if (!cpu_active(cpu)) return; - if (stop_tick) { - if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) - return; + if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) + return; - cpumask_set_cpu(cpu, nohz.idle_cpus_mask); - atomic_inc(&nohz.nr_cpus); - set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); - } - return; + cpumask_set_cpu(cpu, nohz.idle_cpus_mask); + atomic_inc(&nohz.nr_cpus); + set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); } static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, @@ -4715,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, { switch (action & ~CPU_TASKS_FROZEN) { case CPU_DYING: - clear_nohz_tick_stopped(smp_processor_id()); + nohz_balance_exit_idle(smp_processor_id()); return NOTIFY_OK; default: return NOTIFY_DONE; @@ -4837,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) if (need_resched()) break; - raw_spin_lock_irq(&this_rq->lock); - update_rq_clock(this_rq); - update_idle_cpu_load(this_rq); - raw_spin_unlock_irq(&this_rq->lock); + rq = cpu_rq(balance_cpu); + + raw_spin_lock_irq(&rq->lock); + update_rq_clock(rq); + update_idle_cpu_load(rq); + raw_spin_unlock_irq(&rq->lock); rebalance_domains(balance_cpu, CPU_IDLE); - rq = cpu_rq(balance_cpu); if (time_after(this_rq->next_balance, rq->next_balance)) this_rq->next_balance = rq->next_balance; } @@ -4875,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu) * busy tick after returning from idle, we will update the busy stats. */ set_cpu_sd_state_busy(); - clear_nohz_tick_stopped(cpu); + nohz_balance_exit_idle(cpu); /* * None are in tickless mode and hence no need for NOHZ idle load diff --git a/kernel/sched/features.h b/kernel/sched/features.h index de00a486c5c..eebefcad702 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -12,14 +12,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true) SCHED_FEAT(START_DEBIT, true) /* - * Based on load and program behaviour, see if it makes sense to place - * a newly woken task on the same cpu as the task that woke it -- - * improve cache locality. Typically used with SYNC wakeups as - * generated by pipes and the like, see also SYNC_WAKEUPS. - */ -SCHED_FEAT(AFFINE_WAKEUPS, true) - -/* * Prefer to schedule the task we woke last (assuming it failed * wakeup-preemption), since its likely going to consume data we * touched, increases cache locality. @@ -42,7 +34,7 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true) /* * Use arch dependent cpu power functions */ -SCHED_FEAT(ARCH_POWER, false) +SCHED_FEAT(ARCH_POWER, true) SCHED_FEAT(HRTICK, false) SCHED_FEAT(DOUBLE_TICK, false) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e0b7ba9c040..418feb01344 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1632,11 +1632,6 @@ static int push_rt_task(struct rq *rq) if (!next_task) return 0; -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - if (unlikely(task_running(rq, next_task))) - return 0; -#endif - retry: if (unlikely(next_task == rq->curr)) { WARN_ON(1); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0848fa36c38..7a7db09cfab 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -737,11 +737,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) */ next->on_cpu = 1; #endif -#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW - raw_spin_unlock_irq(&rq->lock); -#else raw_spin_unlock(&rq->lock); -#endif } static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) @@ -755,9 +751,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) smp_wmb(); prev->on_cpu = 0; #endif -#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW local_irq_enable(); -#endif } #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ @@ -891,6 +885,9 @@ struct cpuacct { struct kernel_cpustat __percpu *cpustat; }; +extern struct cgroup_subsys cpuacct_subsys; +extern struct cpuacct root_cpuacct; + /* return cpu accounting group corresponding to this container */ static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) { @@ -917,6 +914,16 @@ extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} #endif +#ifdef CONFIG_PARAVIRT +static inline u64 steal_ticks(u64 steal) +{ + if (unlikely(steal > NSEC_PER_SEC)) + return div_u64(steal, TICK_NSEC); + + return __iter_div_u64_rem(steal, TICK_NSEC, &steal); +} +#endif + static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; @@ -1156,3 +1163,53 @@ enum rq_nohz_flag_bits { #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) #endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +DECLARE_PER_CPU(u64, cpu_hardirq_time); +DECLARE_PER_CPU(u64, cpu_softirq_time); + +#ifndef CONFIG_64BIT +DECLARE_PER_CPU(seqcount_t, irq_time_seq); + +static inline void irq_time_write_begin(void) +{ + __this_cpu_inc(irq_time_seq.sequence); + smp_wmb(); +} + +static inline void irq_time_write_end(void) +{ + smp_wmb(); + __this_cpu_inc(irq_time_seq.sequence); +} + +static inline u64 irq_time_read(int cpu) +{ + u64 irq_time; + unsigned seq; + + do { + seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); + irq_time = per_cpu(cpu_softirq_time, cpu) + + per_cpu(cpu_hardirq_time, cpu); + } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); + + return irq_time; +} +#else /* CONFIG_64BIT */ +static inline void irq_time_write_begin(void) +{ +} + +static inline void irq_time_write_end(void) +{ +} + +static inline u64 irq_time_read(int cpu) +{ + return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); +} +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ + diff --git a/kernel/signal.c b/kernel/signal.c index be4f856d52f..2c681f11b7d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1971,13 +1971,8 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); spin_lock_irq(¤t->sighand->siglock); ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); @@ -2198,13 +2193,8 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; - if (unlikely(current->task_works)) { - if (test_and_clear_ti_thread_flag(current_thread_info(), - TIF_NOTIFY_RESUME)) { - smp_mb__after_clear_bit(); - task_work_run(); - } - } + if (unlikely(current->task_works)) + task_work_run(); if (unlikely(uprobe_deny_signal())) return 0; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 98f60c5caa1..d6c5fc05424 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -1,14 +1,22 @@ /* * Common SMP CPU bringup/teardown functions */ +#include <linux/cpu.h> #include <linux/err.h> #include <linux/smp.h> #include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> #include <linux/sched.h> +#include <linux/export.h> #include <linux/percpu.h> +#include <linux/kthread.h> +#include <linux/smpboot.h> #include "smpboot.h" +#ifdef CONFIG_SMP + #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD /* * For the hotplug case we keep the task structs around and reuse @@ -65,3 +73,228 @@ void __init idle_threads_init(void) } } #endif + +#endif /* #ifdef CONFIG_SMP */ + +static LIST_HEAD(hotplug_threads); +static DEFINE_MUTEX(smpboot_threads_lock); + +struct smpboot_thread_data { + unsigned int cpu; + unsigned int status; + struct smp_hotplug_thread *ht; +}; + +enum { + HP_THREAD_NONE = 0, + HP_THREAD_ACTIVE, + HP_THREAD_PARKED, +}; + +/** + * smpboot_thread_fn - percpu hotplug thread loop function + * @data: thread data pointer + * + * Checks for thread stop and park conditions. Calls the necessary + * setup, cleanup, park and unpark functions for the registered + * thread. + * + * Returns 1 when the thread should exit, 0 otherwise. + */ +static int smpboot_thread_fn(void *data) +{ + struct smpboot_thread_data *td = data; + struct smp_hotplug_thread *ht = td->ht; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + preempt_disable(); + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + preempt_enable(); + if (ht->cleanup) + ht->cleanup(td->cpu, cpu_online(td->cpu)); + kfree(td); + return 0; + } + + if (kthread_should_park()) { + __set_current_state(TASK_RUNNING); + preempt_enable(); + if (ht->park && td->status == HP_THREAD_ACTIVE) { + BUG_ON(td->cpu != smp_processor_id()); + ht->park(td->cpu); + td->status = HP_THREAD_PARKED; + } + kthread_parkme(); + /* We might have been woken for stop */ + continue; + } + + BUG_ON(td->cpu != smp_processor_id()); + + /* Check for state change setup */ + switch (td->status) { + case HP_THREAD_NONE: + preempt_enable(); + if (ht->setup) + ht->setup(td->cpu); + td->status = HP_THREAD_ACTIVE; + preempt_disable(); + break; + case HP_THREAD_PARKED: + preempt_enable(); + if (ht->unpark) + ht->unpark(td->cpu); + td->status = HP_THREAD_ACTIVE; + preempt_disable(); + break; + } + + if (!ht->thread_should_run(td->cpu)) { + preempt_enable(); + schedule(); + } else { + set_current_state(TASK_RUNNING); + preempt_enable(); + ht->thread_fn(td->cpu); + } + } +} + +static int +__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + struct smpboot_thread_data *td; + + if (tsk) + return 0; + + td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu)); + if (!td) + return -ENOMEM; + td->cpu = cpu; + td->ht = ht; + + tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, + ht->thread_comm); + if (IS_ERR(tsk)) { + kfree(td); + return PTR_ERR(tsk); + } + + get_task_struct(tsk); + *per_cpu_ptr(ht->store, cpu) = tsk; + return 0; +} + +int smpboot_create_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + int ret = 0; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry(cur, &hotplug_threads, list) { + ret = __smpboot_create_thread(cur, cpu); + if (ret) + break; + } + mutex_unlock(&smpboot_threads_lock); + return ret; +} + +static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + + kthread_unpark(tsk); +} + +void smpboot_unpark_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry(cur, &hotplug_threads, list) + smpboot_unpark_thread(cur, cpu); + mutex_unlock(&smpboot_threads_lock); +} + +static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) +{ + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + + if (tsk) + kthread_park(tsk); +} + +void smpboot_park_threads(unsigned int cpu) +{ + struct smp_hotplug_thread *cur; + + mutex_lock(&smpboot_threads_lock); + list_for_each_entry_reverse(cur, &hotplug_threads, list) + smpboot_park_thread(cur, cpu); + mutex_unlock(&smpboot_threads_lock); +} + +static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) +{ + unsigned int cpu; + + /* We need to destroy also the parked threads of offline cpus */ + for_each_possible_cpu(cpu) { + struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); + + if (tsk) { + kthread_stop(tsk); + put_task_struct(tsk); + *per_cpu_ptr(ht->store, cpu) = NULL; + } + } +} + +/** + * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug + * @plug_thread: Hotplug thread descriptor + * + * Creates and starts the threads on all online cpus. + */ +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) +{ + unsigned int cpu; + int ret = 0; + + mutex_lock(&smpboot_threads_lock); + for_each_online_cpu(cpu) { + ret = __smpboot_create_thread(plug_thread, cpu); + if (ret) { + smpboot_destroy_threads(plug_thread); + goto out; + } + smpboot_unpark_thread(plug_thread, cpu); + } + list_add(&plug_thread->list, &hotplug_threads); +out: + mutex_unlock(&smpboot_threads_lock); + return ret; +} +EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); + +/** + * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug + * @plug_thread: Hotplug thread descriptor + * + * Stops all threads on all possible cpus. + */ +void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) +{ + get_online_cpus(); + mutex_lock(&smpboot_threads_lock); + list_del(&plug_thread->list); + smpboot_destroy_threads(plug_thread); + mutex_unlock(&smpboot_threads_lock); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); diff --git a/kernel/smpboot.h b/kernel/smpboot.h index 6ef9433e1c7..72415a0eb95 100644 --- a/kernel/smpboot.h +++ b/kernel/smpboot.h @@ -13,4 +13,8 @@ static inline void idle_thread_set_boot_cpu(void) { } static inline void idle_threads_init(void) { } #endif +int smpboot_create_threads(unsigned int cpu); +void smpboot_park_threads(unsigned int cpu); +void smpboot_unpark_threads(unsigned int cpu); + #endif diff --git a/kernel/softirq.c b/kernel/softirq.c index b73e681df09..cc96bdc0c2c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -23,6 +23,7 @@ #include <linux/rcupdate.h> #include <linux/ftrace.h> #include <linux/smp.h> +#include <linux/smpboot.h> #include <linux/tick.h> #define CREATE_TRACE_POINTS @@ -220,7 +221,7 @@ asmlinkage void __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - account_system_vtime(current); + vtime_account(current); __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET); @@ -271,7 +272,7 @@ restart: lockdep_softirq_exit(); - account_system_vtime(current); + vtime_account(current); __local_bh_enable(SOFTIRQ_OFFSET); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } @@ -340,7 +341,7 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { - account_system_vtime(current); + vtime_account(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) @@ -742,49 +743,22 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } -static int run_ksoftirqd(void * __bind_cpu) +static int ksoftirqd_should_run(unsigned int cpu) { - set_current_state(TASK_INTERRUPTIBLE); - - while (!kthread_should_stop()) { - preempt_disable(); - if (!local_softirq_pending()) { - schedule_preempt_disabled(); - } - - __set_current_state(TASK_RUNNING); - - while (local_softirq_pending()) { - /* Preempt disable stops cpu going offline. - If already offline, we'll be on wrong CPU: - don't process */ - if (cpu_is_offline((long)__bind_cpu)) - goto wait_to_die; - local_irq_disable(); - if (local_softirq_pending()) - __do_softirq(); - local_irq_enable(); - sched_preempt_enable_no_resched(); - cond_resched(); - preempt_disable(); - rcu_note_context_switch((long)__bind_cpu); - } - preempt_enable(); - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); - return 0; + return local_softirq_pending(); +} -wait_to_die: - preempt_enable(); - /* Wait for kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - schedule(); - set_current_state(TASK_INTERRUPTIBLE); +static void run_ksoftirqd(unsigned int cpu) +{ + local_irq_disable(); + if (local_softirq_pending()) { + __do_softirq(); + rcu_note_context_switch(cpu); + local_irq_enable(); + cond_resched(); + return; } - __set_current_state(TASK_RUNNING); - return 0; + local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU @@ -850,50 +824,14 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - int hotcpu = (unsigned long)hcpu; - struct task_struct *p; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - p = kthread_create_on_node(run_ksoftirqd, - hcpu, - cpu_to_node(hotcpu), - "ksoftirqd/%d", hotcpu); - if (IS_ERR(p)) { - printk("ksoftirqd for %i failed\n", hotcpu); - return notifier_from_errno(PTR_ERR(p)); - } - kthread_bind(p, hotcpu); - per_cpu(ksoftirqd, hotcpu) = p; - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - wake_up_process(per_cpu(ksoftirqd, hotcpu)); - break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - if (!per_cpu(ksoftirqd, hotcpu)) - break; - /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), - cpumask_any(cpu_online_mask)); case CPU_DEAD: - case CPU_DEAD_FROZEN: { - static const struct sched_param param = { - .sched_priority = MAX_RT_PRIO-1 - }; - - p = per_cpu(ksoftirqd, hotcpu); - per_cpu(ksoftirqd, hotcpu) = NULL; - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); - kthread_stop(p); - takeover_tasklets(hotcpu); + case CPU_DEAD_FROZEN: + takeover_tasklets((unsigned long)hcpu); break; - } #endif /* CONFIG_HOTPLUG_CPU */ - } + } return NOTIFY_OK; } @@ -901,14 +839,19 @@ static struct notifier_block __cpuinitdata cpu_nfb = { .notifier_call = cpu_callback }; +static struct smp_hotplug_thread softirq_threads = { + .store = &ksoftirqd, + .thread_should_run = ksoftirqd_should_run, + .thread_fn = run_ksoftirqd, + .thread_comm = "ksoftirqd/%u", +}; + static __init int spawn_ksoftirqd(void) { - void *cpu = (void *)(long)smp_processor_id(); - int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - - BUG_ON(err != NOTIFY_OK); - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); + + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); + return 0; } early_initcall(spawn_ksoftirqd); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 87174ef5916..81c7b1a1a30 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -307,7 +307,7 @@ static struct ctl_table kern_table[] = { .extra2 = &max_sched_tunable_scaling, }, { - .procname = "sched_migration_cost", + .procname = "sched_migration_cost_ns", .data = &sysctl_sched_migration_cost, .maxlen = sizeof(unsigned int), .mode = 0644, @@ -321,14 +321,14 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "sched_time_avg", + .procname = "sched_time_avg_ms", .data = &sysctl_sched_time_avg, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sched_shares_window", + .procname = "sched_shares_window_ns", .data = &sysctl_sched_shares_window, .maxlen = sizeof(unsigned int), .mode = 0644, diff --git a/kernel/task_work.c b/kernel/task_work.c index d320d44903b..65bd3c92d6f 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -2,26 +2,20 @@ #include <linux/task_work.h> #include <linux/tracehook.h> +static struct callback_head work_exited; /* all we need is ->next == NULL */ + int -task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { - struct callback_head *last, *first; - unsigned long flags; + struct callback_head *head; - /* - * Not inserting the new work if the task has already passed - * exit_task_work() is the responisbility of callers. - */ - raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + do { + head = ACCESS_ONCE(task->task_works); + if (unlikely(head == &work_exited)) + return -ESRCH; + work->next = head; + } while (cmpxchg(&task->task_works, head, work) != head); - /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ if (notify) set_notify_resume(task); return 0; @@ -30,52 +24,69 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { + struct callback_head **pprev = &task->task_works; + struct callback_head *work = NULL; unsigned long flags; - struct callback_head *last, *res = NULL; - + /* + * If cmpxchg() fails we continue without updating pprev. + * Either we raced with task_work_add() which added the + * new entry before this work, we will find it again. Or + * we raced with task_work_run(), *pprev == NULL/exited. + */ raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - if (last) { - struct callback_head *q = last, *p = q->next; - while (1) { - if (p->func == func) { - q->next = p->next; - if (p == last) - task->task_works = q == p ? NULL : q; - res = p; - break; - } - if (p == last) - break; - q = p; - p = q->next; - } + while ((work = ACCESS_ONCE(*pprev))) { + read_barrier_depends(); + if (work->func != func) + pprev = &work->next; + else if (cmpxchg(pprev, work, work->next) == work) + break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return res; + + return work; } void task_work_run(void) { struct task_struct *task = current; - struct callback_head *p, *q; + struct callback_head *work, *head, *next; + + for (;;) { + /* + * work->func() can do task_work_add(), do not set + * work_exited unless the list is empty. + */ + do { + work = ACCESS_ONCE(task->task_works); + head = !work && (task->flags & PF_EXITING) ? + &work_exited : NULL; + } while (cmpxchg(&task->task_works, work, head) != work); - while (1) { - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + if (!work) + break; + /* + * Synchronize with task_work_cancel(). It can't remove + * the first entry == work, cmpxchg(task_works) should + * fail, but it can play with *work and other entries. + */ + raw_spin_unlock_wait(&task->pi_lock); + smp_mb(); - if (unlikely(!p)) - return; + /* Reverse the list to run the works in fifo order */ + head = NULL; + do { + next = work->next; + work->next = head; + head = work; + work = next; + } while (work); - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + work = head; + do { + next = work->next; + work->func(work); + work = next; cond_resched(); - } + } while (work); } } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3a9e5d5c109..f423bdd035c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -372,7 +372,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, * the scheduler tick in nohz_restart_sched_tick. */ if (!ts->tick_stopped) { - select_nohz_load_balancer(1); + nohz_balance_enter_idle(cpu); calc_load_enter_idle(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); @@ -436,7 +436,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; - if (ratelimit < 10) { + if (ratelimit < 10 && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", (unsigned int) local_softirq_pending()); ratelimit++; @@ -569,7 +570,6 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { /* Update jiffies first */ - select_nohz_load_balancer(0); tick_do_update_jiffies64(now); update_cpu_load_nohz(); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 34e5eac8142..d3b91e75cec 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -303,10 +303,11 @@ void getnstimeofday(struct timespec *ts) seq = read_seqbegin(&tk->lock); ts->tv_sec = tk->xtime_sec; - ts->tv_nsec = timekeeping_get_ns(tk); + nsecs = timekeeping_get_ns(tk); } while (read_seqretry(&tk->lock, seq)); + ts->tv_nsec = 0; timespec_add_ns(ts, nsecs); } EXPORT_SYMBOL(getnstimeofday); @@ -345,6 +346,7 @@ void ktime_get_ts(struct timespec *ts) { struct timekeeper *tk = &timekeeper; struct timespec tomono; + s64 nsec; unsigned int seq; WARN_ON(timekeeping_suspended); @@ -352,13 +354,14 @@ void ktime_get_ts(struct timespec *ts) do { seq = read_seqbegin(&tk->lock); ts->tv_sec = tk->xtime_sec; - ts->tv_nsec = timekeeping_get_ns(tk); + nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; } while (read_seqretry(&tk->lock, seq)); - set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec); + ts->tv_sec += tomono.tv_sec; + ts->tv_nsec = 0; + timespec_add_ns(ts, nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); @@ -1244,6 +1247,7 @@ void get_monotonic_boottime(struct timespec *ts) { struct timekeeper *tk = &timekeeper; struct timespec tomono, sleep; + s64 nsec; unsigned int seq; WARN_ON(timekeeping_suspended); @@ -1251,14 +1255,15 @@ void get_monotonic_boottime(struct timespec *ts) do { seq = read_seqbegin(&tk->lock); ts->tv_sec = tk->xtime_sec; - ts->tv_nsec = timekeeping_get_ns(tk); + nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; sleep = tk->total_sleep_time; } while (read_seqretry(&tk->lock, seq)); - set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, - ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); + ts->tv_sec += tomono.tv_sec + sleep.tv_sec; + ts->tv_nsec = 0; + timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec); } EXPORT_SYMBOL_GPL(get_monotonic_boottime); diff --git a/kernel/timer.c b/kernel/timer.c index 8c5e7b908c6..d5de1b2292a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -92,24 +92,25 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { - return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG); + return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); } -static inline struct tvec_base *tbase_get_base(struct tvec_base *base) +static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) { - return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG)); + return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); } -static inline void timer_set_deferrable(struct timer_list *timer) +static inline struct tvec_base *tbase_get_base(struct tvec_base *base) { - timer->base = TBASE_MAKE_DEFERRED(timer->base); + return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); } static inline void timer_set_base(struct timer_list *timer, struct tvec_base *new_base) { - timer->base = (struct tvec_base *)((unsigned long)(new_base) | - tbase_get_deferrable(timer->base)); + unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; + + timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags); } static unsigned long round_jiffies_common(unsigned long j, int cpu, @@ -563,16 +564,14 @@ static inline void debug_timer_assert_init(struct timer_list *timer) debug_object_assert_init(timer, &timer_debug_descr); } -static void __init_timer(struct timer_list *timer, - const char *name, - struct lock_class_key *key); +static void do_init_timer(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key); -void init_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key) +void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) { debug_object_init_on_stack(timer, &timer_debug_descr); - __init_timer(timer, name, key); + do_init_timer(timer, flags, name, key); } EXPORT_SYMBOL_GPL(init_timer_on_stack_key); @@ -613,12 +612,13 @@ static inline void debug_assert_init(struct timer_list *timer) debug_timer_assert_init(timer); } -static void __init_timer(struct timer_list *timer, - const char *name, - struct lock_class_key *key) +static void do_init_timer(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) { + struct tvec_base *base = __raw_get_cpu_var(tvec_bases); + timer->entry.next = NULL; - timer->base = __raw_get_cpu_var(tvec_bases); + timer->base = (void *)((unsigned long)base | flags); timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; @@ -628,22 +628,10 @@ static void __init_timer(struct timer_list *timer, lockdep_init_map(&timer->lockdep_map, name, key, 0); } -void setup_deferrable_timer_on_stack_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key, - void (*function)(unsigned long), - unsigned long data) -{ - timer->function = function; - timer->data = data; - init_timer_on_stack_key(timer, name, key); - timer_set_deferrable(timer); -} -EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key); - /** * init_timer_key - initialize a timer * @timer: the timer to be initialized + * @flags: timer flags * @name: name of the timer * @key: lockdep class key of the fake lock used for tracking timer * sync lock dependencies @@ -651,24 +639,14 @@ EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key); * init_timer_key() must be done to a timer prior calling *any* of the * other timer functions. */ -void init_timer_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key) +void init_timer_key(struct timer_list *timer, unsigned int flags, + const char *name, struct lock_class_key *key) { debug_init(timer); - __init_timer(timer, name, key); + do_init_timer(timer, flags, name, key); } EXPORT_SYMBOL(init_timer_key); -void init_timer_deferrable_key(struct timer_list *timer, - const char *name, - struct lock_class_key *key) -{ - init_timer_key(timer, name, key); - timer_set_deferrable(timer); -} -EXPORT_SYMBOL(init_timer_deferrable_key); - static inline void detach_timer(struct timer_list *timer, bool clear_pending) { struct list_head *entry = &timer->entry; @@ -686,7 +664,7 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base) { detach_timer(timer, true); if (!tbase_get_deferrable(timer->base)) - timer->base->active_timers--; + base->active_timers--; } static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, @@ -697,7 +675,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, detach_timer(timer, clear_pending); if (!tbase_get_deferrable(timer->base)) { - timer->base->active_timers--; + base->active_timers--; if (timer->expires == base->next_timer) base->next_timer = base->timer_jiffies; } @@ -1029,14 +1007,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync); * * Synchronization rules: Callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * interrupt contexts. The caller must not hold locks which would prevent - * completion of the timer's handler. The timer's handler must not call - * add_timer_on(). Upon exit the timer is not queued and the handler is - * not running on any CPU. + * interrupt contexts unless the timer is an irqsafe one. The caller must + * not hold locks which would prevent completion of the timer's + * handler. The timer's handler must not call add_timer_on(). Upon exit the + * timer is not queued and the handler is not running on any CPU. * - * Note: You must not hold locks that are held in interrupt context - * while calling this function. Even if the lock has nothing to do - * with the timer in question. Here's why: + * Note: For !irqsafe timers, you must not hold locks that are held in + * interrupt context while calling this function. Even if the lock has + * nothing to do with the timer in question. Here's why: * * CPU0 CPU1 * ---- ---- @@ -1073,7 +1051,7 @@ int del_timer_sync(struct timer_list *timer) * don't use it in hardirq context, because it * could lead to deadlock. */ - WARN_ON(in_irq()); + WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); for (;;) { int ret = try_to_del_timer_sync(timer); if (ret >= 0) @@ -1180,19 +1158,27 @@ static inline void __run_timers(struct tvec_base *base) while (!list_empty(head)) { void (*fn)(unsigned long); unsigned long data; + bool irqsafe; timer = list_first_entry(head, struct timer_list,entry); fn = timer->function; data = timer->data; + irqsafe = tbase_get_irqsafe(timer->base); timer_stats_account_timer(timer); base->running_timer = timer; detach_expired_timer(timer, base); - spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn, data); - spin_lock_irq(&base->lock); + if (irqsafe) { + spin_unlock(&base->lock); + call_timer_fn(timer, fn, data); + spin_lock(&base->lock); + } else { + spin_unlock_irq(&base->lock); + call_timer_fn(timer, fn, data); + spin_lock_irq(&base->lock); + } } } base->running_timer = NULL; @@ -1791,9 +1777,13 @@ static struct notifier_block __cpuinitdata timers_nb = { void __init init_timers(void) { - int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); + int err; + + /* ensure there are enough low bits for flags in timer->base pointer */ + BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); + err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); init_timer_stats(); BUG_ON(err != NOTIFY_OK); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8c4c07071cc..4cea4f41c1d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS help See Documentation/trace/ftrace-design.txt +config HAVE_FENTRY + bool + help + Arch supports the gcc options -pg with -mfentry + config HAVE_C_RECORDMCOUNT bool help @@ -57,8 +62,12 @@ config HAVE_C_RECORDMCOUNT config TRACER_MAX_TRACE bool +config TRACE_CLOCK + bool + config RING_BUFFER bool + select TRACE_CLOCK config FTRACE_NMI_ENTER bool @@ -109,6 +118,7 @@ config TRACING select NOP_TRACER select BINARY_PRINTF select EVENT_TRACING + select TRACE_CLOCK config GENERIC_TRACER bool diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index b831087c820..d7e2068e4b7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -5,10 +5,12 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +endif # If unlikely tracing is enabled, do not trace these files ifdef CONFIG_TRACING_BRANCHES @@ -17,11 +19,7 @@ endif CFLAGS_trace_events_filter.o := -I$(src) -# -# Make the trace clocks available generally: it's infrastructure -# relied on by ptrace for example: -# -obj-y += trace_clock.o +obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4f20fba09f..9dcf15d3838 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -64,12 +64,20 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +static struct ftrace_ops ftrace_list_end __read_mostly = { + .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; /* Quick disabling of function tracer. */ -int function_trace_stop; +int function_trace_stop __read_mostly; + +/* Current function tracing op */ +struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -86,22 +94,43 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, -}; - static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; -ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif + +/** + * ftrace_nr_registered_ops - return number of ops registered + * + * Returns the number of ftrace_ops registered and tracing functions + */ +int ftrace_nr_registered_ops(void) +{ + struct ftrace_ops *ops; + int cnt = 0; + + mutex_lock(&ftrace_lock); + + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) + cnt++; + + mutex_unlock(&ftrace_lock); + + return cnt; +} /* * Traverse the ftrace_global_list, invoking all entries. The reason that we @@ -112,29 +141,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); * * Silly Alpha and silly pointer-speculation compiler optimizations! */ -static void ftrace_global_list_func(unsigned long ip, - unsigned long parent_ip) +static void +ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) return; trace_recursion_set(TRACE_GLOBAL_BIT); op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); /*see above*/ }; trace_recursion_clear(TRACE_GLOBAL_BIT); } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip); + ftrace_pid_function(ip, parent_ip, op, regs); } static void set_ftrace_pid_function(ftrace_func_t func) @@ -153,25 +182,9 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - __ftrace_trace_function = ftrace_stub; - __ftrace_trace_function_delay = ftrace_stub; ftrace_pid_function = ftrace_stub; } -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -/* - * For those archs that do not test ftrace_trace_stop in their - * mcount call site, we need to do it from C. - */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) -{ - if (function_trace_stop) - return; - - __ftrace_trace_function(ip, parent_ip); -} -#endif - static void control_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -230,28 +243,27 @@ static void update_ftrace_function(void) /* * If we are at the end of the list and this ops is - * not dynamic, then have the mcount trampoline call - * the function directly + * recursion safe and not dynamic and the arch supports passing ops, + * then have the mcount trampoline call the function directly. */ if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && + (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && + !FTRACE_FORCE_LIST_FUNC)) { + /* Set the ftrace_ops that the arch callback uses */ + if (ftrace_ops_list == &global_ops) + function_trace_op = ftrace_global_list; + else + function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; - else + } else { + /* Just use the default ftrace_ops */ + function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; + } -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; -#else -#ifdef CONFIG_DYNAMIC_FTRACE - /* do not update till all functions have been modified */ - __ftrace_trace_function_delay = func; -#else - __ftrace_trace_function = func; -#endif - ftrace_trace_function = - (func == ftrace_stub) ? func : ftrace_test_stop_func; -#endif } static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) @@ -325,6 +337,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) return -EINVAL; +#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* + * If the ftrace_ops specifies SAVE_REGS, then it only can be used + * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. + * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && + !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) + return -EINVAL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) + ops->flags |= FTRACE_OPS_FL_SAVE_REGS; +#endif + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -773,7 +799,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) } static void -function_profile_call(unsigned long ip, unsigned long parent_ip) +function_profile_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -803,7 +830,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - function_profile_call(trace->func, 0); + function_profile_call(trace->func, 0, NULL, NULL); return 1; } @@ -863,6 +890,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int register_ftrace_profiler(void) @@ -1045,6 +1073,7 @@ static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, .filter_hash = EMPTY_HASH, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static DEFINE_MUTEX(ftrace_regex_lock); @@ -1525,6 +1554,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) return; + /* + * If any ops wants regs saved for this function + * then all ops will get saved regs. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; } else { if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) return; @@ -1616,18 +1651,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (enable && (rec->flags & ~FTRACE_FL_MASK)) flag = FTRACE_FL_ENABLED; + /* + * If enabling and the REGS flag does not match the REGS_EN, then + * do not ignore this record. Set flags to fail the compare against + * ENABLED. + */ + if (flag && + (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) + flag |= FTRACE_FL_REGS; + /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) return FTRACE_UPDATE_IGNORE; if (flag) { - if (update) + /* Save off if rec is being enabled (for return value) */ + flag ^= rec->flags & FTRACE_FL_ENABLED; + + if (update) { rec->flags |= FTRACE_FL_ENABLED; - return FTRACE_UPDATE_MAKE_CALL; + if (flag & FTRACE_FL_REGS) { + if (rec->flags & FTRACE_FL_REGS) + rec->flags |= FTRACE_FL_REGS_EN; + else + rec->flags &= ~FTRACE_FL_REGS_EN; + } + } + + /* + * If this record is being updated from a nop, then + * return UPDATE_MAKE_CALL. + * Otherwise, if the EN flag is set, then return + * UPDATE_MODIFY_CALL_REGS to tell the caller to convert + * from the non-save regs, to a save regs function. + * Otherwise, + * return UPDATE_MODIFY_CALL to tell the caller to convert + * from the save regs, to a non-save regs function. + */ + if (flag & FTRACE_FL_ENABLED) + return FTRACE_UPDATE_MAKE_CALL; + else if (rec->flags & FTRACE_FL_REGS_EN) + return FTRACE_UPDATE_MODIFY_CALL_REGS; + else + return FTRACE_UPDATE_MODIFY_CALL; } - if (update) - rec->flags &= ~FTRACE_FL_ENABLED; + if (update) { + /* If there's no more users, clear all flags */ + if (!(rec->flags & ~FTRACE_FL_MASK)) + rec->flags = 0; + else + /* Just disable the record (keep REGS state) */ + rec->flags &= ~FTRACE_FL_ENABLED; + } return FTRACE_UPDATE_MAKE_NOP; } @@ -1662,13 +1738,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + unsigned long ftrace_old_addr; unsigned long ftrace_addr; int ret; - ftrace_addr = (unsigned long)FTRACE_ADDR; - ret = ftrace_update_record(rec, enable); + if (rec->flags & FTRACE_FL_REGS) + ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; + else + ftrace_addr = (unsigned long)FTRACE_ADDR; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; @@ -1678,6 +1758,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); + + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + if (rec->flags & FTRACE_FL_REGS) + ftrace_old_addr = (unsigned long)FTRACE_ADDR; + else + ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; + + return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } return -1; /* unknow ftrace bug */ @@ -1882,16 +1971,6 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* - * For archs that call ftrace_test_stop_func(), we must - * wait till after we update all the function callers - * before we update the callback. This keeps different - * ops that record different functions from corrupting - * each other. - */ - __ftrace_trace_function = __ftrace_trace_function_delay; -#endif function_trace_stop--; ret = ftrace_arch_code_modify_post_process(); @@ -2441,8 +2520,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) - seq_printf(m, " (%ld)", - rec->flags & ~FTRACE_FL_MASK); + seq_printf(m, " (%ld)%s", + rec->flags & ~FTRACE_FL_MASK, + rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); return 0; @@ -2790,8 +2870,8 @@ static int __init ftrace_mod_cmd_init(void) } device_initcall(ftrace_mod_cmd_init); -static void -function_trace_probe_call(unsigned long ip, unsigned long parent_ip) +static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ftrace_func_probe *entry; struct hlist_head *hhd; @@ -3162,8 +3242,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static int -ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, - int reset, int enable) +ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +{ + struct ftrace_func_entry *entry; + + if (!ftrace_location(ip)) + return -EINVAL; + + if (remove) { + entry = ftrace_lookup_ip(hash, ip); + if (!entry) + return -ENOENT; + free_hash_entry(hash, entry); + return 0; + } + + return add_hash_entry(hash, ip); +} + +static int +ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, + unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; struct ftrace_hash *hash; @@ -3192,6 +3291,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ret = -EINVAL; goto out_regex_unlock; } + if (ip) { + ret = ftrace_match_addr(hash, ip, remove); + if (ret < 0) + goto out_regex_unlock; + } mutex_lock(&ftrace_lock); ret = ftrace_hash_move(ops, enable, orig_hash, hash); @@ -3208,6 +3312,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, return ret; } +static int +ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, + int reset, int enable) +{ + return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); +} + +/** + * ftrace_set_filter_ip - set a function to filter on in ftrace by address + * @ops - the ops to set the filter with + * @ip - the address to add to or remove from the filter. + * @remove - non zero to remove the ip from the filter + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled + * If @ip is NULL, it failes to update filter. + */ +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset) +{ + return ftrace_set_addr(ops, ip, remove, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); + +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) +{ + return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); +} + /** * ftrace_set_filter - set a function to filter on in ftrace * @ops - the ops to set the filter with @@ -3912,6 +4047,7 @@ void __init ftrace_init(void) static struct ftrace_ops global_ops = { .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int __init ftrace_nodyn_init(void) @@ -3942,10 +4078,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) #endif /* CONFIG_DYNAMIC_FTRACE */ static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) +ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) return; @@ -3959,7 +4094,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) while (op != &ftrace_list_end) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); }; @@ -3969,13 +4104,18 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +static inline void +__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ignored, struct pt_regs *regs) { struct ftrace_ops *op; + if (function_trace_stop) + return; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) return; @@ -3988,13 +4128,39 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); trace_recursion_clear(TRACE_INTERNAL_BIT); } +/* + * Some archs only support passing ip and parent_ip. Even though + * the list function ignores the op parameter, we do not want any + * C side effects, where a function is called without the caller + * sending a third parameter. + * Archs are to support both the regs and ftrace_ops at the same time. + * If they support ftrace_ops, it is assumed they support regs. + * If call backs want to use regs, they must either check for regs + * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. + * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved. + * An architecture can pass partial regs with ftrace_ops and still + * set the ARCH_SUPPORT_FTARCE_OPS. + */ +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL, regs); +} +#else +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); +} +#endif + static void clear_ftrace_swapper(void) { struct task_struct *p; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 49491fa7daa..b32ed0e385a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2816,7 +2816,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * to the buffer after this will fail and return NULL. * * This is different than ring_buffer_record_disable() as - * it works like an on/off switch, where as the disable() verison + * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ void ring_buffer_record_off(struct ring_buffer *buffer) @@ -2839,7 +2839,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * ring_buffer_record_off(). * * This is different than ring_buffer_record_enable() as - * it works like an on/off switch, where as the enable() verison + * it works like an on/off switch, where as the enable() version * must be paired with a disable(). */ void ring_buffer_record_on(struct ring_buffer *buffer) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c38c81496c..1ec5c1dab62 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -328,7 +328,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | - TRACE_ITER_IRQ_INFO; + TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS; static int trace_stop_count; static DEFINE_RAW_SPINLOCK(tracing_start_lock); @@ -426,15 +426,15 @@ __setup("trace_buf_size=", set_buf_size); static int __init set_tracing_thresh(char *str) { - unsigned long threshhold; + unsigned long threshold; int ret; if (!str) return 0; - ret = strict_strtoul(str, 0, &threshhold); + ret = strict_strtoul(str, 0, &threshold); if (ret < 0) return 0; - tracing_thresh = threshhold * 1000; + tracing_thresh = threshold * 1000; return 1; } __setup("tracing_thresh=", set_tracing_thresh); @@ -470,6 +470,7 @@ static const char *trace_options[] = { "overwrite", "disable_on_free", "irq-info", + "markers", NULL }; @@ -3886,6 +3887,9 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) return -EINVAL; + if (!(trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55e1f7f0db1..63a2da0b9a6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -472,11 +472,11 @@ extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; +#endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); -#endif extern int ring_buffer_expanded; extern bool tracing_selftest_disabled; @@ -680,6 +680,7 @@ enum trace_iterator_flags { TRACE_ITER_OVERWRITE = 0x200000, TRACE_ITER_STOP_ON_FREE = 0x400000, TRACE_ITER_IRQ_INFO = 0x800000, + TRACE_ITER_MARKERS = 0x1000000, }; /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 8a6d2ee2086..84b1e045fab 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void -perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; struct hlist_head *head; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 29111da1d10..d608d09d08c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1199,6 +1199,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } +static void event_remove(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event.funcs) + __unregister_ftrace_event(&call->event); + list_del(&call->list); +} + +static int event_init(struct ftrace_event_call *call) +{ + int ret = 0; + + if (WARN_ON(!call->name)) + return -EINVAL; + + if (call->class->raw_init) { + ret = call->class->raw_init(call); + if (ret < 0 && ret != -ENOSYS) + pr_warn("Could not initialize trace events/%s\n", + call->name); + } + + return ret; +} + static int __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, const struct file_operations *id, @@ -1209,19 +1234,9 @@ __trace_add_event_call(struct ftrace_event_call *call, struct module *mod, struct dentry *d_events; int ret; - /* The linker may leave blanks */ - if (!call->name) - return -EINVAL; - - if (call->class->raw_init) { - ret = call->class->raw_init(call); - if (ret < 0) { - if (ret != -ENOSYS) - pr_warning("Could not initialize trace events/%s\n", - call->name); - return ret; - } - } + ret = event_init(call); + if (ret < 0) + return ret; d_events = event_trace_events_dir(); if (!d_events) @@ -1272,13 +1287,10 @@ static void remove_subsystem_dir(const char *name) */ static void __trace_remove_event_call(struct ftrace_event_call *call) { - ftrace_event_enable_disable(call, 0); - if (call->event.funcs) - __unregister_ftrace_event(&call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); + event_remove(call); trace_destroy_fields(call); destroy_preds(call); + debugfs_remove_recursive(call->dir); remove_subsystem_dir(call->class->system); } @@ -1450,15 +1462,43 @@ static __init int setup_trace_event(char *str) } __setup("trace_event=", setup_trace_event); +static __init int event_trace_enable(void) +{ + struct ftrace_event_call **iter, *call; + char *buf = bootup_event_buf; + char *token; + int ret; + + for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { + + call = *iter; + ret = event_init(call); + if (!ret) + list_add(&call->list, &ftrace_events); + } + + while (true) { + token = strsep(&buf, ","); + + if (!token) + break; + if (!*token) + continue; + + ret = ftrace_set_clr_event(token, 1); + if (ret) + pr_warn("Failed to enable trace event: %s\n", token); + } + return 0; +} + static __init int event_trace_init(void) { - struct ftrace_event_call **call; + struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; int ret; - char *buf = bootup_event_buf; - char *token; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -1497,24 +1537,19 @@ static __init int event_trace_init(void) if (trace_define_common_fields()) pr_warning("tracing: Failed to allocate common fields"); - for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { - __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, + /* + * Early initialization already enabled ftrace event. + * Now it's only necessary to create the event directory. + */ + list_for_each_entry(call, &ftrace_events, list) { + + ret = event_create_dir(call, d_events, + &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); - } - - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; - if (!*token) - continue; - - ret = ftrace_set_clr_event(token, 1); - if (ret) - pr_warning("Failed to enable trace event: %s\n", token); + if (ret < 0) + event_remove(call); } ret = register_module_notifier(&trace_module_nb); @@ -1523,6 +1558,7 @@ static __init int event_trace_init(void) return 0; } +core_initcall(event_trace_enable); fs_initcall(event_trace_init); #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1646,9 +1682,11 @@ static __init void event_trace_self_tests(void) event_test_stuff(); ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0); - if (WARN_ON_ONCE(ret)) + if (WARN_ON_ONCE(ret)) { pr_warning("error disabling system %s\n", system->name); + continue; + } pr_cont("OK\n"); } @@ -1681,7 +1719,8 @@ static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void -function_test_events_call(unsigned long ip, unsigned long parent_ip) +function_test_events_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ring_buffer_event *event; struct ring_buffer *buffer; @@ -1720,6 +1759,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 431dba8b754..c154797a7ff 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -2002,7 +2002,7 @@ static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, static int __ftrace_function_set_filter(int filter, char *buf, int len, struct function_filter_data *data) { - int i, re_cnt, ret; + int i, re_cnt, ret = -EINVAL; int *reset; char **re; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index a426f410c06..483162a9f90 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -49,7 +49,8 @@ static void function_trace_start(struct trace_array *tr) } static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -84,7 +85,9 @@ enum { static struct tracer_flags func_flags; static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) + { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -121,7 +124,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } static void -function_stack_trace_call(unsigned long ip, unsigned long parent_ip) +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -164,13 +168,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static struct tracer_opt func_opts[] = { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ce27c8ba8d3..99b4378393d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -143,7 +143,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) /* * The arch may choose to record the frame pointer used * and check it here to make sure that it is what we expect it @@ -154,6 +154,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * * Currently, x86_32 with optimize for size (-Os) makes the latest * gcc do the above. + * + * Note, -mfentry does not use frame pointers, and this test + * is not needed if CC_USING_FENTRY is set. */ if (unlikely(current->ret_stack[index].fp != frame_pointer)) { ftrace_graph_stop(); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 99d20e92036..d98ee8283b2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr, * irqsoff uses its own tracer function to keep the overhead down: */ static void -irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; @@ -153,7 +154,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ff791ea48b5..02170c00c41 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -108,7 +108,8 @@ out_enable: * wakeup uses its own tracer function to keep the overhead down: */ static void -wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; @@ -129,7 +130,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 288541f977f..2c00a691a54 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -103,54 +103,67 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe1_cnt++; } static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe2_cnt++; } static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe3_cnt++; } static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_global_cnt++; } static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_dyn_cnt++; } static struct ftrace_ops test_probe1 = { .func = trace_selftest_test_probe1_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe2 = { .func = trace_selftest_test_probe2_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe3 = { .func = trace_selftest_test_probe3_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_global = { - .func = trace_selftest_test_global_func, - .flags = FTRACE_OPS_FL_GLOBAL, + .func = trace_selftest_test_global_func, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static void print_counts(void) @@ -393,10 +406,253 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, return ret; } + +static int trace_selftest_recursion_cnt; +static void trace_selftest_test_recursion_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * This function is registered without the recursion safe flag. + * The ftrace infrastructure should provide the recursion + * protection. If not, this will crash the kernel! + */ + trace_selftest_recursion_cnt++; + DYN_FTRACE_TEST_NAME(); +} + +static void trace_selftest_test_recursion_safe_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * We said we would provide our own recursion. By calling + * this function again, we should recurse back into this function + * and count again. But this only happens if the arch supports + * all of ftrace features and nothing else is using the function + * tracing utility. + */ + if (trace_selftest_recursion_cnt++) + return; + DYN_FTRACE_TEST_NAME(); +} + +static struct ftrace_ops test_rec_probe = { + .func = trace_selftest_test_recursion_func, +}; + +static struct ftrace_ops test_recsafe_probe = { + .func = trace_selftest_test_recursion_safe_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + +static int +trace_selftest_function_recursion(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int cnt; + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion: "); + + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_rec_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_rec_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_rec_probe); + + ret = -1; + if (trace_selftest_recursion_cnt != 1) { + pr_cont("*callback not called once (%d)* ", + trace_selftest_recursion_cnt); + goto out; + } + + trace_selftest_recursion_cnt = 1; + + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion safe: "); + + ret = ftrace_set_filter(&test_recsafe_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_recsafe_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_recsafe_probe); + + /* + * If arch supports all ftrace features, and no other task + * was on the list, we should be fine. + */ + if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC) + cnt = 2; /* Should have recursed */ + else + cnt = 1; + + ret = -1; + if (trace_selftest_recursion_cnt != cnt) { + pr_cont("*callback not called expected %d times (%d)* ", + cnt, trace_selftest_recursion_cnt); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) +# define trace_selftest_function_recursion() ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ +static enum { + TRACE_SELFTEST_REGS_START, + TRACE_SELFTEST_REGS_FOUND, + TRACE_SELFTEST_REGS_NOT_FOUND, +} trace_selftest_regs_stat; + +static void trace_selftest_test_regs_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + if (pt_regs) + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND; + else + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND; +} + +static struct ftrace_ops test_regs_probe = { + .func = trace_selftest_test_regs_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS, +}; + +static int +trace_selftest_function_regs(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int supported = 0; + +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS + supported = 1; +#endif + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace regs%s: ", + !supported ? "(no arch support)" : ""); + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_regs_probe, func_name, len, 1); + /* + * If DYNAMIC_FTRACE is not set, then we just trace all functions. + * This test really doesn't care. + */ + if (ret && ret != -ENODEV) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_regs_probe); + /* + * Now if the arch does not support passing regs, then this should + * have failed. + */ + if (!supported) { + if (!ret) { + pr_cont("*registered save-regs without arch support* "); + goto out; + } + test_regs_probe.flags |= FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED; + ret = register_ftrace_function(&test_regs_probe); + } + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_regs_probe); + + ret = -1; + + switch (trace_selftest_regs_stat) { + case TRACE_SELFTEST_REGS_START: + pr_cont("*callback never called* "); + goto out; + + case TRACE_SELFTEST_REGS_FOUND: + if (supported) + break; + pr_cont("*callback received regs without arch support* "); + goto out; + + case TRACE_SELFTEST_REGS_NOT_FOUND: + if (!supported) + break; + pr_cont("*callback received NULL regs* "); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} + /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -442,7 +698,14 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ret = trace_selftest_startup_dynamic_tracing(trace, tr, DYN_FTRACE_TEST_NAME); + if (ret) + goto out; + ret = trace_selftest_function_recursion(); + if (ret) + goto out; + + ret = trace_selftest_function_regs(); out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; @@ -778,6 +1041,8 @@ static int trace_wakeup_test_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); schedule(); + complete(x); + /* we are awake, now wait to disappear */ while (!kthread_should_stop()) { /* @@ -821,24 +1086,21 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* reset the max latency */ tracing_max_latency = 0; - /* sleep to let the RT thread sleep too */ - msleep(100); + while (p->on_rq) { + /* + * Sleep to make sure the RT thread is asleep too. + * On virtual machines we can't rely on timings, + * but we want to make sure this test still works. + */ + msleep(100); + } - /* - * Yes this is slightly racy. It is possible that for some - * strange reason that the RT thread we created, did not - * call schedule for 100ms after doing the completion, - * and we do a wakeup on a task that already is awake. - * But that is extremely unlikely, and the worst thing that - * happens in such a case, is that we disable tracing. - * Honestly, if this race does happen something is horrible - * wrong with the system. - */ + init_completion(&isrt); wake_up_process(p); - /* give a little time to let the thread wake up */ - msleep(100); + /* Wait for the task to wake up */ + wait_for_completion(&isrt); /* stop the tracing. */ tracing_stop(); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index d4545f49242..0c1b165778e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,8 @@ static inline void check_stack(void) } static void -stack_trace_call(unsigned long ip, unsigned long parent_ip) +stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { int cpu; @@ -136,6 +137,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static ssize_t diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 6b245f64c8d..2485a7d09b1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -487,7 +487,7 @@ int __init init_ftrace_syscalls(void) return 0; } -core_initcall(init_ftrace_syscalls); +early_initcall(init_ftrace_syscalls); #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b1dfba70f7..9d4c8d5a1f5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -22,6 +22,7 @@ #include <linux/notifier.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <linux/smpboot.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> @@ -29,16 +30,18 @@ int watchdog_enabled = 1; int __read_mostly watchdog_thresh = 10; +static int __read_mostly watchdog_disabled; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt); #ifdef CONFIG_HARDLOCKUP_DETECTOR static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif @@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event, __this_cpu_write(hard_watchdog_warn, false); return; } +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + static void watchdog_interrupt_count(void) { __this_cpu_inc(hrtimer_interrupts); } -#else -static inline void watchdog_interrupt_count(void) { return; } -#endif /* CONFIG_HARDLOCKUP_DETECTOR */ + +static int watchdog_nmi_enable(unsigned int cpu); +static void watchdog_nmi_disable(unsigned int cpu); /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) @@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static void watchdog_set_prio(unsigned int policy, unsigned int prio) +{ + struct sched_param param = { .sched_priority = prio }; -/* - * The watchdog thread - touches the timestamp. - */ -static int watchdog(void *unused) + sched_setscheduler(current, policy, ¶m); +} + +static void watchdog_enable(unsigned int cpu) { - struct sched_param param = { .sched_priority = 0 }; struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - /* initialize timestamp */ - __touch_watchdog(); + if (!watchdog_enabled) { + kthread_park(current); + return; + } + + /* Enable the perf event */ + watchdog_nmi_enable(cpu); /* kick off the timer for the hardlockup detector */ + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + /* done here because hrtimer_start can only pin to smp_processor_id() */ hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), HRTIMER_MODE_REL_PINNED); - set_current_state(TASK_INTERRUPTIBLE); - /* - * Run briefly (kicked by the hrtimer callback function) once every - * get_sample_period() seconds (4 seconds by default) to reset the - * softlockup timestamp. If this gets delayed for more than - * 2*watchdog_thresh seconds then the debug-printout triggers in - * watchdog_timer_fn(). - */ - while (!kthread_should_stop()) { - __touch_watchdog(); - schedule(); + /* initialize timestamp */ + watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); + __touch_watchdog(); +} - if (kthread_should_stop()) - break; +static void watchdog_disable(unsigned int cpu) +{ + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); - set_current_state(TASK_INTERRUPTIBLE); - } - /* - * Drop the policy/priority elevation during thread exit to avoid a - * scheduling latency spike. - */ - __set_current_state(TASK_RUNNING); - sched_setscheduler(current, SCHED_NORMAL, ¶m); - return 0; + watchdog_set_prio(SCHED_NORMAL, 0); + hrtimer_cancel(hrtimer); + /* disable the perf event */ + watchdog_nmi_disable(cpu); } +static int watchdog_should_run(unsigned int cpu) +{ + return __this_cpu_read(hrtimer_interrupts) != + __this_cpu_read(soft_lockup_hrtimer_cnt); +} + +/* + * The watchdog thread function - touches the timestamp. + * + * It only runs once every get_sample_period() seconds (4 seconds by + * default) to reset the softlockup timestamp. If this gets delayed + * for more than 2*watchdog_thresh seconds then the debug-printout + * triggers in watchdog_timer_fn(). + */ +static void watchdog(unsigned int cpu) +{ + __this_cpu_write(soft_lockup_hrtimer_cnt, + __this_cpu_read(hrtimer_interrupts)); + __touch_watchdog(); +} #ifdef CONFIG_HARDLOCKUP_DETECTOR /* @@ -379,7 +403,7 @@ static int watchdog(void *unused) */ static unsigned long cpu0_err; -static int watchdog_nmi_enable(int cpu) +static int watchdog_nmi_enable(unsigned int cpu) { struct perf_event_attr *wd_attr; struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -433,7 +457,7 @@ out: return 0; } -static void watchdog_nmi_disable(int cpu) +static void watchdog_nmi_disable(unsigned int cpu) { struct perf_event *event = per_cpu(watchdog_ev, cpu); @@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu) return; } #else -static int watchdog_nmi_enable(int cpu) { return 0; } -static void watchdog_nmi_disable(int cpu) { return; } +static int watchdog_nmi_enable(unsigned int cpu) { return 0; } +static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* prepare/enable/disable routines */ -static void watchdog_prepare_cpu(int cpu) -{ - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - - WARN_ON(per_cpu(softlockup_watchdog, cpu)); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; -} - -static int watchdog_enable(int cpu) -{ - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - int err = 0; - - /* enable the perf event */ - err = watchdog_nmi_enable(cpu); - - /* Regardless of err above, fall through and start softlockup */ - - /* create the watchdog thread */ - if (!p) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; - p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu); - if (IS_ERR(p)) { - pr_err("softlockup watchdog for %i failed\n", cpu); - if (!err) { - /* if hardlockup hasn't already set this */ - err = PTR_ERR(p); - /* and disable the perf event */ - watchdog_nmi_disable(cpu); - } - goto out; - } - sched_setscheduler(p, SCHED_FIFO, ¶m); - kthread_bind(p, cpu); - per_cpu(watchdog_touch_ts, cpu) = 0; - per_cpu(softlockup_watchdog, cpu) = p; - wake_up_process(p); - } - -out: - return err; -} - -static void watchdog_disable(int cpu) -{ - struct task_struct *p = per_cpu(softlockup_watchdog, cpu); - struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); - - /* - * cancel the timer first to stop incrementing the stats - * and waking up the kthread - */ - hrtimer_cancel(hrtimer); - - /* disable the perf event */ - watchdog_nmi_disable(cpu); - - /* stop the watchdog thread */ - if (p) { - per_cpu(softlockup_watchdog, cpu) = NULL; - kthread_stop(p); - } -} - /* sysctl functions */ #ifdef CONFIG_SYSCTL static void watchdog_enable_all_cpus(void) { - int cpu; - - watchdog_enabled = 0; - - for_each_online_cpu(cpu) - if (!watchdog_enable(cpu)) - /* if any cpu succeeds, watchdog is considered - enabled for the system */ - watchdog_enabled = 1; - - if (!watchdog_enabled) - pr_err("failed to be enabled on some cpus\n"); + unsigned int cpu; + if (watchdog_disabled) { + watchdog_disabled = 0; + for_each_online_cpu(cpu) + kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + } } static void watchdog_disable_all_cpus(void) { - int cpu; - - for_each_online_cpu(cpu) - watchdog_disable(cpu); + unsigned int cpu; - /* if all watchdogs are disabled, then they are disabled for the system */ - watchdog_enabled = 0; + if (!watchdog_disabled) { + watchdog_disabled = 1; + for_each_online_cpu(cpu) + kthread_park(per_cpu(softlockup_watchdog, cpu)); + } } - /* * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ @@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write, { int ret; + if (watchdog_disabled < 0) + return -ENODEV; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) - goto out; + return ret; if (watchdog_enabled && watchdog_thresh) watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); -out: return ret; } #endif /* CONFIG_SYSCTL */ - -/* - * Create/destroy watchdog threads as CPUs come and go: - */ -static int __cpuinit -cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int hotcpu = (unsigned long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - watchdog_prepare_cpu(hotcpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - if (watchdog_enabled) - watchdog_enable(hotcpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - watchdog_disable(hotcpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - watchdog_disable(hotcpu); - break; -#endif /* CONFIG_HOTPLUG_CPU */ - } - - /* - * hardlockup and softlockup are not important enough - * to block cpu bring up. Just always succeed and - * rely on printk output to flag problems. - */ - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cpu_nfb = { - .notifier_call = cpu_callback +static struct smp_hotplug_thread watchdog_threads = { + .store = &softlockup_watchdog, + .thread_should_run = watchdog_should_run, + .thread_fn = watchdog, + .thread_comm = "watchdog/%u", + .setup = watchdog_enable, + .park = watchdog_disable, + .unpark = watchdog_enable, }; void __init lockup_detector_init(void) { - void *cpu = (void *)(long)smp_processor_id(); - int err; - - err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - WARN_ON(notifier_to_errno(err)); - - cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); - register_cpu_notifier(&cpu_nfb); - - return; + if (smpboot_register_percpu_thread(&watchdog_threads)) { + pr_err("Failed to create watchdog threads, disabled\n"); + watchdog_disabled = -ENODEV; + } } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b80065a2450..3c5a79e2134 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3576,18 +3576,17 @@ static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, #ifdef CONFIG_SMP struct work_for_cpu { - struct completion completion; + struct work_struct work; long (*fn)(void *); void *arg; long ret; }; -static int do_work_for_cpu(void *_wfc) +static void work_for_cpu_fn(struct work_struct *work) { - struct work_for_cpu *wfc = _wfc; + struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); + wfc->ret = wfc->fn(wfc->arg); - complete(&wfc->completion); - return 0; } /** @@ -3602,19 +3601,11 @@ static int do_work_for_cpu(void *_wfc) */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { - struct task_struct *sub_thread; - struct work_for_cpu wfc = { - .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), - .fn = fn, - .arg = arg, - }; + struct work_for_cpu wfc = { .fn = fn, .arg = arg }; - sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); - if (IS_ERR(sub_thread)) - return PTR_ERR(sub_thread); - kthread_bind(sub_thread, cpu); - wake_up_process(sub_thread); - wait_for_completion(&wfc.completion); + INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2403a63b5da..dacbbe4d7a8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -629,6 +629,20 @@ config PROVE_RCU_REPEATEDLY Say N if you are unsure. +config PROVE_RCU_DELAY + bool "RCU debugging: preemptible RCU race provocation" + depends on DEBUG_KERNEL && PREEMPT_RCU + default n + help + There is a class of races that involve an unlikely preemption + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has + been set to INT_MIN. This feature inserts a delay at that + point to increase the probability of these races. + + Say Y to increase probability of preemption of __rcu_read_unlock(). + + Say N if you are unsure. + config SPARSE_RCU_POINTER bool "RCU debugging: sparse-based checks for pointer usage" default n diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index c785554f952..ebf3bac460b 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -62,7 +62,7 @@ void fprop_global_destroy(struct fprop_global *p) */ bool fprop_new_period(struct fprop_global *p, int periods) { - u64 events; + s64 events; unsigned long flags; local_irq_save(flags); diff --git a/mm/bootmem.c b/mm/bootmem.c index bcb63ac48cc..f468185b3b2 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -419,7 +419,7 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, } /** - * reserve_bootmem - mark a page range as usable + * reserve_bootmem - mark a page range as reserved * @addr: starting address of the range * @size: size of the range in bytes * @flags: reservation flags (see linux/bootmem.h) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 57c4b930901..141dbb69509 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1811,7 +1811,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); VM_BUG_ON(page_mapcount(src_page) != 1); - VM_BUG_ON(page_count(src_page) != 2); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 45eb6217bf3..0de83b4541e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1483,13 +1483,11 @@ static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct kmemleak_object *prev_obj = v; struct kmemleak_object *next_obj = NULL; - struct list_head *n = &prev_obj->object_list; + struct kmemleak_object *obj = prev_obj; ++(*pos); - list_for_each_continue_rcu(n, &object_list) { - struct kmemleak_object *obj = - list_entry(n, struct kmemleak_object, object_list); + list_for_each_entry_continue_rcu(obj, &object_list, object_list) { if (get_object(obj)) { next_obj = obj; break; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index c718fd3664b..4de77ea5fa3 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -105,7 +105,7 @@ static const struct file_operations vlandev_fops = { }; /* - * Proc filesystem derectory entries. + * Proc filesystem directory entries. */ /* Strings */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e877af8bdd1..469daabd90c 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -642,7 +642,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; struct hlist_node *node; - uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; + int if_num; + uint8_t sum_orig, sum_neigh; uint8_t *neigh_addr; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -727,17 +728,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (router && (neigh_node->tq_avg == router->tq_avg)) { orig_node_tmp = router->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_orig = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = router->if_incoming->if_num; + sum_orig = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); orig_node_tmp = neigh_node->orig_node; spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); - bcast_own_sum_neigh = - orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + if_num = neigh_node->if_incoming->if_num; + sum_neigh = orig_node_tmp->bcast_own_sum[if_num]; spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); - if (bcast_own_sum_orig >= bcast_own_sum_neigh) + if (sum_orig >= sum_neigh) goto update_tt; } diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index a081ce1c051..cebaae7e148 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -20,8 +20,8 @@ #ifndef _NET_BATMAN_ADV_BITARRAY_H_ #define _NET_BATMAN_ADV_BITARRAY_H_ -/* returns true if the corresponding bit in the given seq_bits indicates true - * and curr_seqno is within range of last_seqno +/* Returns 1 if the corresponding bit in the given seq_bits indicates true + * and curr_seqno is within range of last_seqno. Otherwise returns 0. */ static inline int batadv_test_bit(const unsigned long *seq_bits, uint32_t last_seqno, uint32_t curr_seqno) @@ -32,7 +32,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits, if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE) return 0; else - return test_bit(diff, seq_bits); + return test_bit(diff, seq_bits) != 0; } /* turn corresponding bit on, so we can remember that we got the packet */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 109ea2aae96..21c53577c8d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -100,18 +100,21 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) { struct batadv_priv *bat_priv = netdev_priv(dev); struct sockaddr *addr = p; + uint8_t old_addr[ETH_ALEN]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + memcpy(old_addr, dev->dev_addr, ETH_ALEN); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + /* only modify transtable if it has been initialized before */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) { - batadv_tt_local_remove(bat_priv, dev->dev_addr, + batadv_tt_local_remove(bat_priv, old_addr, "mac address changed", false); batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX); } - memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); dev->addr_assign_type &= ~NET_ADDR_RANDOM; return 0; } diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 5e5f5b410e0..1eaacf10d19 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -58,7 +58,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case BNEPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -84,7 +84,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case BNEPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 311668d1457..32dc83dcb6b 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -72,7 +72,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case CMTPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -97,7 +97,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case CMTPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4de5db18d5..0b997c8f965 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -734,6 +734,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_work_sync(&hdev->le_scan); + cancel_delayed_work(&hdev->power_off); + hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 19fdac78e55..d5ace1eda3e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -490,7 +490,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return -EPERM; @@ -510,12 +510,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, case HCIBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_add(hdev, (void __user *) arg); case HCIUNBLOCKADDR: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *) arg); default: @@ -546,22 +546,22 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCIDEVUP: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_open(arg); case HCIDEVDOWN: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_close(arg); case HCIDEVRESET: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset(arg); case HCIDEVRESTAT: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_reset_stat(arg); case HCISETSCAN: @@ -573,7 +573,7 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCISETACLMTU: case HCISETSCOMTU: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; return hci_dev_cmd(cmd, argp); case HCIINQUIRY: diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 18b3f6892a3..b24fb3bd862 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -56,7 +56,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long switch (cmd) { case HIDPCONNADD: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&ca, argp, sizeof(ca))) return -EFAULT; @@ -91,7 +91,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long case HIDPCONNDEL: if (!capable(CAP_NET_ADMIN)) - return -EACCES; + return -EPERM; if (copy_from_user(&cd, argp, sizeof(cd))) return -EFAULT; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 4ea1710a478..38c00f14220 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1008,7 +1008,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c if (!conn) return; - if (chan->mode == L2CAP_MODE_ERTM) { + if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) { __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ad6613d17ca..eba022de3c2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2875,6 +2875,22 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) if (scan) hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { + u8 ssp = 1; + + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); + } + + if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 1; + cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR); + + hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + update_class(hdev); update_name(hdev, hdev->dev_name); update_eir(hdev); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 24c5eea8c45..159aa8bef9e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1073,16 +1073,13 @@ static int write_partial_msg_pages(struct ceph_connection *con) BUG_ON(kaddr == NULL); base = kaddr + con->out_msg_pos.page_pos + bio_offset; crc = crc32c(crc, base, len); + kunmap(page); msg->footer.data_crc = cpu_to_le32(crc); con->out_msg_pos.did_page_crc = true; } ret = ceph_tcp_sendpage(con->sock, page, con->out_msg_pos.page_pos + bio_offset, len, 1); - - if (do_datacrc) - kunmap(page); - if (ret <= 0) goto out; diff --git a/net/core/dev.c b/net/core/dev.c index d7fe32c946c..89e33a5d4d9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2134,7 +2134,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { - if (!can_checksum_protocol(features, protocol)) { + if (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { @@ -3322,7 +3323,7 @@ ncls: if (pt_prev) { if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) - ret = -ENOMEM; + goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe00d120816..e33ebae519c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3502,7 +3502,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (!skb_cloned(from)) skb_shinfo(from)->nr_frags = 0; - /* if the skb is cloned this does nothing since we set nr_frags to 0 */ + /* if the skb is not cloned this does nothing + * since we set nr_frags to 0. + */ for (i = 0; i < skb_shinfo(from)->nr_frags; i++) skb_frag_ref(from, i); diff --git a/net/core/sock.c b/net/core/sock.c index 30579207612..a6000fbad29 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -691,7 +691,8 @@ set_rcvbuf: case SO_KEEPALIVE: #ifdef CONFIG_INET - if (sk->sk_protocol == IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_TCP && + sk->sk_type == SOCK_STREAM) tcp_set_keepalive(sk, valbool); #endif sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 77e87aff419..47800459e4c 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 44bf82e3aef..e12fad77385 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -725,7 +725,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; case SIOCSIFFLAGS: - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; break; @@ -733,7 +733,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ - ret = -EACCES; + ret = -EPERM; if (!capable(CAP_NET_ADMIN)) goto out; ret = -EINVAL; @@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) - rt_cache_flush(net, 0); + rt_cache_flush(net); } return ret; @@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(net, 0); + rt_cache_flush(net); } } @@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write, struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(net, 0); + rt_cache_flush(net); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3fba79..8e2b475da9f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -148,7 +148,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(net, -1); + rt_cache_flush(net); } /* @@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force, int delay) +static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), delay); + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } @@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, 1, 0); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); } break; } @@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2, -1); + fib_disable_ip(dev, 2); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(dev_net(dev)); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0, 0); + fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev)); break; case NETDEV_UNREGISTER_BATCH: break; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index a83d74e498d..274309d3ade 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(ops->fro_net, -1); + rt_cache_flush(ops->fro_net); } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 57bd978483e..d1b93595b4a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); + rt_cache_flush(cfg->fc_nlinfo.nl_net); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e1e0a4e8fd3..c7527f6b9ad 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -510,7 +510,10 @@ relookup: secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; - p->rate_last = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969e..d23c6571ba1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; + struct icmphdr _hdr; + const struct icmphdr *hdr; - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 82cf2a722b2..fd9af60397b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -447,27 +442,9 @@ static inline bool rt_is_expired(const struct rtable *rth) return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perturbation of rt_genid by a small quantity [1..256] - * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() - * many times (2^24) without giving recent rt_genid. - * Jenkins hash is strong enough that litle changes of rt_genid are OK. - */ -static void rt_cache_invalidate(struct net *net) +void rt_cache_flush(struct net *net) { - unsigned char shuffle; - - get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &net->ipv4.rt_genid); -} - -/* - * delay < 0 : invalidate cache (fast : entries will be deleted later) - * delay >= 0 : invalidate & flush cache (can be long) - */ -void rt_cache_flush(struct net *net, int delay) -{ - rt_cache_invalidate(net); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2345,7 +2322,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(dev_net(in_dev->dev), 0); + rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL @@ -2354,16 +2331,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, size_t *lenp, loff_t *ppos) { if (write) { - int flush_delay; - ctl_table ctl; - struct net *net; - - memcpy(&ctl, __ctl, sizeof(ctl)); - ctl.data = &flush_delay; - proc_dointvec(&ctl, write, buffer, lenp, ppos); - - net = (struct net *)__ctl->extra1; - rt_cache_flush(net, flush_delay); + rt_cache_flush((struct net *)__ctl->extra1); return 0; } @@ -2533,8 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - get_random_bytes(&net->ipv4.rt_genid, - sizeof(net->ipv4.rt_genid)); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1da..5f641934182 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1762,8 +1762,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } #ifdef CONFIG_NET_DMA - if (tp->ucopy.dma_chan) - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + if (tp->ucopy.dma_chan) { + if (tp->rcv_wnd == 0 && + !skb_queue_empty(&sk->sk_async_wait_queue)) { + tcp_service_net_dma(sk, true); + tcp_cleanup_rbuf(sk, copied); + } else + dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + } #endif if (copied >= target) { /* Do not sleep, just process backlog. */ @@ -2325,10 +2331,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp, tp->rx_opt.mss_clamp = opt.opt_val; break; case TCPOPT_WINDOW: - if (opt.opt_val > 14) - return -EFBIG; + { + u16 snd_wscale = opt.opt_val & 0xFFFF; + u16 rcv_wscale = opt.opt_val >> 16; + + if (snd_wscale > 14 || rcv_wscale > 14) + return -EFBIG; - tp->rx_opt.snd_wscale = opt.opt_val; + tp->rx_opt.snd_wscale = snd_wscale; + tp->rx_opt.rcv_wscale = rcv_wscale; + tp->rx_opt.wscale_ok = 1; + } break; case TCPOPT_SACK_PERM: if (opt.opt_val != 0) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6e38c6c23ca..d377f4854cb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4661,7 +4661,7 @@ queue_and_out: if (eaten > 0) kfree_skb_partial(skb, fragstolen); - else if (!sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, 0); return; } @@ -5556,8 +5556,7 @@ no_ack: #endif if (eaten) kfree_skb_partial(skb, fragstolen); - else - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk, 0); return 0; } } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0251a6005be..c4f934176ca 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); - -#ifdef CONFIG_XFRM - { - struct rt6_info *rt = (struct rt6_info *)dst; - rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); - } -#endif } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst; - - dst = __sk_dst_check(sk, cookie); - -#ifdef CONFIG_XFRM - if (dst) { - struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - __sk_dst_reset(sk); - dst = NULL; - } - } -#endif - - return dst; + return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 13690d650c3..286acfc2125 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -819,6 +819,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) offsetof(struct rt6_info, rt6i_src), allow_create, replace_required); + if (IS_ERR(sn)) { + err = PTR_ERR(sn); + sn = NULL; + } if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c31d87..0f9bdc5ee9f 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca..4a5f78b5049 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd27910..854e4018d20 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, @@ -246,7 +246,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, @@ -261,7 +261,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard, @@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_NONE, flags); + 0, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + rt->rt6i_genid = rt_genid(net); } return rt; } @@ -1031,6 +1032,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; + /* All IPV6 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + */ + if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) + return NULL; + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { if (!rt6_has_peer(rt)) @@ -1397,8 +1405,6 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->dst.obsolete = -1; - if (cfg->fc_flags & RTF_EXPIRES) rt6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -2080,7 +2086,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; - rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a4..6f936358d66 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -80,8 +80,8 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (!hdr) { + ret = -EMSGSIZE; goto err_out; } @@ -250,8 +250,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) || nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || @@ -617,8 +617,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); - if (IS_ERR(hdr)) - return PTR_ERR(hdr); + if (!hdr) + return -EMSGSIZE; if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 5c22ce8ab30..a4c1e4528ca 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* For SMP, we only want to use one set of state. */ r->master = priv; + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - priv->prev = jiffies; - priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1b9024ee963..7261eb81974 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -601,7 +601,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); release_sock(sk); - return -EACCES; + return -EPERM; } nr->user_addr = addr->fsa_digipeater[0]; nr->source_addr = addr->fsa_ax25.sax25_call; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e4723d31fdd..211a2121704 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -865,7 +865,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) if (mask) { struct qfq_group *next = qfq_ffs(q, mask); if (qfq_gt(roundedF, next->F)) { - cl->S = next->F; + if (qfq_gt(limit, next->F)) + cl->S = next->F; + else /* preserve timestamp correctness */ + cl->S = limit; return; } } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2ded3c7fad0..72d170ca340 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work) struct reg_regdb_search_request *request; const struct ieee80211_regdomain *curdom, *regdom; int i, r; + bool set_reg = false; + + mutex_lock(&cfg80211_mutex); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work) r = reg_copy_regd(®dom, curdom); if (r) break; - mutex_lock(&cfg80211_mutex); - set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + set_reg = true; break; } } @@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work) kfree(request); } mutex_unlock(®_regdb_search_mutex); + + if (set_reg) + set_regdom(regdom); + + mutex_unlock(&cfg80211_mutex); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5a2aa17e4d3..387848e9007 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -585,6 +585,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_pol_hold(policy); net->xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); + rt_genid_bump(net); if (delpol) __xfrm_policy_unlink(delpol, dir); policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); @@ -1763,7 +1764,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, if (!afinfo) { dst_release(dst_orig); - ret = ERR_PTR(-EINVAL); + return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75d8e47f35..289f4bf18ff 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + struct xfrm_replay_state_esn *rs; - if ((p->flags & XFRM_STATE_ESN) && !rt) - return -EINVAL; + if (p->flags & XFRM_STATE_ESN) { + if (!rt) + return -EINVAL; + + rs = nla_data(rt); + + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + return -EINVAL; + + if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + nla_len(rt) != sizeof(*rs)) + return -EINVAL; + } if (!rt) return 0; @@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; + int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); + ulen = xfrm_replay_state_esn_len(up); - if (xfrm_replay_state_esn_len(replay_esn) != - xfrm_replay_state_esn_len(up)) + if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; return 0; @@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; + int klen, ulen; if (!rta) return 0; up = nla_data(rta); + klen = xfrm_replay_state_esn_len(up); + ulen = nla_len(rta) >= klen ? klen : sizeof(*up); - p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; - pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } + memcpy(p, up, ulen); + memcpy(pp, up, ulen); + *replay_esn = p; *preplay_esn = pp; @@ -442,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * * somehow made shareable and move it to xfrm_state.c - JHS * */ -static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, + int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; - struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; + struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; @@ -555,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; /* override default values from above */ - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 0); return x; @@ -689,6 +709,7 @@ out: static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { + memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); @@ -742,7 +763,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) return -EMSGSIZE; algo = nla_data(nla); - strcpy(algo->alg_name, auth->alg_name); + strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name)); memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; @@ -878,6 +899,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) @@ -888,9 +910,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_state(x, 0, &info)) { + err = dump_one_state(x, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; @@ -1317,6 +1340,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { + memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); @@ -1421,6 +1445,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; + memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); @@ -1546,6 +1571,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, { struct xfrm_dump_info info; struct sk_buff *skb; + int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) @@ -1556,9 +1582,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, info.nlmsg_seq = seq; info.nlmsg_flags = 0; - if (dump_one_policy(xp, dir, 0, &info) < 0) { + err = dump_one_policy(xp, dir, 0, &info); + if (err) { kfree_skb(skb); - return NULL; + return ERR_PTR(err); } return skb; @@ -1822,7 +1849,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; spin_lock_bh(&x->lock); - xfrm_update_ae_params(x, attrs); + xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst index c3f69ae275d..4d908d16c03 100644 --- a/scripts/Makefile.fwinst +++ b/scripts/Makefile.fwinst @@ -27,7 +27,7 @@ endif installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw)) installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all)) -installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/. +installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./ # Workaround for make < 3.81, where .SECONDEXPANSION doesn't work. PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs @@ -42,7 +42,7 @@ quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@) $(installed-fw-dirs): $(call cmd,mkdir) -$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $$(dir $(INSTALL_FW_PATH)/%) +$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %) $(call cmd,install) PHONY += __fw_install __fw_modinst FORCE diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index d24810fc6af..fd8fa9aa7c4 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -200,7 +200,7 @@ EOF syscall_list() { grep '^[0-9]' "$1" | sort -n | ( while read nr abi name entry ; do - echo <<EOF + cat <<EOF #if !defined(__NR_${name}) && !defined(__IGNORE_${name}) #warning syscall ${name} not implemented #endif diff --git a/scripts/coccinelle/api/memdup_user.cocci b/scripts/coccinelle/api/memdup_user.cocci index 2efac289fd5..2b131a8a130 100644 --- a/scripts/coccinelle/api/memdup_user.cocci +++ b/scripts/coccinelle/api/memdup_user.cocci @@ -51,10 +51,10 @@ statement S1,S2; p << r.p; @@ -coccilib.org.print_todo(p[0], "WARNING opportunity for memdep_user") +coccilib.org.print_todo(p[0], "WARNING opportunity for memdup_user") @script:python depends on report@ p << r.p; @@ -coccilib.report.print_report(p[0], "WARNING opportunity for memdep_user") +coccilib.report.print_report(p[0], "WARNING opportunity for memdup_user") diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 2fbbbc1ddea..33689396953 100644 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -100,7 +100,7 @@ my @searchconfigs = ( }, ); -sub find_config { +sub read_config { foreach my $conf (@searchconfigs) { my $file = $conf->{"file"}; @@ -115,17 +115,15 @@ sub find_config { print STDERR "using config: '$file'\n"; - open(CIN, "$exec $file |") || die "Failed to run $exec $file"; - return; + open(my $infile, '-|', "$exec $file") || die "Failed to run $exec $file"; + my @x = <$infile>; + close $infile; + return @x; } die "No config file found"; } -find_config; - -# Read in the entire config file into config_file -my @config_file = <CIN>; -close CIN; +my @config_file = read_config; # Parse options my $localmodconfig = 0; @@ -135,7 +133,7 @@ GetOptions("localmodconfig" => \$localmodconfig, "localyesconfig" => \$localyesconfig); # Get the build source and top level Kconfig file (passed in) -my $ksource = $ARGV[0]; +my $ksource = ($ARGV[0] ? $ARGV[0] : '.'); my $kconfig = $ARGV[1]; my $lsmod_file = $ENV{'LSMOD'}; @@ -173,8 +171,8 @@ sub read_kconfig { $source =~ s/\$$env/$ENV{$env}/; } - open(KIN, "$source") || die "Can't open $kconfig"; - while (<KIN>) { + open(my $kinfile, '<', $source) || die "Can't open $kconfig"; + while (<$kinfile>) { chomp; # Make sure that lines ending with \ continue @@ -251,10 +249,10 @@ sub read_kconfig { $state = "NONE"; } } - close(KIN); + close($kinfile); # read in any configs that were found. - foreach $kconfig (@kconfigs) { + foreach my $kconfig (@kconfigs) { if (!defined($read_kconfigs{$kconfig})) { $read_kconfigs{$kconfig} = 1; read_kconfig($kconfig); @@ -295,8 +293,8 @@ foreach my $makefile (@makefiles) { my $line = ""; my %make_vars; - open(MIN,$makefile) || die "Can't open $makefile"; - while (<MIN>) { + open(my $infile, '<', $makefile) || die "Can't open $makefile"; + while (<$infile>) { # if this line ends with a backslash, continue chomp; if (/^(.*)\\$/) { @@ -343,10 +341,11 @@ foreach my $makefile (@makefiles) { } } } - close(MIN); + close($infile); } my %modules; +my $linfile; if (defined($lsmod_file)) { if ( ! -f $lsmod_file) { @@ -356,13 +355,10 @@ if (defined($lsmod_file)) { die "$lsmod_file not found"; } } - if ( -x $lsmod_file) { - # the file is executable, run it - open(LIN, "$lsmod_file|"); - } else { - # Just read the contents - open(LIN, "$lsmod_file"); - } + + my $otype = ( -x $lsmod_file) ? '-|' : '<'; + open($linfile, $otype, $lsmod_file); + } else { # see what modules are loaded on this system @@ -379,16 +375,16 @@ if (defined($lsmod_file)) { $lsmod = "lsmod"; } - open(LIN,"$lsmod|") || die "Can not call lsmod with $lsmod"; + open($linfile, '-|', $lsmod) || die "Can not call lsmod with $lsmod"; } -while (<LIN>) { +while (<$linfile>) { next if (/^Module/); # Skip the first line. if (/^(\S+)/) { $modules{$1} = 1; } } -close (LIN); +close ($linfile); # add to the configs hash all configs that are needed to enable # a loaded module. This is a direct obj-${CONFIG_FOO} += bar.o @@ -605,6 +601,8 @@ foreach my $line (@config_file) { if (defined($configs{$1})) { if ($localyesconfig) { $setconfigs{$1} = 'y'; + print "$1=y\n"; + next; } else { $setconfigs{$1} = $2; } diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 54e35c1e594..9d1421e63ff 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -261,11 +261,13 @@ static unsigned get_mcountsym(Elf_Sym const *const sym0, &sym0[Elf_r_sym(relp)]; char const *symname = &str0[w(symp->st_name)]; char const *mcount = gpfx == '_' ? "_mcount" : "mcount"; + char const *fentry = "__fentry__"; if (symname[0] == '.') ++symname; /* ppc64 hack */ if (strcmp(mcount, symname) == 0 || - (altmcount && strcmp(altmcount, symname) == 0)) + (altmcount && strcmp(altmcount, symname) == 0) || + (strcmp(fentry, symname) == 0)) mcountsym = Elf_r_sym(relp); return mcountsym; diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index 4d995aeaebc..9cdec70d72b 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore @@ -1,6 +1,5 @@ # # Generated include files # -af_names.h capability_names.h rlim_names.h diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf4680..6cfc6478863 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1486,7 +1486,6 @@ long keyctl_session_to_parent(void) oldwork = NULL; parent = me->real_parent; - task_lock(parent); /* the parent mustn't be init and mustn't be a kernel thread */ if (parent->pid <= 1 || !parent->mm) goto unlock; @@ -1530,7 +1529,6 @@ long keyctl_session_to_parent(void) if (!ret) newwork = NULL; unlock: - task_unlock(parent); write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index c220f314709..65f67cb0aef 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -51,6 +51,7 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); static inline void selinux_xfrm_notify_policyload(void) { atomic_inc(&flow_cache_genid); + rt_genid_bump(&init_net); } #else static inline int selinux_xfrm_enabled(void) diff --git a/sound/oss/.gitignore b/sound/oss/.gitignore index 7efb12b4550..12a3920d6fb 100644 --- a/sound/oss/.gitignore +++ b/sound/oss/.gitignore @@ -1,4 +1,3 @@ #Ignore generated files -maui_boot.h pss_boot.h trix_boot.h diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 3fd5b29dc93..a3acb7a85f6 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -702,7 +702,7 @@ static bool wm2000_readable_reg(struct device *dev, unsigned int reg) } static const struct regmap_config wm2000_regmap = { - .reg_bits = 8, + .reg_bits = 16, .val_bits = 8, .max_register = WM2000_REG_IF_CTL, diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index d6e2bb49c59..060dccb9ec7 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -197,7 +197,13 @@ static void prepare_outbound_urb(struct snd_usb_endpoint *ep, /* no data provider, so send silence */ unsigned int offs = 0; for (i = 0; i < ctx->packets; ++i) { - int counts = ctx->packet_size[i]; + int counts; + + if (ctx->packet_size[i]) + counts = ctx->packet_size[i]; + else + counts = snd_usb_endpoint_next_packet_size(ep); + urb->iso_frame_desc[i].offset = offs * ep->stride; urb->iso_frame_desc[i].length = counts * ep->stride; offs += counts; diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 14131cb0522..04d959fa022 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -129,7 +129,7 @@ CFLAGS ?= -g -Wall # Append required CFLAGS override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) -override CFLAGS += $(udis86-flags) +override CFLAGS += $(udis86-flags) -D_GNU_SOURCE ifeq ($(VERBOSE),1) Q = diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5f34aa371b5..47264b4652b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -24,13 +24,14 @@ * Frederic Weisbecker gave his permission to relicense the code to * the Lesser General Public License. */ -#define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <ctype.h> #include <errno.h> +#include <stdint.h> +#include <limits.h> #include "event-parse.h" #include "event-utils.h" @@ -117,14 +118,7 @@ void breakpoint(void) struct print_arg *alloc_arg(void) { - struct print_arg *arg; - - arg = malloc_or_die(sizeof(*arg)); - if (!arg) - return NULL; - memset(arg, 0, sizeof(*arg)); - - return arg; + return calloc(1, sizeof(struct print_arg)); } struct cmdline { @@ -158,7 +152,9 @@ static int cmdline_init(struct pevent *pevent) struct cmdline *cmdlines; int i; - cmdlines = malloc_or_die(sizeof(*cmdlines) * pevent->cmdline_count); + cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); + if (!cmdlines) + return -1; i = 0; while (cmdlist) { @@ -186,8 +182,8 @@ static char *find_cmdline(struct pevent *pevent, int pid) if (!pid) return "<idle>"; - if (!pevent->cmdlines) - cmdline_init(pevent); + if (!pevent->cmdlines && cmdline_init(pevent)) + return "<not enough memory for cmdlines!>"; key.pid = pid; @@ -215,8 +211,8 @@ int pevent_pid_is_registered(struct pevent *pevent, int pid) if (!pid) return 1; - if (!pevent->cmdlines) - cmdline_init(pevent); + if (!pevent->cmdlines && cmdline_init(pevent)) + return 0; key.pid = pid; @@ -258,10 +254,14 @@ static int add_new_comm(struct pevent *pevent, const char *comm, int pid) return -1; } - cmdlines[pevent->cmdline_count].pid = pid; cmdlines[pevent->cmdline_count].comm = strdup(comm); - if (!cmdlines[pevent->cmdline_count].comm) - die("malloc comm"); + if (!cmdlines[pevent->cmdline_count].comm) { + free(cmdlines); + errno = ENOMEM; + return -1; + } + + cmdlines[pevent->cmdline_count].pid = pid; if (cmdlines[pevent->cmdline_count].comm) pevent->cmdline_count++; @@ -288,10 +288,15 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) if (pevent->cmdlines) return add_new_comm(pevent, comm, pid); - item = malloc_or_die(sizeof(*item)); + item = malloc(sizeof(*item)); + if (!item) + return -1; + item->comm = strdup(comm); - if (!item->comm) - die("malloc comm"); + if (!item->comm) { + free(item); + return -1; + } item->pid = pid; item->next = pevent->cmdlist; @@ -355,7 +360,10 @@ static int func_map_init(struct pevent *pevent) struct func_map *func_map; int i; - func_map = malloc_or_die(sizeof(*func_map) * (pevent->func_count + 1)); + func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1)); + if (!func_map) + return -1; + funclist = pevent->funclist; i = 0; @@ -455,25 +463,36 @@ pevent_find_function_address(struct pevent *pevent, unsigned long long addr) int pevent_register_function(struct pevent *pevent, char *func, unsigned long long addr, char *mod) { - struct func_list *item; + struct func_list *item = malloc(sizeof(*item)); - item = malloc_or_die(sizeof(*item)); + if (!item) + return -1; item->next = pevent->funclist; item->func = strdup(func); - if (mod) + if (!item->func) + goto out_free; + + if (mod) { item->mod = strdup(mod); - else + if (!item->mod) + goto out_free_func; + } else item->mod = NULL; item->addr = addr; - if (!item->func || (mod && !item->mod)) - die("malloc func"); - pevent->funclist = item; pevent->func_count++; return 0; + +out_free_func: + free(item->func); + item->func = NULL; +out_free: + free(item); + errno = ENOMEM; + return -1; } /** @@ -524,14 +543,16 @@ static int printk_cmp(const void *a, const void *b) return 0; } -static void printk_map_init(struct pevent *pevent) +static int printk_map_init(struct pevent *pevent) { struct printk_list *printklist; struct printk_list *item; struct printk_map *printk_map; int i; - printk_map = malloc_or_die(sizeof(*printk_map) * (pevent->printk_count + 1)); + printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1)); + if (!printk_map) + return -1; printklist = pevent->printklist; @@ -549,6 +570,8 @@ static void printk_map_init(struct pevent *pevent) pevent->printk_map = printk_map; pevent->printklist = NULL; + + return 0; } static struct printk_map * @@ -557,8 +580,8 @@ find_printk(struct pevent *pevent, unsigned long long addr) struct printk_map *printk; struct printk_map key; - if (!pevent->printk_map) - printk_map_init(pevent); + if (!pevent->printk_map && printk_map_init(pevent)) + return NULL; key.addr = addr; @@ -580,21 +603,27 @@ find_printk(struct pevent *pevent, unsigned long long addr) int pevent_register_print_string(struct pevent *pevent, char *fmt, unsigned long long addr) { - struct printk_list *item; + struct printk_list *item = malloc(sizeof(*item)); - item = malloc_or_die(sizeof(*item)); + if (!item) + return -1; item->next = pevent->printklist; - item->printk = strdup(fmt); item->addr = addr; + item->printk = strdup(fmt); if (!item->printk) - die("malloc fmt"); + goto out_free; pevent->printklist = item; pevent->printk_count++; return 0; + +out_free: + free(item); + errno = ENOMEM; + return -1; } /** @@ -619,24 +648,18 @@ void pevent_print_printk(struct pevent *pevent) static struct event_format *alloc_event(void) { - struct event_format *event; - - event = malloc(sizeof(*event)); - if (!event) - return NULL; - memset(event, 0, sizeof(*event)); - - return event; + return calloc(1, sizeof(struct event_format)); } -static void add_event(struct pevent *pevent, struct event_format *event) +static int add_event(struct pevent *pevent, struct event_format *event) { int i; + struct event_format **events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); + if (!events) + return -1; - pevent->events = realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); - if (!pevent->events) - die("Can not allocate events"); + pevent->events = events; for (i = 0; i < pevent->nr_events; i++) { if (pevent->events[i]->id > event->id) @@ -651,6 +674,8 @@ static void add_event(struct pevent *pevent, struct event_format *event) pevent->nr_events++; event->pevent = pevent; + + return 0; } static int event_item_type(enum event_type type) @@ -827,9 +852,9 @@ static enum event_type __read_token(char **tok) switch (type) { case EVENT_NEWLINE: case EVENT_DELIM: - *tok = malloc_or_die(2); - (*tok)[0] = ch; - (*tok)[1] = 0; + if (asprintf(tok, "%c", ch) < 0) + return EVENT_ERROR; + return type; case EVENT_OP: @@ -1240,8 +1265,10 @@ static int event_read_fields(struct event_format *event, struct format_field **f last_token = token; - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); + field = calloc(1, sizeof(*field)); + if (!field) + goto fail; + field->event = event; /* read the rest of the type */ @@ -1282,7 +1309,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f } if (!field->type) { - die("no type found"); + do_warning("%s: no type found", __func__); goto fail; } field->name = last_token; @@ -1329,7 +1356,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f free_token(token); type = read_token(&token); if (type == EVENT_NONE) { - die("failed to find token"); + do_warning("failed to find token"); goto fail; } } @@ -1538,6 +1565,14 @@ process_cond(struct event_format *event, struct print_arg *top, char **tok) left = alloc_arg(); right = alloc_arg(); + if (!arg || !left || !right) { + do_warning("%s: not enough memory!", __func__); + /* arg will be freed at out_free */ + free_arg(left); + free_arg(right); + goto out_free; + } + arg->type = PRINT_OP; arg->op.left = left; arg->op.right = right; @@ -1580,6 +1615,12 @@ process_array(struct event_format *event, struct print_arg *top, char **tok) char *token = NULL; arg = alloc_arg(); + if (!arg) { + do_warning("%s: not enough memory!", __func__); + /* '*tok' is set to top->op.op. No need to free. */ + *tok = NULL; + return EVENT_ERROR; + } *tok = NULL; type = process_arg(event, arg, &token); @@ -1595,8 +1636,7 @@ process_array(struct event_format *event, struct print_arg *top, char **tok) return type; out_free: - free_token(*tok); - *tok = NULL; + free_token(token); free_arg(arg); return EVENT_ERROR; } @@ -1682,7 +1722,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) if (arg->type == PRINT_OP && !arg->op.left) { /* handle single op */ if (token[1]) { - die("bad op token %s", token); + do_warning("bad op token %s", token); goto out_free; } switch (token[0]) { @@ -1699,10 +1739,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) /* make an empty left */ left = alloc_arg(); + if (!left) + goto out_warn_free; + left->type = PRINT_NULL; arg->op.left = left; right = alloc_arg(); + if (!right) + goto out_warn_free; + arg->op.right = right; /* do not free the token, it belongs to an op */ @@ -1712,6 +1758,9 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) } else if (strcmp(token, "?") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; + /* copy the top arg to the left */ *left = *arg; @@ -1720,6 +1769,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->op.left = left; arg->op.prio = 0; + /* it will set arg->op.right */ type = process_cond(event, arg, tok); } else if (strcmp(token, ">>") == 0 || @@ -1739,6 +1789,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) strcmp(token, "!=") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; /* copy the top arg to the left */ *left = *arg; @@ -1746,6 +1798,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_OP; arg->op.op = token; arg->op.left = left; + arg->op.right = NULL; if (set_op_prio(arg) == -1) { event->flags |= EVENT_FL_FAILED; @@ -1762,12 +1815,14 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) type == EVENT_DELIM && (strcmp(token, ")") == 0)) { char *new_atom; - if (left->type != PRINT_ATOM) - die("bad pointer type"); + if (left->type != PRINT_ATOM) { + do_warning("bad pointer type"); + goto out_free; + } new_atom = realloc(left->atom.atom, strlen(left->atom.atom) + 3); if (!new_atom) - goto out_free; + goto out_warn_free; left->atom.atom = new_atom; strcat(left->atom.atom, " *"); @@ -1779,12 +1834,18 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) } right = alloc_arg(); + if (!right) + goto out_warn_free; + type = process_arg_token(event, right, tok, type); arg->op.right = right; } else if (strcmp(token, "[") == 0) { left = alloc_arg(); + if (!left) + goto out_warn_free; + *left = *arg; arg->type = PRINT_OP; @@ -1793,6 +1854,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) arg->op.prio = 0; + /* it will set arg->op.right */ type = process_array(event, arg, tok); } else { @@ -1816,14 +1878,16 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) return type; - out_free: +out_warn_free: + do_warning("%s: not enough memory!", __func__); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; } static enum event_type -process_entry(struct event_format *event __unused, struct print_arg *arg, +process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, char **tok) { enum event_type type; @@ -1880,7 +1944,11 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val; } - ref = malloc_or_die(len); + ref = malloc(len); + if (!ref) { + do_warning("%s: not enough memory!", __func__); + return val; + } memcpy(ref, type, len); /* chop off the " *" */ @@ -1957,8 +2025,10 @@ eval_type_str(unsigned long long val, const char *type, int pointer) static unsigned long long eval_type(unsigned long long val, struct print_arg *arg, int pointer) { - if (arg->type != PRINT_TYPE) - die("expected type argument"); + if (arg->type != PRINT_TYPE) { + do_warning("expected type argument"); + return 0; + } return eval_type_str(val, arg->typecast.type, pointer); } @@ -2143,7 +2213,7 @@ static char *arg_eval (struct print_arg *arg) case PRINT_STRING: case PRINT_BSTRING: default: - die("invalid eval type %d", arg->type); + do_warning("invalid eval type %d", arg->type); break; } @@ -2166,6 +2236,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** break; arg = alloc_arg(); + if (!arg) + goto out_free; free_token(token); type = process_arg(event, arg, &token); @@ -2179,30 +2251,33 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; - field = malloc_or_die(sizeof(*field)); - memset(field, 0, sizeof(*field)); + field = calloc(1, sizeof(*field)); + if (!field) + goto out_free; value = arg_eval(arg); if (value == NULL) - goto out_free; + goto out_free_field; field->value = strdup(value); if (field->value == NULL) - goto out_free; + goto out_free_field; free_arg(arg); arg = alloc_arg(); + if (!arg) + goto out_free; free_token(token); type = process_arg(event, arg, &token); if (test_type_token(type, token, EVENT_OP, "}")) - goto out_free; + goto out_free_field; value = arg_eval(arg); if (value == NULL) - goto out_free; + goto out_free_field; field->str = strdup(value); if (field->str == NULL) - goto out_free; + goto out_free_field; free_arg(arg); arg = NULL; @@ -2216,6 +2291,8 @@ process_fields(struct event_format *event, struct print_flag_sym **list, char ** *tok = token; return type; +out_free_field: + free_flag_sym(field); out_free: free_arg(arg); free_token(token); @@ -2235,6 +2312,10 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_FLAGS; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } type = process_arg(event, field, &token); @@ -2243,7 +2324,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) type = process_op(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + goto out_free_field; free_token(token); arg->flags.field = field; @@ -2265,7 +2346,9 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) type = read_token_item(tok); return type; - out_free: +out_free_field: + free_arg(field); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; @@ -2282,10 +2365,14 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_SYMBOL; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; + goto out_free_field; arg->symbol.field = field; @@ -2297,7 +2384,9 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) type = read_token_item(tok); return type; - out_free: +out_free_field: + free_arg(field); +out_free: free_token(token); *tok = NULL; return EVENT_ERROR; @@ -2314,6 +2403,11 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) arg->type = PRINT_HEX; field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } + type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ",")) @@ -2324,6 +2418,12 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_token(token); field = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + *tok = NULL; + return EVENT_ERROR; + } + type = process_arg(event, field, &token); if (test_type_token(type, token, EVENT_DELIM, ")")) @@ -2381,6 +2481,12 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char ** free_token(token); arg = alloc_arg(); + if (!field) { + do_warning("%s: not enough memory!", __func__); + *tok = NULL; + return EVENT_ERROR; + } + type = process_arg(event, arg, &token); if (type == EVENT_ERROR) goto out_free_arg; @@ -2434,10 +2540,16 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok) /* make this a typecast and contine */ /* prevous must be an atom */ - if (arg->type != PRINT_ATOM) - die("previous needed to be PRINT_ATOM"); + if (arg->type != PRINT_ATOM) { + do_warning("previous needed to be PRINT_ATOM"); + goto out_free; + } item_arg = alloc_arg(); + if (!item_arg) { + do_warning("%s: not enough memory!", __func__); + goto out_free; + } arg->type = PRINT_TYPE; arg->typecast.type = arg->atom.atom; @@ -2457,7 +2569,8 @@ process_paren(struct event_format *event, struct print_arg *arg, char **tok) static enum event_type -process_str(struct event_format *event __unused, struct print_arg *arg, char **tok) +process_str(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) { enum event_type type; char *token; @@ -2532,6 +2645,11 @@ process_func_handler(struct event_format *event, struct pevent_function_handler next_arg = &(arg->func.args); for (i = 0; i < func->nr_args; i++) { farg = alloc_arg(); + if (!farg) { + do_warning("%s: not enough memory!", __func__); + return EVENT_ERROR; + } + type = process_arg(event, farg, &token); if (i < (func->nr_args - 1)) test = ","; @@ -2676,7 +2794,8 @@ process_arg_token(struct event_format *event, struct print_arg *arg, case EVENT_ERROR ... EVENT_NEWLINE: default: - die("unexpected type %d", type); + do_warning("unexpected type %d", type); + return EVENT_ERROR; } *tok = token; @@ -2697,6 +2816,10 @@ static int event_read_print_args(struct event_format *event, struct print_arg ** } arg = alloc_arg(); + if (!arg) { + do_warning("%s: not enough memory!", __func__); + return -1; + } type = process_arg(event, arg, &token); @@ -2768,10 +2891,8 @@ static int event_read_print(struct event_format *event) if (type == EVENT_DQUOTE) { char *cat; - cat = malloc_or_die(strlen(event->print_fmt.format) + - strlen(token) + 1); - strcpy(cat, event->print_fmt.format); - strcat(cat, token); + if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) + goto fail; free_token(token); free_token(event->print_fmt.format); event->print_fmt.format = NULL; @@ -2925,8 +3046,10 @@ static int get_common_info(struct pevent *pevent, * All events should have the same common elements. * Pick any event to find where the type is; */ - if (!pevent->events) - die("no event_list!"); + if (!pevent->events) { + do_warning("no event_list!"); + return -1; + } event = pevent->events[0]; field = pevent_find_common_field(event, type); @@ -3084,7 +3207,8 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg if (!arg->field.field) { arg->field.field = pevent_find_any_field(event, arg->field.name); if (!arg->field.field) - die("field %s not found", arg->field.name); + goto out_warning_field; + } /* must be a number */ val = pevent_read_number(pevent, data + arg->field.field->offset, @@ -3145,8 +3269,10 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg if (!larg->field.field) { larg->field.field = pevent_find_any_field(event, larg->field.name); - if (!larg->field.field) - die("field %s not found", larg->field.name); + if (!larg->field.field) { + arg = larg; + goto out_warning_field; + } } field_size = larg->field.field->elementsize; offset = larg->field.field->offset + @@ -3182,7 +3308,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left != right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '~': @@ -3212,7 +3338,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left <= right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '>': @@ -3227,12 +3353,13 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left >= right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; case '=': if (arg->op.op[1] != '=') - die("unknown op '%s'", arg->op.op); + goto out_warning_op; + val = left == right; break; case '-': @@ -3248,13 +3375,21 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg val = left * right; break; default: - die("unknown op '%s'", arg->op.op); + goto out_warning_op; } break; default: /* not sure what to do there */ return 0; } return val; + +out_warning_op: + do_warning("%s: unknown op '%s'", __func__, arg->op.op); + return 0; + +out_warning_field: + do_warning("%s: field %s not found", __func__, arg->field.name); + return 0; } struct flag { @@ -3331,8 +3466,10 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, field = arg->field.field; if (!field) { field = pevent_find_any_field(event, arg->field.name); - if (!field) - die("field %s not found", arg->field.name); + if (!field) { + str = arg->field.name; + goto out_warning_field; + } arg->field.field = field; } /* Zero sized fields, mean the rest of the data */ @@ -3349,7 +3486,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, trace_seq_printf(s, "%lx", addr); break; } - str = malloc_or_die(len + 1); + str = malloc(len + 1); + if (!str) { + do_warning("%s: not enough memory!", __func__); + return; + } memcpy(str, data + field->offset, len); str[len] = 0; print_str_to_seq(s, format, len_arg, str); @@ -3389,7 +3530,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, str = arg->hex.field->field.name; field = pevent_find_any_field(event, str); if (!field) - die("field %s not found", str); + goto out_warning_field; arg->hex.field->field.field = field; } hex = data + field->offset; @@ -3441,6 +3582,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, /* well... */ break; } + + return; + +out_warning_field: + do_warning("%s: field %s not found", __func__, arg->field.name); } static unsigned long long @@ -3467,7 +3613,11 @@ process_defined_func(struct trace_seq *s, void *data, int size, farg = arg->func.args; param = func_handle->params; - args = malloc_or_die(sizeof(*args) * func_handle->nr_args); + ret = ULLONG_MAX; + args = malloc(sizeof(*args) * func_handle->nr_args); + if (!args) + goto out; + for (i = 0; i < func_handle->nr_args; i++) { switch (param->type) { case PEVENT_FUNC_ARG_INT: @@ -3479,13 +3629,19 @@ process_defined_func(struct trace_seq *s, void *data, int size, trace_seq_init(&str); print_str_arg(&str, data, size, event, "%s", -1, farg); trace_seq_terminate(&str); - string = malloc_or_die(sizeof(*string)); + string = malloc(sizeof(*string)); + if (!string) { + do_warning("%s(%d): malloc str", __func__, __LINE__); + goto out_free; + } string->next = strings; string->str = strdup(str.buffer); - if (!string->str) - die("malloc str"); - - args[i] = (unsigned long long)string->str; + if (!string->str) { + free(string); + do_warning("%s(%d): malloc str", __func__, __LINE__); + goto out_free; + } + args[i] = (uintptr_t)string->str; strings = string; trace_seq_destroy(&str); break; @@ -3494,14 +3650,15 @@ process_defined_func(struct trace_seq *s, void *data, int size, * Something went totally wrong, this is not * an input error, something in this code broke. */ - die("Unexpected end of arguments\n"); - break; + do_warning("Unexpected end of arguments\n"); + goto out_free; } farg = farg->next; param = param->next; } ret = (*func_handle->func)(s, args); +out_free: free(args); while (strings) { string = strings; @@ -3515,6 +3672,18 @@ process_defined_func(struct trace_seq *s, void *data, int size, return ret; } +static void free_args(struct print_arg *args) +{ + struct print_arg *next; + + while (args) { + next = args->next; + + free_arg(args); + args = next; + } +} + static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event) { struct pevent *pevent = event->pevent; @@ -3530,11 +3699,15 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (!field) { field = pevent_find_field(event, "buf"); - if (!field) - die("can't find buffer field for binary printk"); + if (!field) { + do_warning("can't find buffer field for binary printk"); + return NULL; + } ip_field = pevent_find_field(event, "ip"); - if (!ip_field) - die("can't find ip field for binary printk"); + if (!ip_field) { + do_warning("can't find ip field for binary printk"); + return NULL; + } pevent->bprint_buf_field = field; pevent->bprint_ip_field = ip_field; } @@ -3545,13 +3718,18 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc * The first arg is the IP pointer. */ args = alloc_arg(); + if (!args) { + do_warning("%s(%d): not enough memory!", __func__, __LINE__); + return NULL; + } arg = args; arg->next = NULL; next = &arg->next; arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", ip); + + if (asprintf(&arg->atom.atom, "%lld", ip) < 0) + goto out_free; /* skip the first "%pf : " */ for (ptr = fmt + 6, bptr = data + field->offset; @@ -3606,10 +3784,17 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc val = pevent_read_number(pevent, bptr, vsize); bptr += vsize; arg = alloc_arg(); + if (!arg) { + do_warning("%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } arg->next = NULL; arg->type = PRINT_ATOM; - arg->atom.atom = malloc_or_die(32); - sprintf(arg->atom.atom, "%lld", val); + if (asprintf(&arg->atom.atom, "%lld", val) < 0) { + free(arg); + goto out_free; + } *next = arg; next = &arg->next; /* @@ -3622,11 +3807,16 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc break; case 's': arg = alloc_arg(); + if (!arg) { + do_warning("%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } arg->next = NULL; arg->type = PRINT_BSTRING; arg->string.string = strdup(bptr); if (!arg->string.string) - break; + goto out_free; bptr += strlen(bptr) + 1; *next = arg; next = &arg->next; @@ -3637,22 +3827,15 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc } return args; -} -static void free_args(struct print_arg *args) -{ - struct print_arg *next; - - while (args) { - next = args->next; - - free_arg(args); - args = next; - } +out_free: + free_args(args); + return NULL; } static char * -get_bprint_format(void *data, int size __unused, struct event_format *event) +get_bprint_format(void *data, int size __maybe_unused, + struct event_format *event) { struct pevent *pevent = event->pevent; unsigned long long addr; @@ -3665,8 +3848,10 @@ get_bprint_format(void *data, int size __unused, struct event_format *event) if (!field) { field = pevent_find_field(event, "fmt"); - if (!field) - die("can't find format field for binary printk"); + if (!field) { + do_warning("can't find format field for binary printk"); + return NULL; + } pevent->bprint_fmt_field = field; } @@ -3674,9 +3859,8 @@ get_bprint_format(void *data, int size __unused, struct event_format *event) printk = find_printk(pevent, addr); if (!printk) { - format = malloc_or_die(45); - sprintf(format, "%%pf : (NO FORMAT FOUND at %llx)\n", - addr); + if (asprintf(&format, "%%pf : (NO FORMAT FOUND at %llx)\n", addr) < 0) + return NULL; return format; } @@ -3684,8 +3868,8 @@ get_bprint_format(void *data, int size __unused, struct event_format *event) /* Remove any quotes. */ if (*p == '"') p++; - format = malloc_or_die(strlen(p) + 10); - sprintf(format, "%s : %s", "%pf", p); + if (asprintf(&format, "%s : %s", "%pf", p) < 0) + return NULL; /* remove ending quotes and new line since we will add one too */ p = format + strlen(format) - 1; if (*p == '"') @@ -3720,8 +3904,11 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, if (!arg->field.field) { arg->field.field = pevent_find_any_field(event, arg->field.name); - if (!arg->field.field) - die("field %s not found", arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return; + } } if (arg->field.field->size != 6) { trace_seq_printf(s, "INVALIDMAC"); @@ -3888,8 +4075,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event goto cont_process; case '*': /* The argument is the length. */ - if (!arg) - die("no argument match"); + if (!arg) { + do_warning("no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len_arg = eval_num_arg(data, size, event, arg); len_as_arg = 1; arg = arg->next; @@ -3922,15 +4112,21 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event case 'x': case 'X': case 'u': - if (!arg) - die("no argument match"); + if (!arg) { + do_warning("no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len = ((unsigned long)ptr + 1) - (unsigned long)saveptr; /* should never happen */ - if (len > 31) - die("bad format!"); + if (len > 31) { + do_warning("bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } memcpy(format, saveptr, len); format[len] = 0; @@ -3994,19 +4190,26 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event trace_seq_printf(s, format, (long long)val); break; default: - die("bad count (%d)", ls); + do_warning("bad count (%d)", ls); + event->flags |= EVENT_FL_FAILED; } break; case 's': - if (!arg) - die("no matching argument"); + if (!arg) { + do_warning("no matching argument"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } len = ((unsigned long)ptr + 1) - (unsigned long)saveptr; /* should never happen */ - if (len > 31) - die("bad format!"); + if (len > 31) { + do_warning("bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } memcpy(format, saveptr, len); format[len] = 0; @@ -4024,6 +4227,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event trace_seq_putc(s, *ptr); } + if (event->flags & EVENT_FL_FAILED) { +out_failed: + trace_seq_printf(s, "[FAILED TO PARSE]"); + } + if (args) { free_args(args); free(bprint_fmt); @@ -4356,7 +4564,10 @@ get_event_fields(const char *type, const char *name, struct format_field *field; int i = 0; - fields = malloc_or_die(sizeof(*fields) * (count + 1)); + fields = malloc(sizeof(*fields) * (count + 1)); + if (!fields) + return NULL; + for (field = list; field; field = field->next) { fields[i++] = field; if (i == count + 1) { @@ -4672,8 +4883,7 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) } /** - * pevent_parse_event - parse the event format - * @pevent: the handle to the pevent + * __pevent_parse_format - parse the event format * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -4685,28 +4895,27 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -int pevent_parse_event(struct pevent *pevent, - const char *buf, unsigned long size, - const char *sys) +enum pevent_errno __pevent_parse_format(struct event_format **eventp, + struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) { struct event_format *event; int ret; init_input_buf(buf, size); - event = alloc_event(); + *eventp = event = alloc_event(); if (!event) - return -ENOMEM; + return PEVENT_ERRNO__MEM_ALLOC_FAILED; event->name = event_read_name(); if (!event->name) { /* Bad event? */ - free(event); - return -1; + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; } if (strcmp(sys, "ftrace") == 0) { - event->flags |= EVENT_FL_ISFTRACE; if (strcmp(event->name, "bprint") == 0) @@ -4714,74 +4923,189 @@ int pevent_parse_event(struct pevent *pevent, } event->id = event_read_id(); - if (event->id < 0) - die("failed to read event id"); + if (event->id < 0) { + ret = PEVENT_ERRNO__READ_ID_FAILED; + /* + * This isn't an allocation error actually. + * But as the ID is critical, just bail out. + */ + goto event_alloc_failed; + } event->system = strdup(sys); - if (!event->system) - die("failed to allocate system"); - - /* Add pevent to event so that it can be referenced */ - event->pevent = pevent; + if (!event->system) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; + } ret = event_read_format(event); if (ret < 0) { - do_warning("failed to read event format for %s", event->name); - goto event_failed; + ret = PEVENT_ERRNO__READ_FORMAT_FAILED; + goto event_parse_failed; } /* * If the event has an override, don't print warnings if the event * print format fails to parse. */ - if (find_event_handle(pevent, event)) + if (pevent && find_event_handle(pevent, event)) show_warning = 0; ret = event_read_print(event); - if (ret < 0) { - do_warning("failed to read event print fmt for %s", - event->name); - show_warning = 1; - goto event_failed; - } show_warning = 1; - add_event(pevent, event); + if (ret < 0) { + ret = PEVENT_ERRNO__READ_PRINT_FAILED; + goto event_parse_failed; + } if (!ret && (event->flags & EVENT_FL_ISFTRACE)) { struct format_field *field; struct print_arg *arg, **list; /* old ftrace had no args */ - list = &event->print_fmt.args; for (field = event->format.fields; field; field = field->next) { arg = alloc_arg(); - *list = arg; - list = &arg->next; + if (!arg) { + event->flags |= EVENT_FL_FAILED; + return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED; + } arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); if (!arg->field.name) { - do_warning("failed to allocate field name"); event->flags |= EVENT_FL_FAILED; - return -1; + free_arg(arg); + return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED; } arg->field.field = field; + *list = arg; + list = &arg->next; } return 0; } + return 0; + + event_parse_failed: + event->flags |= EVENT_FL_FAILED; + return ret; + + event_alloc_failed: + free(event->system); + free(event->name); + free(event); + *eventp = NULL; + return ret; +} + +/** + * pevent_parse_format - parse the event format + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, + unsigned long size, const char *sys) +{ + return __pevent_parse_format(eventp, NULL, buf, size, sys); +} + +/** + * pevent_parse_event - parse the event format + * @pevent: the handle to the pevent + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) +{ + struct event_format *event = NULL; + int ret = __pevent_parse_format(&event, pevent, buf, size, sys); + + if (event == NULL) + return ret; + + /* Add pevent to event so that it can be referenced */ + event->pevent = pevent; + + if (add_event(pevent, event)) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_add_failed; + } + #define PRINT_ARGS 0 if (PRINT_ARGS && event->print_fmt.args) print_args(event->print_fmt.args); return 0; - event_failed: - event->flags |= EVENT_FL_FAILED; - /* still add it even if it failed */ - add_event(pevent, event); - return -1; +event_add_failed: + pevent_free_format(event); + return ret; +} + +#undef _PE +#define _PE(code, str) str +static const char * const pevent_error_str[] = { + PEVENT_ERRORS +}; +#undef _PE + +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen) +{ + int idx; + const char *msg; + + if (errnum >= 0) { + msg = strerror_r(errnum, buf, buflen); + if (msg != buf) { + size_t len = strlen(msg); + memcpy(buf, msg, min(buflen - 1, len)); + *(buf + min(buflen - 1, len)) = '\0'; + } + return 0; + } + + if (errnum <= __PEVENT_ERRNO__START || + errnum >= __PEVENT_ERRNO__END) + return -1; + + idx = errnum - __PEVENT_ERRNO__START - 1; + msg = pevent_error_str[idx]; + + switch (errnum) { + case PEVENT_ERRNO__MEM_ALLOC_FAILED: + case PEVENT_ERRNO__PARSE_EVENT_FAILED: + case PEVENT_ERRNO__READ_ID_FAILED: + case PEVENT_ERRNO__READ_FORMAT_FAILED: + case PEVENT_ERRNO__READ_PRINT_FAILED: + case PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED: + snprintf(buf, buflen, "%s", msg); + break; + + default: + /* cannot reach here */ + break; + } + + return 0; } int get_field_val(struct trace_seq *s, struct format_field *field, @@ -5000,6 +5324,7 @@ int pevent_register_print_function(struct pevent *pevent, struct pevent_func_params *param; enum pevent_func_arg_type type; va_list ap; + int ret; func_handle = find_func_handler(pevent, name); if (func_handle) { @@ -5012,14 +5337,20 @@ int pevent_register_print_function(struct pevent *pevent, remove_func_handler(pevent, name); } - func_handle = malloc_or_die(sizeof(*func_handle)); - memset(func_handle, 0, sizeof(*func_handle)); + func_handle = calloc(1, sizeof(*func_handle)); + if (!func_handle) { + do_warning("Failed to allocate function handler"); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } func_handle->ret_type = ret_type; func_handle->name = strdup(name); func_handle->func = func; - if (!func_handle->name) - die("Failed to allocate function name"); + if (!func_handle->name) { + do_warning("Failed to allocate function name"); + free(func_handle); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } next_param = &(func_handle->params); va_start(ap, name); @@ -5029,11 +5360,17 @@ int pevent_register_print_function(struct pevent *pevent, break; if (type < 0 || type >= PEVENT_FUNC_ARG_MAX_TYPES) { - warning("Invalid argument type %d", type); + do_warning("Invalid argument type %d", type); + ret = PEVENT_ERRNO__INVALID_ARG_TYPE; goto out_free; } - param = malloc_or_die(sizeof(*param)); + param = malloc(sizeof(*param)); + if (!param) { + do_warning("Failed to allocate function param"); + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto out_free; + } param->type = type; param->next = NULL; @@ -5051,7 +5388,7 @@ int pevent_register_print_function(struct pevent *pevent, out_free: va_end(ap); free_func_handle(func_handle); - return -1; + return ret; } /** @@ -5103,8 +5440,12 @@ int pevent_register_event_handler(struct pevent *pevent, not_found: /* Save for later use. */ - handle = malloc_or_die(sizeof(*handle)); - memset(handle, 0, sizeof(*handle)); + handle = calloc(1, sizeof(*handle)); + if (!handle) { + do_warning("Failed to allocate event handler"); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; + } + handle->id = id; if (event_name) handle->event_name = strdup(event_name); @@ -5113,7 +5454,11 @@ int pevent_register_event_handler(struct pevent *pevent, if ((event_name && !handle->event_name) || (sys_name && !handle->sys_name)) { - die("Failed to allocate event/sys name"); + do_warning("Failed to allocate event/sys name"); + free((void *)handle->event_name); + free((void *)handle->sys_name); + free(handle); + return PEVENT_ERRNO__MEM_ALLOC_FAILED; } handle->func = func; @@ -5129,13 +5474,10 @@ int pevent_register_event_handler(struct pevent *pevent, */ struct pevent *pevent_alloc(void) { - struct pevent *pevent; + struct pevent *pevent = calloc(1, sizeof(*pevent)); - pevent = malloc(sizeof(*pevent)); - if (!pevent) - return NULL; - memset(pevent, 0, sizeof(*pevent)); - pevent->ref_count = 1; + if (pevent) + pevent->ref_count = 1; return pevent; } @@ -5164,7 +5506,7 @@ static void free_formats(struct format *format) free_format_fields(format->fields); } -static void free_event(struct event_format *event) +void pevent_free_format(struct event_format *event) { free(event->name); free(event->system); @@ -5250,7 +5592,7 @@ void pevent_free(struct pevent *pevent) } for (i = 0; i < pevent->nr_events; i++) - free_event(pevent->events[i]); + pevent_free_format(pevent->events[i]); while (pevent->handlers) { handle = pevent->handlers; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 5772ad8cb38..24a4bbabc5d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -24,8 +24,8 @@ #include <stdarg.h> #include <regex.h> -#ifndef __unused -#define __unused __attribute__ ((unused)) +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) #endif /* ----------------------- trace_seq ----------------------- */ @@ -49,7 +49,7 @@ struct pevent_record { int cpu; int ref_count; int locked; /* Do not free, even if ref_count is zero */ - void *private; + void *priv; #if DEBUG_RECORD struct pevent_record *prev; struct pevent_record *next; @@ -106,7 +106,7 @@ struct plugin_option { char *plugin_alias; char *description; char *value; - void *private; + void *priv; int set; }; @@ -345,6 +345,35 @@ enum pevent_flag { PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ }; +#define PEVENT_ERRORS \ + _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ + _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ + _PE(READ_ID_FAILED, "failed to read event id"), \ + _PE(READ_FORMAT_FAILED, "failed to read event format"), \ + _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ + _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ + _PE(INVALID_ARG_TYPE, "invalid argument type") + +#undef _PE +#define _PE(__code, __str) PEVENT_ERRNO__ ## __code +enum pevent_errno { + PEVENT_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __PEVENT_ERRNO__START = -100000, + + PEVENT_ERRORS, + + __PEVENT_ERRNO__END, +}; +#undef _PE + struct cmdline; struct cmdline_list; struct func_map; @@ -509,8 +538,11 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, int long_size); -int pevent_parse_event(struct pevent *pevent, const char *buf, - unsigned long size, const char *sys); +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys); +enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, + unsigned long size, const char *sys); +void pevent_free_format(struct event_format *event); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, @@ -561,6 +593,8 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen); struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type); struct format_field **pevent_event_common_fields(struct event_format *event); diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h index 08296383d1e..bc075006966 100644 --- a/tools/lib/traceevent/event-utils.h +++ b/tools/lib/traceevent/event-utils.h @@ -39,6 +39,12 @@ void __vdie(const char *fmt, ...); void __vwarning(const char *fmt, ...); void __vpr_stat(const char *fmt, ...); +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + static inline char *strim(char *string) { char *ret; diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 26b823b61aa..8f8fbc227a4 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -21,3 +21,5 @@ config.mak config.mak.autogen *-bison.* *-flex.* +*.pyc +*.pyo diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ca600e09c8d..9f2e44f2b17 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -195,10 +195,10 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) -../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE +$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE + $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE --include ../PERF-VERSION-FILE +-include $(OUTPUT)PERF-VERSION-FILE # # Determine "include::" file references in asciidoc files. diff --git a/tools/perf/Documentation/jit-interface.txt b/tools/perf/Documentation/jit-interface.txt new file mode 100644 index 00000000000..a8656f56491 --- /dev/null +++ b/tools/perf/Documentation/jit-interface.txt @@ -0,0 +1,15 @@ +perf supports a simple JIT interface to resolve symbols for dynamic code generated +by a JIT. + +The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file + +This is a text file. + +Each line has the following format, fields separated with spaces: + +START SIZE symbolname + +START and SIZE are hex numbers without 0x. +symbolname is the rest of the line, so it could contain special characters. + +The ownership of the file has to match the process. diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index c89f9e1453f..c8ffd9fd5c6 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -85,6 +85,9 @@ OPTIONS -M:: --disassembler-style=:: Set disassembler style for objdump. +--objdump=<path>:: + Path to objdump binary. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 74d7481ed7a..ab7f667de1b 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -17,6 +17,9 @@ captured via perf record. If no parameters are passed it will assume perf.data.old and perf.data. +The differential profile is displayed only for events matching both +specified perf.data files. + OPTIONS ------- -M:: diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index dd84cb2f0a8..326f2cb333c 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -12,7 +12,7 @@ SYNOPSIS [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] {top|record|report|diff|buildid-list} 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> - | --guestvmlinux=<path>] {top|record|report|diff|buildid-list} + | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} DESCRIPTION ----------- @@ -38,6 +38,18 @@ There are a couple of variants of perf kvm: so that other tools can be used to fetch packages with matching symbol tables for use by perf report. + 'perf kvm stat <command>' to run a command and gather performance counter + statistics. + Especially, perf 'kvm stat record/report' generates a statistical analysis + of KVM events. Currently, vmexit, mmio and ioport events are supported. + 'perf kvm stat record <command>' records kvm events and the events between + start and end <command>. + And this command produces a file which contains tracing results of kvm + events. + + 'perf kvm stat report' reports statistical data which includes events + handled time, samples, and so on. + OPTIONS ------- -i:: @@ -68,7 +80,21 @@ OPTIONS --guestvmlinux=<path>:: Guest os kernel vmlinux. +STAT REPORT OPTIONS +------------------- +--vcpu=<value>:: + analyze events which occures on this vcpu. (default: all vcpus) + +--events=<value>:: + events to be analyzed. Possible values: vmexit, mmio, ioport. + (default: vmexit) +-k:: +--key=<value>:: + Sorting key. Possible values: sample (default, sort by samples + number), time (sort by average time). + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], -linkperf:perf-diff[1], linkperf:perf-buildid-list[1] +linkperf:perf-diff[1], linkperf:perf-buildid-list[1], +linkperf:perf-stat[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ddc22525228..d1e39dc8c81 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -15,24 +15,43 @@ DESCRIPTION This command displays the symbolic event types which can be selected in the various perf commands with the -e option. +[[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- Events can optionally have a modifer by appending a colon and one or -more modifiers. Modifiers allow the user to restrict when events are -counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. -Additional modifiers are 'G' for guest counting (in KVM guests) and 'H' -for host counting (not in KVM guests). +more modifiers. Modifiers allow the user to restrict the events to be +counted. The following modifiers exist: + + u - user-space counting + k - kernel counting + h - hypervisor counting + G - guest counting (in KVM guests) + H - host counting (not in KVM guests) + p - precise level The 'p' modifier can be used for specifying how precise the instruction -address should be. The 'p' modifier is currently only implemented for -Intel PEBS and can be specified multiple times: - 0 - SAMPLE_IP can have arbitrary skid - 1 - SAMPLE_IP must have constant skid - 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid +address should be. The 'p' modifier can be specified multiple times: + + 0 - SAMPLE_IP can have arbitrary skid + 1 - SAMPLE_IP must have constant skid + 2 - SAMPLE_IP requested to have 0 skid + 3 - SAMPLE_IP must have 0 skid + +For Intel systems precise event sampling is implemented with PEBS +which supports up to precise-level 2. -The PEBS implementation now supports up to 2. +On AMD systems it is implemented using IBS (up to precise-level 2). +The precise modifier works with event types 0x76 (cpu-cycles, CPU +clocks not halted) and 0xC1 (micro-ops retired). Both events map to +IBS execution sampling (IBS op) with the IBS Op Counter Control bit +(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +Manual Volume 2: System Programming, 13.3 Instruction-Based +Sampling). Examples to use IBS: + + perf record -a -e cpu-cycles:p ... # use ibs op counting cycles + perf record -a -e r076:p ... # same as -e cpu-cycles:p + perf record -a -e r0C1:p ... # use ibs op counting micro-ops RAW HARDWARE EVENT DESCRIPTOR ----------------------------- @@ -44,6 +63,11 @@ layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Softwar of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +Note: Only the following bit fields can be set in x86 counter +registers: event, umask, edge, inv, cmask. Esp. guest/host only and +OS/user mode flags must be setup using <<EVENT_MODIFIERS, EVENT +MODIFIERS>>. + Example: If the Intel docs for a QM720 Core i7 describe an event as: @@ -91,4 +115,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 495210a612c..f4d91bebd59 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -168,6 +168,9 @@ OPTIONS branch stacks and it will automatically switch to the branch view mode, unless --no-branch-stack is used. +--objdump=<path>:: + Path to objdump binary. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 3152cca1550..d00bef23134 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -116,8 +116,8 @@ search path and 'use'ing a few support modules (see module descriptions below): ---- - use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; - use lib "./perf-script-Util/lib"; + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Context; use Perf::Trace::Util; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 47102206911..a4027f221a5 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -129,7 +129,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -216,7 +216,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -279,7 +279,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -391,7 +391,7 @@ drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin -rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py -drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py ---- @@ -518,7 +518,7 @@ descriptions below): import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt new file mode 100644 index 00000000000..3a2ae37310a --- /dev/null +++ b/tools/perf/Documentation/perf-trace.txt @@ -0,0 +1,53 @@ +perf-trace(1) +============= + +NAME +---- +perf-trace - strace inspired tool + +SYNOPSIS +-------- +[verse] +'perf trace' + +DESCRIPTION +----------- +This command will show the events associated with the target, initially +syscalls, but other system events like pagefaults, task lifetime events, +scheduling events, etc. + +Initially this is a live mode only tool, but eventually will work with +perf.data files like the other tools, allowing a detached 'record' from +analysis phases. + +OPTIONS +------- + +--all-cpus:: + System-wide collection from all CPUs. + +-p:: +--pid=:: + Record events on existing process ID (comma separated list). + +--tid=:: + Record events on existing thread ID (comma separated list). + +--uid=:: + Record events in threads owned by uid. Name or number. + +--no-inherit:: + Child tasks do not inherit counters. + +--mmap-pages=:: + Number of mmap data pages. Must be a power of two. + +--cpu:: +Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a +comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. +In per-thread mode with inheritance mode on (default), Events are captured only when +the thread executes on the designated CPUs. Default is to monitor all CPUs. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index b4b572e8c10..80db3f4bcf7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -10,8 +10,12 @@ include/linux/stringify.h lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h +arch/*/include/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/magic.h include/linux/hw_breakpoint.h +arch/x86/include/asm/svm.h +arch/x86/include/asm/vmx.h +arch/x86/include/asm/kvm_host.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 35655c3a7b7..e5e71e7d95a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -37,7 +37,14 @@ include config/utilities.mak # # Define NO_NEWT if you do not want TUI support. # +# Define NO_GTK2 if you do not want GTK+ GUI support. +# # Define NO_DEMANGLE if you do not want C++ symbol demangling. +# +# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) +# +# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# backtrace post unwind. $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -50,16 +57,19 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ ) +NO_PERF_REGS := 1 CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar # Additional ARCH settings for x86 ifeq ($(ARCH),i386) - ARCH := x86 + override ARCH := x86 + NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif ifeq ($(ARCH),x86_64) - ARCH := x86 + override ARCH := x86 IS_X86_64 := 0 ifeq (, $(findstring m32,$(EXTRA_CFLAGS))) IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) @@ -69,6 +79,8 @@ ifeq ($(ARCH),x86_64) ARCH_CFLAGS := -DARCH_X86_64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif + NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 endif # Treat warnings as errors unless directed not to @@ -89,7 +101,7 @@ ifdef PARSER_DEBUG PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) @@ -186,10 +198,10 @@ SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) TRACE_EVENT_DIR = ../lib/traceevent/ -ifeq ("$(origin O)", "command line") - TE_PATH=$(OUTPUT)/ +ifneq ($(OUTPUT),) + TE_PATH=$(OUTPUT) else - TE_PATH=$(TRACE_EVENT_DIR)/ + TE_PATH=$(TRACE_EVENT_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -221,13 +233,13 @@ export PERL_PATH FLEX = flex BISON= bison -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu-flex.c: util/pmu.l +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c: util/pmu.y @@ -252,6 +264,7 @@ LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h LIB_H += util/include/linux/export.h +LIB_H += util/include/linux/magic.h LIB_H += util/include/linux/poison.h LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h @@ -321,6 +334,10 @@ LIB_H += $(TRACE_EVENT_DIR)event-parse.h LIB_H += util/target.h LIB_H += util/rblist.h LIB_H += util/intlist.h +LIB_H += util/perf_regs.h +LIB_H += util/unwind.h +LIB_H += ui/helpline.h +LIB_H += util/vdso.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -356,6 +373,7 @@ LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/symbol-elf.o LIB_OBJS += $(OUTPUT)util/dso-test-data.o LIB_OBJS += $(OUTPUT)util/color.o LIB_OBJS += $(OUTPUT)util/pager.o @@ -387,11 +405,15 @@ LIB_OBJS += $(OUTPUT)util/cgroup.o LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o +LIB_OBJS += $(OUTPUT)util/vdso.o +LIB_OBJS += $(OUTPUT)util/stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o +LIB_OBJS += $(OUTPUT)ui/helpline.o +LIB_OBJS += $(OUTPUT)ui/hist.o +LIB_OBJS += $(OUTPUT)ui/stdio/hist.o +BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o - # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o @@ -449,34 +471,73 @@ PYRF_OBJS += $(OUTPUT)util/xyarray.o -include config.mak.autogen -include config.mak -ifndef NO_DWARF -FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) -ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) - msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); +ifdef NO_LIBELF NO_DWARF := 1 -endif # Dwarf support -endif # NO_DWARF - --include arch/$(ARCH)/Makefile - -ifneq ($(OUTPUT),) - BASIC_CFLAGS += -I$(OUTPUT) -endif - + NO_DEMANGLE := 1 + NO_LIBUNWIND := 1 +else FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS) ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y) msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); else - msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); + NO_LIBELF := 1 + NO_DWARF := 1 + NO_DEMANGLE := 1 endif endif +endif # NO_LIBELF + +ifndef NO_LIBUNWIND +# for linking with debug library, run like: +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib +endif + +FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) +ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y) + msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99); + NO_LIBUNWIND := 1 +endif # Libunwind support +endif # NO_LIBUNWIND + +-include arch/$(ARCH)/Makefile + +ifneq ($(OUTPUT),) + BASIC_CFLAGS += -I$(OUTPUT) +endif + +ifdef NO_LIBELF +BASIC_CFLAGS += -DNO_LIBELF_SUPPORT + +EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) + +# Remove ELF/DWARF dependent codes +LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) + +BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) + +# Use minimal symbol handling +LIB_OBJS += $(OUTPUT)util/symbol-minimal.o + +else # NO_LIBELF ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif +FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) +ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) + msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); + NO_DWARF := 1 +endif # Dwarf support + ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); @@ -487,6 +548,29 @@ else LIB_OBJS += $(OUTPUT)util/dwarf-aux.o endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF +endif # NO_LIBELF + +ifdef NO_LIBUNWIND + BASIC_CFLAGS += -DNO_LIBUNWIND_SUPPORT +else + EXTLIBS += $(LIBUNWIND_LIBS) + BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS) + BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS) + LIB_OBJS += $(OUTPUT)util/unwind.o +endif + +ifdef NO_LIBAUDIT + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT +else + FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit + ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT)),y) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + BASIC_CFLAGS += -DNO_LIBAUDIT_SUPPORT + else + BUILTIN_OBJS += $(OUTPUT)builtin-trace.o + EXTLIBS += -laudit + endif +endif ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT @@ -504,14 +588,13 @@ else LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)ui/browsers/hists.o LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/progress.o LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/tui/setup.o LIB_OBJS += $(OUTPUT)ui/tui/util.o + LIB_OBJS += $(OUTPUT)ui/tui/helpline.o LIB_H += ui/browser.h LIB_H += ui/browsers/map.h - LIB_H += ui/helpline.h LIB_H += ui/keysyms.h LIB_H += ui/libslang.h LIB_H += ui/progress.h @@ -523,7 +606,7 @@ endif ifdef NO_GTK2 BASIC_CFLAGS += -DNO_GTK2_SUPPORT else - FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0) + FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y) msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); BASIC_CFLAGS += -DNO_GTK2_SUPPORT @@ -531,11 +614,12 @@ else ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR endif - BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0) - EXTLIBS += $(shell pkg-config --libs gtk+-2.0) + BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) + EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o LIB_OBJS += $(OUTPUT)ui/gtk/util.o + LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o # Make sure that it'd be included only once. ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),) LIB_OBJS += $(OUTPUT)ui/setup.o @@ -644,7 +728,7 @@ else EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE else - FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd + FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD)) ifeq ($(has_bfd),y) EXTLIBS += -lbfd @@ -674,6 +758,13 @@ else endif endif +ifeq ($(NO_PERF_REGS),0) + ifeq ($(ARCH),x86) + LIB_H += arch/x86/include/perf_regs.h + endif +else + BASIC_CFLAGS += -DNO_PERF_REGS +endif ifdef NO_STRLCPY BASIC_CFLAGS += -DNO_STRLCPY @@ -683,6 +774,14 @@ else endif endif +ifdef NO_BACKTRACE + BASIC_CFLAGS += -DNO_BACKTRACE +else + ifneq ($(call try-cc,$(SOURCE_BACKTRACE),),y) + BASIC_CFLAGS += -DNO_BACKTRACE + endif +endif + ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -700,6 +799,7 @@ perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) +sysconfdir_SQ = $(subst ','\'',$(sysconfdir)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) @@ -767,10 +867,10 @@ $(OUTPUT)perf.o perf.spec \ # over the general rule for .o $(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -w $< $(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< @@ -842,7 +942,10 @@ $(LIB_FILE): $(LIB_OBJS) # libtraceevent.a $(LIBTRACEEVENT): - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) $(COMMAND_O) libtraceevent.a + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a + +$(LIBTRACEEVENT)-clean: + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean help: @echo 'Perf make targets:' @@ -951,6 +1054,8 @@ install: all $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' @@ -981,7 +1086,7 @@ quick-install-html: ### Cleaning rules -clean: +clean: $(LIBTRACEEVENT)-clean $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(RM) $(ALL_PROGRAMS) perf $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 744e629797b..815841c04eb 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,4 +2,7 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +ifndef NO_LIBUNWIND +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h new file mode 100644 index 00000000000..46fc9f15c6b --- /dev/null +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -0,0 +1,80 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include "../../util/types.h" +#include "../../../../../arch/x86/include/asm/perf_regs.h" + +#ifndef ARCH_X86_64 +#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) +#else +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) +#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) +#endif +#define PERF_REG_IP PERF_REG_X86_IP +#define PERF_REG_SP PERF_REG_X86_SP + +static inline const char *perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; +#ifdef ARCH_X86_64 + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; +#endif /* ARCH_X86_64 */ + default: + return NULL; + } + + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c new file mode 100644 index 00000000000..78d956eff96 --- /dev/null +++ b/tools/perf/arch/x86/util/unwind.c @@ -0,0 +1,111 @@ + +#include <errno.h> +#include <libunwind.h> +#include "perf_regs.h" +#include "../../util/unwind.h" + +#ifdef ARCH_X86_64 +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_64_RAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_64_RDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_64_RCX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_64_RBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_64_RSI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_64_RDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_64_RBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_64_RSP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_64_R8: + id = PERF_REG_X86_R8; + break; + case UNW_X86_64_R9: + id = PERF_REG_X86_R9; + break; + case UNW_X86_64_R10: + id = PERF_REG_X86_R10; + break; + case UNW_X86_64_R11: + id = PERF_REG_X86_R11; + break; + case UNW_X86_64_R12: + id = PERF_REG_X86_R12; + break; + case UNW_X86_64_R13: + id = PERF_REG_X86_R13; + break; + case UNW_X86_64_R14: + id = PERF_REG_X86_R14; + break; + case UNW_X86_64_R15: + id = PERF_REG_X86_R15; + break; + case UNW_X86_64_RIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#else +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_EAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_EDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_ECX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_EBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_ESI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_EDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_EBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_ESP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_EIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#endif /* ARCH_X86_64 */ diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion new file mode 100644 index 00000000000..1958fa539d0 --- /dev/null +++ b/tools/perf/bash_completion @@ -0,0 +1,26 @@ +# perf completion + +have perf && +_perf() +{ + local cur cmd + + COMPREPLY=() + _get_comp_words_by_ref cur prev + + cmd=${COMP_WORDS[0]} + + # List perf subcommands + if [ $COMP_CWORD -eq 1 ]; then + cmds=$($cmd --list-cmds) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # List possible events for -e option + elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then + cmds=$($cmd list --raw-dump) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # Fall down to list regular files + else + _filedir + fi +} && +complete -F _perf perf diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a09bece6dad..8f89998eeaf 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -3,7 +3,8 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 02dad5d3359..93c83e3cb4a 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -177,7 +177,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) } while (0) int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int i; size_t len; diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 350cc955726..c6e4bc52349 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -171,7 +171,7 @@ static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) } while (0) int bench_mem_memset(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int i; size_t len; diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index d1d1b30f99c..cc1190a0849 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -267,7 +267,7 @@ static const char * const bench_sched_message_usage[] = { }; int bench_sched_messaging(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { unsigned int i, total_children; struct timeval start, stop, diff; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 0c7454f8b8a..69cfba8d4c6 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -43,7 +43,7 @@ static const char * const bench_sched_pipe_usage[] = { }; int bench_sched_pipe(int argc, const char **argv, - const char *prefix __used) + const char *prefix __maybe_unused) { int pipe_1[2], pipe_2[2]; int m = 0, i; @@ -55,14 +55,14 @@ int bench_sched_pipe(int argc, const char **argv, * discarding returned value of read(), write() * causes error in building environment for perf */ - int __used ret, wait_stat; - pid_t pid, retpid; + int __maybe_unused ret, wait_stat; + pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - assert(!pipe(pipe_1)); - assert(!pipe(pipe_2)); + BUG_ON(pipe(pipe_1)); + BUG_ON(pipe(pipe_2)); pid = fork(); assert(pid >= 0); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 67522cf8740..9ea38540b87 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,7 +239,7 @@ static const char * const annotate_usage[] = { NULL }; -int cmd_annotate(int argc, const char **argv, const char *prefix __used) +int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_annotate annotate = { .tool = { @@ -282,6 +282,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "objdump", &objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 1f310021644..cae9a5fd2ec 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -173,7 +173,7 @@ static void all_subsystem(void) all_suite(&subsystems[i]); } -int cmd_bench(int argc, const char **argv, const char *prefix __used) +int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) { int i, j, status = 0; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 29ad20e6791..83654557e10 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -43,15 +43,16 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, false); + err = build_id_cache__add_s(sbuild_id, debugdir, filename, + false, false); if (verbose) pr_info("Adding %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename __used, - const char *debugdir __used) +static int build_id_cache__remove_file(const char *filename __maybe_unused, + const char *debugdir __maybe_unused) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -119,7 +120,8 @@ static int __cmd_buildid_cache(void) return 0; } -int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used) +int cmd_buildid_cache(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 6b2bcfbde15..1159feeebb1 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,8 +16,6 @@ #include "util/session.h" #include "util/symbol.h" -#include <libelf.h> - static const char *input_name; static bool force; static bool show_kernel; @@ -71,7 +69,7 @@ static int perf_session__list_build_ids(void) { struct perf_session *session; - elf_version(EV_CURRENT); + symbol__elf_init(); session = perf_session__new(input_name, O_RDONLY, force, false, &build_id__mark_dso_hit_ops); @@ -105,7 +103,8 @@ static int __cmd_buildid_list(void) return perf_session__list_build_ids(); } -int cmd_buildid_list(int argc, const char **argv, const char *prefix __used) +int cmd_buildid_list(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, buildid_list_usage, 0); setup_pager(); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d29d350fb2b..761f4197a9e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -10,6 +10,7 @@ #include "util/event.h" #include "util/hist.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/session.h" #include "util/tool.h" #include "util/sort.h" @@ -24,11 +25,6 @@ static char diff__default_sort_order[] = "dso,symbol"; static bool force; static bool show_displacement; -struct perf_diff { - struct perf_tool tool; - struct perf_session *session; -}; - static int hists__add_entry(struct hists *self, struct addr_location *al, u64 period) { @@ -37,14 +33,12 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(struct perf_tool *tool, +static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { - struct perf_diff *_diff = container_of(tool, struct perf_diff, tool); - struct perf_session *session = _diff->session; struct addr_location al; if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) { @@ -56,26 +50,24 @@ static int diff__process_sample_event(struct perf_tool *tool, if (al.filtered || al.sym == NULL) return 0; - if (hists__add_entry(&session->hists, &al, sample->period)) { + if (hists__add_entry(&evsel->hists, &al, sample->period)) { pr_warning("problem incrementing symbol period, skipping event\n"); return -1; } - session->hists.stats.total_period += sample->period; + evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_diff diff = { - .tool = { - .sample = diff__process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .lost = perf_event__process_lost, - .ordered_samples = true, - .ordering_requires_timestamps = true, - }, +static struct perf_tool tool = { + .sample = diff__process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, @@ -146,34 +138,71 @@ static void hists__match(struct hists *older, struct hists *newer) } } +static struct perf_evsel *evsel_match(struct perf_evsel *evsel, + struct perf_evlist *evlist) +{ + struct perf_evsel *e; + + list_for_each_entry(e, &evlist->entries, node) + if (perf_evsel__match2(evsel, e)) + return e; + + return NULL; +} + static int __cmd_diff(void) { int ret, i; #define older (session[0]) #define newer (session[1]) struct perf_session *session[2]; + struct perf_evlist *evlist_new, *evlist_old; + struct perf_evsel *evsel; + bool first = true; older = perf_session__new(input_old, O_RDONLY, force, false, - &diff.tool); + &tool); newer = perf_session__new(input_new, O_RDONLY, force, false, - &diff.tool); + &tool); if (session[0] == NULL || session[1] == NULL) return -ENOMEM; for (i = 0; i < 2; ++i) { - diff.session = session[i]; - ret = perf_session__process_events(session[i], &diff.tool); + ret = perf_session__process_events(session[i], &tool); if (ret) goto out_delete; - hists__output_resort(&session[i]->hists); } - if (show_displacement) - hists__resort_entries(&older->hists); + evlist_old = older->evlist; + evlist_new = newer->evlist; + + list_for_each_entry(evsel, &evlist_new->entries, node) + hists__output_resort(&evsel->hists); + + list_for_each_entry(evsel, &evlist_old->entries, node) { + hists__output_resort(&evsel->hists); + + if (show_displacement) + hists__resort_entries(&evsel->hists); + } + + list_for_each_entry(evsel, &evlist_new->entries, node) { + struct perf_evsel *evsel_old; + + evsel_old = evsel_match(evsel, evlist_old); + if (!evsel_old) + continue; + + fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n", + perf_evsel__name(evsel)); + + first = false; + + hists__match(&evsel_old->hists, &evsel->hists); + hists__fprintf(&evsel->hists, &evsel_old->hists, + show_displacement, true, 0, 0, stdout); + } - hists__match(&older->hists, &newer->hists); - hists__fprintf(&newer->hists, &older->hists, - show_displacement, true, 0, 0, stdout); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); @@ -213,7 +242,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_diff(int argc, const char **argv, const char *prefix __used) +int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) { sort_order = diff__default_sort_order; argc = parse_options(argc, argv, options, diff_usage, 0); @@ -235,6 +264,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) return -1; + perf_hpp__init(true, show_displacement); setup_sorting(diff_usage, options); setup_pager(); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 0dd5a058f76..1fb164164fd 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -113,7 +113,7 @@ static const char * const evlist_usage[] = { NULL }; -int cmd_evlist(int argc, const char **argv, const char *prefix __used) +int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_attr_details details = { .verbose = false, }; const char *input_name = NULL; diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 6d5a8a7faf4..25c8b942ff8 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -24,13 +24,14 @@ static struct man_viewer_info_list { } *man_viewer_info_list; enum help_format { + HELP_FORMAT_NONE, HELP_FORMAT_MAN, HELP_FORMAT_INFO, HELP_FORMAT_WEB, }; static bool show_all = false; -static enum help_format help_format = HELP_FORMAT_MAN; +static enum help_format help_format = HELP_FORMAT_NONE; static struct option builtin_help_options[] = { OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), OPT_SET_UINT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), @@ -54,7 +55,9 @@ static enum help_format parse_help_format(const char *format) return HELP_FORMAT_INFO; if (!strcmp(format, "web") || !strcmp(format, "html")) return HELP_FORMAT_WEB; - die("unrecognized help format '%s'", format); + + pr_err("unrecognized help format '%s'", format); + return HELP_FORMAT_NONE; } static const char *get_man_viewer_info(const char *name) @@ -259,6 +262,8 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!value) return config_error_nonbool(var); help_format = parse_help_format(value); + if (help_format == HELP_FORMAT_NONE) + return -1; return 0; } if (!strcmp(var, "man.viewer")) { @@ -352,7 +357,7 @@ static void exec_viewer(const char *name, const char *page) warning("'%s': unknown man viewer.", name); } -static void show_man_page(const char *perf_cmd) +static int show_man_page(const char *perf_cmd) { struct man_viewer_list *viewer; const char *page = cmd_to_page(perf_cmd); @@ -365,28 +370,35 @@ static void show_man_page(const char *perf_cmd) if (fallback) exec_viewer(fallback, page); exec_viewer("man", page); - die("no man viewer handled the request"); + + pr_err("no man viewer handled the request"); + return -1; } -static void show_info_page(const char *perf_cmd) +static int show_info_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); execlp("info", "info", "perfman", page, NULL); + return -1; } -static void get_html_page_path(struct strbuf *page_path, const char *page) +static int get_html_page_path(struct strbuf *page_path, const char *page) { struct stat st; const char *html_path = system_path(PERF_HTML_PATH); /* Check that we have a perf documentation directory. */ if (stat(mkpath("%s/perf.html", html_path), &st) - || !S_ISREG(st.st_mode)) - die("'%s': not a documentation directory.", html_path); + || !S_ISREG(st.st_mode)) { + pr_err("'%s': not a documentation directory.", html_path); + return -1; + } strbuf_init(page_path, 0); strbuf_addf(page_path, "%s/%s.html", html_path, page); + + return 0; } /* @@ -401,19 +413,23 @@ static void open_html(const char *path) } #endif -static void show_html_page(const char *perf_cmd) +static int show_html_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); struct strbuf page_path; /* it leaks but we exec bellow */ - get_html_page_path(&page_path, page); + if (get_html_page_path(&page_path, page) != 0) + return -1; open_html(page_path.buf); + + return 0; } -int cmd_help(int argc, const char **argv, const char *prefix __used) +int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) { const char *alias; + int rc = 0; load_command_list("perf-", &main_cmds, &other_cmds); @@ -444,16 +460,20 @@ int cmd_help(int argc, const char **argv, const char *prefix __used) switch (help_format) { case HELP_FORMAT_MAN: - show_man_page(argv[0]); + rc = show_man_page(argv[0]); break; case HELP_FORMAT_INFO: - show_info_page(argv[0]); + rc = show_info_page(argv[0]); break; case HELP_FORMAT_WEB: - show_html_page(argv[0]); + rc = show_html_page(argv[0]); + break; + case HELP_FORMAT_NONE: + /* fall-through */ default: + rc = -1; break; } - return 0; + return rc; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 3beab489afc..1eaa6617c81 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,9 +17,9 @@ static char const *input_name = "-"; static bool inject_build_ids; -static int perf_event__repipe_synth(struct perf_tool *tool __used, +static int perf_event__repipe_synth(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct machine *machine __used) + struct machine *machine __maybe_unused) { uint32_t size; void *buf = event; @@ -40,7 +40,8 @@ static int perf_event__repipe_synth(struct perf_tool *tool __used, static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, - struct perf_session *session __used) + struct perf_session *session + __maybe_unused) { return perf_event__repipe_synth(tool, event, NULL); } @@ -52,13 +53,14 @@ static int perf_event__repipe_event_type_synth(struct perf_tool *tool, } static int perf_event__repipe_tracing_data_synth(union perf_event *event, - struct perf_session *session __used) + struct perf_session *session + __maybe_unused) { return perf_event__repipe_synth(NULL, event, NULL); } static int perf_event__repipe_attr(union perf_event *event, - struct perf_evlist **pevlist __used) + struct perf_evlist **pevlist __maybe_unused) { int ret; ret = perf_event__process_attr(event, pevlist); @@ -70,7 +72,7 @@ static int perf_event__repipe_attr(union perf_event *event, static int perf_event__repipe(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { return perf_event__repipe_synth(tool, event, machine); @@ -78,8 +80,8 @@ static int perf_event__repipe(struct perf_tool *tool, static int perf_event__repipe_sample(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { return perf_event__repipe_synth(tool, event, machine); @@ -163,7 +165,7 @@ static int dso__inject_build_id(struct dso *self, struct perf_tool *tool, static int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { struct addr_location al; @@ -191,10 +193,13 @@ static int perf_event__inject_buildid(struct perf_tool *tool, * If this fails, too bad, let the other side * account this as unresolved. */ - } else + } else { +#ifndef NO_LIBELF_SUPPORT pr_warning("no symbols found in %s, maybe " "install a debug package?\n", al.map->dso->long_name); +#endif + } } } @@ -221,7 +226,7 @@ struct perf_tool perf_inject = { extern volatile int session_done; -static void sig_handler(int sig __attribute__((__unused__))) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -264,7 +269,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_inject(int argc, const char **argv, const char *prefix __used) +int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, report_usage, 0); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ce35015f2dc..bc912c68f49 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1,6 +1,8 @@ #include "builtin.h" #include "perf.h" +#include "util/evlist.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -57,46 +59,52 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" -struct perf_kmem { - struct perf_tool tool; - struct perf_session *session; -}; - -static void init_cpunode_map(void) +static int init_cpunode_map(void) { FILE *fp; - int i; + int i, err = -1; fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); if (!fp) { max_cpu_num = 4096; - return; + return 0; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) { + pr_err("Failed to read 'kernel_max' from sysfs"); + goto out_close; } - if (fscanf(fp, "%d", &max_cpu_num) < 1) - die("Failed to read 'kernel_max' from sysfs"); max_cpu_num++; cpunode_map = calloc(max_cpu_num, sizeof(int)); - if (!cpunode_map) - die("calloc"); + if (!cpunode_map) { + pr_err("%s: calloc failed\n", __func__); + goto out_close; + } + for (i = 0; i < max_cpu_num; i++) cpunode_map[i] = -1; + + err = 0; +out_close: fclose(fp); + return err; } -static void setup_cpunode_map(void) +static int setup_cpunode_map(void) { struct dirent *dent1, *dent2; DIR *dir1, *dir2; unsigned int cpu, mem; char buf[PATH_MAX]; - init_cpunode_map(); + if (init_cpunode_map()) + return -1; dir1 = opendir(PATH_SYS_NODE); if (!dir1) - return; + return -1; while ((dent1 = readdir(dir1)) != NULL) { if (dent1->d_type != DT_DIR || @@ -116,10 +124,11 @@ static void setup_cpunode_map(void) closedir(dir2); } closedir(dir1); + return 0; } -static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, - int bytes_req, int bytes_alloc, int cpu) +static int insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) { struct rb_node **node = &root_alloc_stat.rb_node; struct rb_node *parent = NULL; @@ -143,8 +152,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); - if (!data) - die("malloc"); + if (!data) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } data->ptr = ptr; data->pingpong = 0; data->hit = 1; @@ -156,9 +167,10 @@ static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, } data->call_site = call_site; data->alloc_cpu = cpu; + return 0; } -static void insert_caller_stat(unsigned long call_site, +static int insert_caller_stat(unsigned long call_site, int bytes_req, int bytes_alloc) { struct rb_node **node = &root_caller_stat.rb_node; @@ -183,8 +195,10 @@ static void insert_caller_stat(unsigned long call_site, data->bytes_alloc += bytes_alloc; } else { data = malloc(sizeof(*data)); - if (!data) - die("malloc"); + if (!data) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } data->call_site = call_site; data->pingpong = 0; data->hit = 1; @@ -194,39 +208,43 @@ static void insert_caller_stat(unsigned long call_site, rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &root_caller_stat); } + + return 0; } -static void process_alloc_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used, - int node) +static int perf_evsel__process_alloc_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - unsigned long call_site; - unsigned long ptr; - int bytes_req; - int bytes_alloc; - int node1, node2; - - ptr = raw_field_value(event, "ptr", data); - call_site = raw_field_value(event, "call_site", data); - bytes_req = raw_field_value(event, "bytes_req", data); - bytes_alloc = raw_field_value(event, "bytes_alloc", data); + unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), + call_site = perf_evsel__intval(evsel, sample, "call_site"); + int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), + bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); - insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); - insert_caller_stat(call_site, bytes_req, bytes_alloc); + if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || + insert_caller_stat(call_site, bytes_req, bytes_alloc)) + return -1; total_requested += bytes_req; total_allocated += bytes_alloc; - if (node) { - node1 = cpunode_map[cpu]; - node2 = raw_field_value(event, "node", data); + nr_allocs++; + return 0; +} + +static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + int ret = perf_evsel__process_alloc_event(evsel, sample); + + if (!ret) { + int node1 = cpunode_map[sample->cpu], + node2 = perf_evsel__intval(evsel, sample, "node"); + if (node1 != node2) nr_cross_allocs++; } - nr_allocs++; + + return ret; } static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); @@ -257,66 +275,37 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static void process_free_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used) +static int perf_evsel__process_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - unsigned long ptr; + unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); struct alloc_stat *s_alloc, *s_caller; - ptr = raw_field_value(event, "ptr", data); - s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); if (!s_alloc) - return; + return 0; - if (cpu != s_alloc->alloc_cpu) { + if ((short)sample->cpu != s_alloc->alloc_cpu) { s_alloc->pingpong++; s_caller = search_alloc_stat(0, s_alloc->call_site, &root_caller_stat, callsite_cmp); - assert(s_caller); + if (!s_caller) + return -1; s_caller->pingpong++; } s_alloc->alloc_cpu = -1; -} -static void process_raw_event(struct perf_tool *tool, - union perf_event *raw_event __used, void *data, - int cpu, u64 timestamp, struct thread *thread) -{ - struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); - struct event_format *event; - int type; - - type = trace_parse_common_type(kmem->session->pevent, data); - event = pevent_find_event(kmem->session->pevent, type); - - if (!strcmp(event->name, "kmalloc") || - !strcmp(event->name, "kmem_cache_alloc")) { - process_alloc_event(data, event, cpu, timestamp, thread, 0); - return; - } - - if (!strcmp(event->name, "kmalloc_node") || - !strcmp(event->name, "kmem_cache_alloc_node")) { - process_alloc_event(data, event, cpu, timestamp, thread, 1); - return; - } - - if (!strcmp(event->name, "kfree") || - !strcmp(event->name, "kmem_cache_free")) { - process_free_event(data, event, cpu, timestamp, thread); - return; - } + return 0; } -static int process_sample_event(struct perf_tool *tool, +typedef int (*tracepoint_handler)(struct perf_evsel *evsel, + struct perf_sample *sample); + +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->ip.pid); @@ -329,18 +318,18 @@ static int process_sample_event(struct perf_tool *tool, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(tool, event, sample->raw_data, sample->cpu, - sample->time, thread); + if (evsel->handler.func != NULL) { + tracepoint_handler f = evsel->handler.func; + return f(evsel, sample); + } return 0; } -static struct perf_kmem perf_kmem = { - .tool = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .ordered_samples = true, - }, +static struct perf_tool perf_kmem = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -496,22 +485,32 @@ static int __cmd_kmem(void) { int err = -EINVAL; struct perf_session *session; - - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_kmem.tool); + const struct perf_evsel_str_handler kmem_tracepoints[] = { + { "kmem:kmalloc", perf_evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, + { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, + { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, + { "kmem:kfree", perf_evsel__process_free_event, }, + { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + }; + + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); if (session == NULL) return -ENOMEM; - perf_kmem.session = session; - if (perf_session__create_kernel_maps(session) < 0) goto out_delete; if (!perf_session__has_traces(session, "kmem record")) goto out_delete; + if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + return -1; + } + setup_pager(); - err = perf_session__process_events(session, &perf_kmem.tool); + err = perf_session__process_events(session, &perf_kmem); if (err != 0) goto out_delete; sort_result(); @@ -635,8 +634,10 @@ static int sort_dimension__add(const char *tok, struct list_head *list) for (i = 0; i < NUM_AVAIL_SORTS; i++) { if (!strcmp(avail_sorts[i]->name, tok)) { sort = malloc(sizeof(*sort)); - if (!sort) - die("malloc"); + if (!sort) { + pr_err("%s: malloc failed\n", __func__); + return -1; + } memcpy(sort, avail_sorts[i], sizeof(*sort)); list_add_tail(&sort->list, list); return 0; @@ -651,8 +652,10 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) char *tok; char *str = strdup(arg); - if (!str) - die("strdup"); + if (!str) { + pr_err("%s: strdup failed\n", __func__); + return -1; + } while (true) { tok = strsep(&str, ","); @@ -669,8 +672,8 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) return 0; } -static int parse_sort_opt(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_sort_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { if (!arg) return -1; @@ -683,22 +686,24 @@ static int parse_sort_opt(const struct option *opt __used, return 0; } -static int parse_caller_opt(const struct option *opt __used, - const char *arg __used, int unset __used) +static int parse_caller_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) { caller_flag = (alloc_flag + 1); return 0; } -static int parse_alloc_opt(const struct option *opt __used, - const char *arg __used, int unset __used) +static int parse_alloc_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) { alloc_flag = (caller_flag + 1); return 0; } -static int parse_line_opt(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_line_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { int lines; @@ -768,7 +773,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_kmem(int argc, const char **argv, const char *prefix __used) +int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) { argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); @@ -780,7 +785,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); } else if (!strcmp(argv[0], "stat")) { - setup_cpunode_map(); + if (setup_cpunode_map()) + return -1; if (list_empty(&caller_sort)) setup_sorting(&caller_sort, default_sort_order); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 9fc6e0fa3dc..a28c9cad904 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -10,8 +11,10 @@ #include "util/parse-options.h" #include "util/trace-event.h" - #include "util/debug.h" +#include "util/debugfs.h" +#include "util/tool.h" +#include "util/stat.h" #include <sys/prctl.h> @@ -19,11 +22,836 @@ #include <pthread.h> #include <math.h> -static const char *file_name; +#include "../../arch/x86/include/asm/svm.h" +#include "../../arch/x86/include/asm/vmx.h" +#include "../../arch/x86/include/asm/kvm.h" + +struct event_key { + #define INVALID_KEY (~0ULL) + u64 key; + int info; +}; + +struct kvm_events_ops { + bool (*is_begin_event)(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key); + bool (*is_end_event)(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key); + void (*decode_key)(struct event_key *key, char decode[20]); + const char *name; +}; + +static void exit_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = perf_evsel__intval(evsel, sample, "exit_reason"); +} + +static bool kvm_exit_event(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "kvm:kvm_exit"); +} + +static bool exit_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (kvm_exit_event(evsel)) { + exit_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool kvm_entry_event(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "kvm:kvm_entry"); +} + +static bool exit_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +struct exit_reasons_table { + unsigned long exit_code; + const char *reason; +}; + +struct exit_reasons_table vmx_exit_reasons[] = { + VMX_EXIT_REASONS +}; + +struct exit_reasons_table svm_exit_reasons[] = { + SVM_EXIT_REASONS +}; + +static int cpu_isa; + +static const char *get_exit_reason(u64 exit_code) +{ + int table_size = ARRAY_SIZE(svm_exit_reasons); + struct exit_reasons_table *table = svm_exit_reasons; + + if (cpu_isa == 1) { + table = vmx_exit_reasons; + table_size = ARRAY_SIZE(vmx_exit_reasons); + } + + while (table_size--) { + if (table->exit_code == exit_code) + return table->reason; + table++; + } + + pr_err("unknown kvm exit code:%lld on %s\n", + (unsigned long long)exit_code, cpu_isa ? "VMX" : "SVM"); + return "UNKNOWN"; +} + +static void exit_event_decode_key(struct event_key *key, char decode[20]) +{ + const char *exit_reason = get_exit_reason(key->key); + + scnprintf(decode, 20, "%s", exit_reason); +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + + /* + * For the mmio events, we treat: + * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry + * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...). + */ +static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample, + struct event_key *key) +{ + key->key = perf_evsel__intval(evsel, sample, "gpa"); + key->info = perf_evsel__intval(evsel, sample, "type"); +} + +#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 +#define KVM_TRACE_MMIO_READ 1 +#define KVM_TRACE_MMIO_WRITE 2 + +static bool mmio_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + /* MMIO read begin event in kernel. */ + if (kvm_exit_event(evsel)) + return true; + + /* MMIO write begin event in kernel. */ + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { + mmio_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample, + struct event_key *key) +{ + /* MMIO write end event in kernel. */ + if (kvm_entry_event(evsel)) + return true; + + /* MMIO read end event in kernel.*/ + if (!strcmp(evsel->name, "kvm:kvm_mmio") && + perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { + mmio_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static void mmio_event_decode_key(struct event_key *key, char decode[20]) +{ + scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key, + key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); +} + +static struct kvm_events_ops mmio_events = { + .is_begin_event = mmio_event_begin, + .is_end_event = mmio_event_end, + .decode_key = mmio_event_decode_key, + .name = "MMIO Access" +}; + + /* The time of emulation pio access is from kvm_pio to kvm_entry. */ +static void ioport_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->key = perf_evsel__intval(evsel, sample, "port"); + key->info = perf_evsel__intval(evsel, sample, "rw"); +} + +static bool ioport_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, "kvm:kvm_pio")) { + ioport_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool ioport_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static void ioport_event_decode_key(struct event_key *key, char decode[20]) +{ + scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key, + key->info ? "POUT" : "PIN"); +} + +static struct kvm_events_ops ioport_events = { + .is_begin_event = ioport_event_begin, + .is_end_event = ioport_event_end, + .decode_key = ioport_event_decode_key, + .name = "IO Port Access" +}; + +static const char *report_event = "vmexit"; +struct kvm_events_ops *events_ops; + +static bool register_kvm_events_ops(void) +{ + bool ret = true; + + if (!strcmp(report_event, "vmexit")) + events_ops = &exit_events; + else if (!strcmp(report_event, "mmio")) + events_ops = &mmio_events; + else if (!strcmp(report_event, "ioport")) + events_ops = &ioport_events; + else { + pr_err("Unknown report event:%s\n", report_event); + ret = false; + } + + return ret; +} + +struct kvm_event_stats { + u64 time; + struct stats stats; +}; + +struct kvm_event { + struct list_head hash_entry; + struct rb_node rb; + + struct event_key key; + + struct kvm_event_stats total; + + #define DEFAULT_VCPU_NUM 8 + int max_vcpu; + struct kvm_event_stats *vcpu; +}; + +struct vcpu_event_record { + int vcpu_id; + u64 start_time; + struct kvm_event *last_event; +}; + +#define EVENTS_BITS 12 +#define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS) + +static u64 total_time; +static u64 total_count; +static struct list_head kvm_events_cache[EVENTS_CACHE_SIZE]; + +static void init_kvm_event_record(void) +{ + int i; + + for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++) + INIT_LIST_HEAD(&kvm_events_cache[i]); +} + +static int kvm_events_hash_fn(u64 key) +{ + return key & (EVENTS_CACHE_SIZE - 1); +} + +static bool kvm_event_expand(struct kvm_event *event, int vcpu_id) +{ + int old_max_vcpu = event->max_vcpu; + + if (vcpu_id < event->max_vcpu) + return true; + + while (event->max_vcpu <= vcpu_id) + event->max_vcpu += DEFAULT_VCPU_NUM; + + event->vcpu = realloc(event->vcpu, + event->max_vcpu * sizeof(*event->vcpu)); + if (!event->vcpu) { + pr_err("Not enough memory\n"); + return false; + } + + memset(event->vcpu + old_max_vcpu, 0, + (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu)); + return true; +} + +static struct kvm_event *kvm_alloc_init_event(struct event_key *key) +{ + struct kvm_event *event; + + event = zalloc(sizeof(*event)); + if (!event) { + pr_err("Not enough memory\n"); + return NULL; + } + + event->key = *key; + return event; +} + +static struct kvm_event *find_create_kvm_event(struct event_key *key) +{ + struct kvm_event *event; + struct list_head *head; + + BUG_ON(key->key == INVALID_KEY); + + head = &kvm_events_cache[kvm_events_hash_fn(key->key)]; + list_for_each_entry(event, head, hash_entry) + if (event->key.key == key->key && event->key.info == key->info) + return event; + + event = kvm_alloc_init_event(key); + if (!event) + return NULL; + + list_add(&event->hash_entry, head); + return event; +} + +static bool handle_begin_event(struct vcpu_event_record *vcpu_record, + struct event_key *key, u64 timestamp) +{ + struct kvm_event *event = NULL; + + if (key->key != INVALID_KEY) + event = find_create_kvm_event(key); + + vcpu_record->last_event = event; + vcpu_record->start_time = timestamp; + return true; +} + +static void +kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff) +{ + kvm_stats->time += time_diff; + update_stats(&kvm_stats->stats, time_diff); +} + +static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event) +{ + struct kvm_event_stats *kvm_stats = &event->total; + + if (vcpu_id != -1) + kvm_stats = &event->vcpu[vcpu_id]; + + return rel_stddev_stats(stddev_stats(&kvm_stats->stats), + avg_stats(&kvm_stats->stats)); +} + +static bool update_kvm_event(struct kvm_event *event, int vcpu_id, + u64 time_diff) +{ + kvm_update_event_stats(&event->total, time_diff); + + if (!kvm_event_expand(event, vcpu_id)) + return false; + + kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff); + return true; +} + +static bool handle_end_event(struct vcpu_event_record *vcpu_record, + struct event_key *key, u64 timestamp) +{ + struct kvm_event *event; + u64 time_begin, time_diff; + + event = vcpu_record->last_event; + time_begin = vcpu_record->start_time; + + /* The begin event is not caught. */ + if (!time_begin) + return true; + + /* + * In some case, the 'begin event' only records the start timestamp, + * the actual event is recognized in the 'end event' (e.g. mmio-event). + */ + + /* Both begin and end events did not get the key. */ + if (!event && key->key == INVALID_KEY) + return true; + + if (!event) + event = find_create_kvm_event(key); + + if (!event) + return false; + + vcpu_record->last_event = NULL; + vcpu_record->start_time = 0; + + BUG_ON(timestamp < time_begin); + + time_diff = timestamp - time_begin; + return update_kvm_event(event, vcpu_record->vcpu_id, time_diff); +} + +static +struct vcpu_event_record *per_vcpu_record(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + /* Only kvm_entry records vcpu id. */ + if (!thread->priv && kvm_entry_event(evsel)) { + struct vcpu_event_record *vcpu_record; + + vcpu_record = zalloc(sizeof(*vcpu_record)); + if (!vcpu_record) { + pr_err("%s: Not enough memory\n", __func__); + return NULL; + } + + vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id"); + thread->priv = vcpu_record; + } + + return thread->priv; +} + +static bool handle_kvm_event(struct thread *thread, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct vcpu_event_record *vcpu_record; + struct event_key key = {.key = INVALID_KEY}; + + vcpu_record = per_vcpu_record(thread, evsel, sample); + if (!vcpu_record) + return true; + + if (events_ops->is_begin_event(evsel, sample, &key)) + return handle_begin_event(vcpu_record, &key, sample->time); + + if (events_ops->is_end_event(evsel, sample, &key)) + return handle_end_event(vcpu_record, &key, sample->time); + + return true; +} + +typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int); +struct kvm_event_key { + const char *name; + key_cmp_fun key; +}; + +static int trace_vcpu = -1; +#define GET_EVENT_KEY(func, field) \ +static u64 get_event_ ##func(struct kvm_event *event, int vcpu) \ +{ \ + if (vcpu == -1) \ + return event->total.field; \ + \ + if (vcpu >= event->max_vcpu) \ + return 0; \ + \ + return event->vcpu[vcpu].field; \ +} + +#define COMPARE_EVENT_KEY(func, field) \ +GET_EVENT_KEY(func, field) \ +static int compare_kvm_event_ ## func(struct kvm_event *one, \ + struct kvm_event *two, int vcpu)\ +{ \ + return get_event_ ##func(one, vcpu) > \ + get_event_ ##func(two, vcpu); \ +} + +GET_EVENT_KEY(time, time); +COMPARE_EVENT_KEY(count, stats.n); +COMPARE_EVENT_KEY(mean, stats.mean); + +#define DEF_SORT_NAME_KEY(name, compare_key) \ + { #name, compare_kvm_event_ ## compare_key } + +static struct kvm_event_key keys[] = { + DEF_SORT_NAME_KEY(sample, count), + DEF_SORT_NAME_KEY(time, mean), + { NULL, NULL } +}; + +static const char *sort_key = "sample"; +static key_cmp_fun compare; + +static bool select_key(void) +{ + int i; + + for (i = 0; keys[i].name; i++) { + if (!strcmp(keys[i].name, sort_key)) { + compare = keys[i].key; + return true; + } + } + + pr_err("Unknown compare key:%s\n", sort_key); + return false; +} + +static struct rb_root result; +static void insert_to_result(struct kvm_event *event, key_cmp_fun bigger, + int vcpu) +{ + struct rb_node **rb = &result.rb_node; + struct rb_node *parent = NULL; + struct kvm_event *p; + + while (*rb) { + p = container_of(*rb, struct kvm_event, rb); + parent = *rb; + + if (bigger(event, p, vcpu)) + rb = &(*rb)->rb_left; + else + rb = &(*rb)->rb_right; + } + + rb_link_node(&event->rb, parent, rb); + rb_insert_color(&event->rb, &result); +} + +static void update_total_count(struct kvm_event *event, int vcpu) +{ + total_count += get_event_count(event, vcpu); + total_time += get_event_time(event, vcpu); +} + +static bool event_is_valid(struct kvm_event *event, int vcpu) +{ + return !!get_event_count(event, vcpu); +} + +static void sort_result(int vcpu) +{ + unsigned int i; + struct kvm_event *event; + + for (i = 0; i < EVENTS_CACHE_SIZE; i++) + list_for_each_entry(event, &kvm_events_cache[i], hash_entry) + if (event_is_valid(event, vcpu)) { + update_total_count(event, vcpu); + insert_to_result(event, compare, vcpu); + } +} + +/* returns left most element of result, and erase it */ +static struct kvm_event *pop_from_result(void) +{ + struct rb_node *node = rb_first(&result); + + if (!node) + return NULL; + + rb_erase(node, &result); + return container_of(node, struct kvm_event, rb); +} + +static void print_vcpu_info(int vcpu) +{ + pr_info("Analyze events for "); + + if (vcpu == -1) + pr_info("all VCPUs:\n\n"); + else + pr_info("VCPU %d:\n\n", vcpu); +} + +static void print_result(int vcpu) +{ + char decode[20]; + struct kvm_event *event; + + pr_info("\n\n"); + print_vcpu_info(vcpu); + pr_info("%20s ", events_ops->name); + pr_info("%10s ", "Samples"); + pr_info("%9s ", "Samples%"); + + pr_info("%9s ", "Time%"); + pr_info("%16s ", "Avg time"); + pr_info("\n\n"); + + while ((event = pop_from_result())) { + u64 ecount, etime; + + ecount = get_event_count(event, vcpu); + etime = get_event_time(event, vcpu); + + events_ops->decode_key(&event->key, decode); + pr_info("%20s ", decode); + pr_info("%10llu ", (unsigned long long)ecount); + pr_info("%8.2f%% ", (double)ecount / total_count * 100); + pr_info("%8.2f%% ", (double)etime / total_time * 100); + pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3, + kvm_event_rel_stddev(vcpu, event)); + pr_info("\n"); + } + + pr_info("\nTotal Samples:%lld, Total events handled time:%.2fus.\n\n", + (unsigned long long)total_count, total_time / 1e3); +} + +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct thread *thread = machine__findnew_thread(machine, sample->tid); + + if (thread == NULL) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + if (!handle_kvm_event(thread, evsel, sample)) + return -1; + + return 0; +} + +static struct perf_tool eops = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, +}; + +static int get_cpu_isa(struct perf_session *session) +{ + char *cpuid = session->header.env.cpuid; + int isa; + + if (strstr(cpuid, "Intel")) + isa = 1; + else if (strstr(cpuid, "AMD")) + isa = 0; + else { + pr_err("CPU %s is not supported.\n", cpuid); + isa = -ENOTSUP; + } + + return isa; +} + +static const char *file_name; + +static int read_events(void) +{ + struct perf_session *kvm_session; + int ret; + + kvm_session = perf_session__new(file_name, O_RDONLY, 0, false, &eops); + if (!kvm_session) { + pr_err("Initializing perf session failed\n"); + return -EINVAL; + } + + if (!perf_session__has_traces(kvm_session, "kvm record")) + return -EINVAL; + + /* + * Do not use 'isa' recorded in kvm_exit tracepoint since it is not + * traced in the old kernel. + */ + ret = get_cpu_isa(kvm_session); + + if (ret < 0) + return ret; + + cpu_isa = ret; + + return perf_session__process_events(kvm_session, &eops); +} + +static bool verify_vcpu(int vcpu) +{ + if (vcpu != -1 && vcpu < 0) { + pr_err("Invalid vcpu:%d.\n", vcpu); + return false; + } + + return true; +} + +static int kvm_events_report_vcpu(int vcpu) +{ + int ret = -EINVAL; + + if (!verify_vcpu(vcpu)) + goto exit; + + if (!select_key()) + goto exit; + + if (!register_kvm_events_ops()) + goto exit; + + init_kvm_event_record(); + setup_pager(); + + ret = read_events(); + if (ret) + goto exit; + + sort_result(vcpu); + print_result(vcpu); +exit: + return ret; +} + +static const char * const record_args[] = { + "record", + "-R", + "-f", + "-m", "1024", + "-c", "1", + "-e", "kvm:kvm_entry", + "-e", "kvm:kvm_exit", + "-e", "kvm:kvm_mmio", + "-e", "kvm:kvm_pio", +}; + +#define STRDUP_FAIL_EXIT(s) \ + ({ char *_p; \ + _p = strdup(s); \ + if (!_p) \ + return -ENOMEM; \ + _p; \ + }) + +static int kvm_events_record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + + rec_argc = ARRAY_SIZE(record_args) + argc + 2; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + if (rec_argv == NULL) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]); + + rec_argv[i++] = STRDUP_FAIL_EXIT("-o"); + rec_argv[i++] = STRDUP_FAIL_EXIT(file_name); + + for (j = 1; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + +static const char * const kvm_events_report_usage[] = { + "perf kvm stat report [<options>]", + NULL +}; + +static const struct option kvm_events_report_options[] = { + OPT_STRING(0, "event", &report_event, "report event", + "event for reporting: vmexit, mmio, ioport"), + OPT_INTEGER(0, "vcpu", &trace_vcpu, + "vcpu id to report"), + OPT_STRING('k', "key", &sort_key, "sort-key", + "key for sorting: sample(sort by samples number)" + " time (sort by avg time)"), + OPT_END() +}; + +static int kvm_events_report(int argc, const char **argv) +{ + symbol__init(); + + if (argc) { + argc = parse_options(argc, argv, + kvm_events_report_options, + kvm_events_report_usage, 0); + if (argc) + usage_with_options(kvm_events_report_usage, + kvm_events_report_options); + } + + return kvm_events_report_vcpu(trace_vcpu); +} + +static void print_kvm_stat_usage(void) +{ + printf("Usage: perf kvm stat <command>\n\n"); + + printf("# Available commands:\n"); + printf("\trecord: record kvm events\n"); + printf("\treport: report statistical data of kvm events\n"); + + printf("\nOtherwise, it is the alias of 'perf stat':\n"); +} + +static int kvm_cmd_stat(int argc, const char **argv) +{ + if (argc == 1) { + print_kvm_stat_usage(); + goto perf_stat; + } + + if (!strncmp(argv[1], "rec", 3)) + return kvm_events_record(argc - 1, argv + 1); + + if (!strncmp(argv[1], "rep", 3)) + return kvm_events_report(argc - 1 , argv + 1); + +perf_stat: + return cmd_stat(argc, argv, NULL); +} + static char name_buffer[256]; static const char * const kvm_usage[] = { - "perf kvm [<options>] {top|record|report|diff|buildid-list}", + "perf kvm [<options>] {top|record|report|diff|buildid-list|stat}", NULL }; @@ -102,7 +930,7 @@ static int __cmd_buildid_list(int argc, const char **argv) return cmd_buildid_list(i, rec_argv, NULL); } -int cmd_kvm(int argc, const char **argv, const char *prefix __used) +int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) { perf_host = 0; perf_guest = 1; @@ -135,6 +963,8 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __used) return cmd_top(argc, argv, NULL); else if (!strncmp(argv[0], "buildid-list", 12)) return __cmd_buildid_list(argc, argv); + else if (!strncmp(argv[0], "stat", 4)) + return kvm_cmd_stat(argc, argv); else usage_with_options(kvm_usage, kvm_options); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 6313b6eb3eb..1948eceb517 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -14,20 +14,20 @@ #include "util/parse-events.h" #include "util/cache.h" -int cmd_list(int argc, const char **argv, const char *prefix __used) +int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { setup_pager(); if (argc == 1) - print_events(NULL); + print_events(NULL, false); else { int i; for (i = 1; i < argc; ++i) { - if (i > 1) + if (i > 2) putchar('\n'); if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL); + print_tracepoint_events(NULL, NULL, false); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) print_events_type(PERF_TYPE_HARDWARE); @@ -36,13 +36,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) print_events_type(PERF_TYPE_SOFTWARE); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL); + print_hwcache_events(NULL, false); + else if (strcmp(argv[i], "--raw-dump") == 0) + print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i]); + print_events(argv[i], false); continue; } sep_idx = sep - argv[i]; @@ -51,7 +53,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1); + print_tracepoint_events(s, s + sep_idx + 1, false); free(s); } } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index b3c42854886..7d6e0994988 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,6 +1,8 @@ #include "builtin.h" #include "perf.h" +#include "util/evlist.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -40,7 +42,7 @@ struct lock_stat { struct rb_node rb; /* used for sorting */ /* - * FIXME: raw_field_value() returns unsigned long long, + * FIXME: perf_evsel__intval() returns u64, * so address of lockdep_map should be dealed as 64bit. * Is there more better solution? */ @@ -160,8 +162,10 @@ static struct thread_stat *thread_stat_findnew_after_first(u32 tid) return st; st = zalloc(sizeof(struct thread_stat)); - if (!st) - die("memory allocation failed\n"); + if (!st) { + pr_err("memory allocation failed\n"); + return NULL; + } st->tid = tid; INIT_LIST_HEAD(&st->seq_list); @@ -180,8 +184,10 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid) struct thread_stat *st; st = zalloc(sizeof(struct thread_stat)); - if (!st) - die("memory allocation failed\n"); + if (!st) { + pr_err("memory allocation failed\n"); + return NULL; + } st->tid = tid; INIT_LIST_HEAD(&st->seq_list); @@ -247,18 +253,20 @@ struct lock_key keys[] = { { NULL, NULL } }; -static void select_key(void) +static int select_key(void) { int i; for (i = 0; keys[i].name; i++) { if (!strcmp(keys[i].name, sort_key)) { compare = keys[i].key; - return; + return 0; } } - die("Unknown compare key:%s\n", sort_key); + pr_err("Unknown compare key: %s\n", sort_key); + + return -1; } static void insert_to_result(struct lock_stat *st, @@ -323,61 +331,24 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name) return new; alloc_failed: - die("memory allocation failed\n"); + pr_err("memory allocation failed\n"); + return NULL; } static const char *input_name; -struct raw_event_sample { - u32 size; - char data[0]; -}; - -struct trace_acquire_event { - void *addr; - const char *name; - int flag; -}; - -struct trace_acquired_event { - void *addr; - const char *name; -}; +struct trace_lock_handler { + int (*acquire_event)(struct perf_evsel *evsel, + struct perf_sample *sample); -struct trace_contended_event { - void *addr; - const char *name; -}; + int (*acquired_event)(struct perf_evsel *evsel, + struct perf_sample *sample); -struct trace_release_event { - void *addr; - const char *name; -}; + int (*contended_event)(struct perf_evsel *evsel, + struct perf_sample *sample); -struct trace_lock_handler { - void (*acquire_event)(struct trace_acquire_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*acquired_event)(struct trace_acquired_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*contended_event)(struct trace_contended_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*release_event)(struct trace_release_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + int (*release_event)(struct perf_evsel *evsel, + struct perf_sample *sample); }; static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) @@ -390,8 +361,10 @@ static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) } seq = zalloc(sizeof(struct lock_seq_stat)); - if (!seq) - die("Not enough memory\n"); + if (!seq) { + pr_err("memory allocation failed\n"); + return NULL; + } seq->state = SEQ_STATE_UNINITIALIZED; seq->addr = addr; @@ -414,33 +387,42 @@ enum acquire_flags { READ_LOCK = 2, }; -static void -report_lock_acquire_event(struct trace_acquire_event *acquire_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int report_lock_acquire_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + int flag = perf_evsel__intval(evsel, sample, "flag"); + + memcpy(&addr, &tmp, sizeof(void *)); - ls = lock_stat_findnew(acquire_event->addr, acquire_event->name); + ls = lock_stat_findnew(addr, name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; - ts = thread_stat_findnew(thread->pid); - seq = get_seq(ts, acquire_event->addr); + ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; + + seq = get_seq(ts, addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: case SEQ_STATE_RELEASED: - if (!acquire_event->flag) { + if (!flag) { seq->state = SEQ_STATE_ACQUIRING; } else { - if (acquire_event->flag & TRY_LOCK) + if (flag & TRY_LOCK) ls->nr_trylock++; - if (acquire_event->flag & READ_LOCK) + if (flag & READ_LOCK) ls->nr_readlock++; seq->state = SEQ_STATE_READ_ACQUIRED; seq->read_count = 1; @@ -448,7 +430,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event, } break; case SEQ_STATE_READ_ACQUIRED: - if (acquire_event->flag & READ_LOCK) { + if (flag & READ_LOCK) { seq->read_count++; ls->nr_acquired++; goto end; @@ -473,38 +455,46 @@ broken: } ls->nr_acquire++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: - return; + return 0; } -static void -report_lock_acquired_event(struct trace_acquired_event *acquired_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int report_lock_acquired_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; u64 contended_term; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + + memcpy(&addr, &tmp, sizeof(void *)); - ls = lock_stat_findnew(acquired_event->addr, acquired_event->name); + ls = lock_stat_findnew(addr, name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; + + ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; - ts = thread_stat_findnew(thread->pid); - seq = get_seq(ts, acquired_event->addr); + seq = get_seq(ts, addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: /* orphan event, do nothing */ - return; + return 0; case SEQ_STATE_ACQUIRING: break; case SEQ_STATE_CONTENDED: - contended_term = timestamp - seq->prev_event_time; + contended_term = sample->time - seq->prev_event_time; ls->wait_time_total += contended_term; if (contended_term < ls->wait_time_min) ls->wait_time_min = contended_term; @@ -529,33 +519,41 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, seq->state = SEQ_STATE_ACQUIRED; ls->nr_acquired++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: - return; + return 0; } -static void -report_lock_contended_event(struct trace_contended_event *contended_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int report_lock_contended_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - ls = lock_stat_findnew(contended_event->addr, contended_event->name); + memcpy(&addr, &tmp, sizeof(void *)); + + ls = lock_stat_findnew(addr, name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; + + ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; - ts = thread_stat_findnew(thread->pid); - seq = get_seq(ts, contended_event->addr); + seq = get_seq(ts, addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: /* orphan event, do nothing */ - return; + return 0; case SEQ_STATE_ACQUIRING: break; case SEQ_STATE_RELEASED: @@ -576,28 +574,36 @@ report_lock_contended_event(struct trace_contended_event *contended_event, seq->state = SEQ_STATE_CONTENDED; ls->nr_contended++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: - return; + return 0; } -static void -report_lock_release_event(struct trace_release_event *release_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int report_lock_release_event(struct perf_evsel *evsel, + struct perf_sample *sample) { + void *addr; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; + const char *name = perf_evsel__strval(evsel, sample, "name"); + u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + + memcpy(&addr, &tmp, sizeof(void *)); - ls = lock_stat_findnew(release_event->addr, release_event->name); + ls = lock_stat_findnew(addr, name); + if (!ls) + return -1; if (ls->discard) - return; + return 0; + + ts = thread_stat_findnew(sample->tid); + if (!ts) + return -1; - ts = thread_stat_findnew(thread->pid); - seq = get_seq(ts, release_event->addr); + seq = get_seq(ts, addr); + if (!seq) + return -1; switch (seq->state) { case SEQ_STATE_UNINITIALIZED: @@ -631,7 +637,7 @@ free_seq: list_del(&seq->list); free(seq); end: - return; + return 0; } /* lock oriented handlers */ @@ -645,96 +651,36 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static void -process_lock_acquire_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) -{ - struct trace_acquire_event acquire_event; - u64 tmp; /* this is required for casting... */ - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&acquire_event.addr, &tmp, sizeof(void *)); - acquire_event.name = (char *)raw_field_ptr(event, "name", data); - acquire_event.flag = (int)raw_field_value(event, "flag", data); - - if (trace_handler->acquire_event) - trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); -} - -static void -process_lock_acquired_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int perf_evsel__process_lock_acquire(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct trace_acquired_event acquired_event; - u64 tmp; /* this is required for casting... */ - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&acquired_event.addr, &tmp, sizeof(void *)); - acquired_event.name = (char *)raw_field_ptr(event, "name", data); - if (trace_handler->acquire_event) - trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread); + return trace_handler->acquire_event(evsel, sample); + return 0; } -static void -process_lock_contended_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct trace_contended_event contended_event; - u64 tmp; /* this is required for casting... */ - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&contended_event.addr, &tmp, sizeof(void *)); - contended_event.name = (char *)raw_field_ptr(event, "name", data); - - if (trace_handler->acquire_event) - trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread); + if (trace_handler->acquired_event) + return trace_handler->acquired_event(evsel, sample); + return 0; } -static void -process_lock_release_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int perf_evsel__process_lock_contended(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct trace_release_event release_event; - u64 tmp; /* this is required for casting... */ - - tmp = raw_field_value(event, "lockdep_addr", data); - memcpy(&release_event.addr, &tmp, sizeof(void *)); - release_event.name = (char *)raw_field_ptr(event, "name", data); - - if (trace_handler->acquire_event) - trace_handler->release_event(&release_event, event, cpu, timestamp, thread); + if (trace_handler->contended_event) + return trace_handler->contended_event(evsel, sample); + return 0; } -static void -process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) +static int perf_evsel__process_lock_release(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct event_format *event; - int type; - - type = trace_parse_common_type(session->pevent, data); - event = pevent_find_event(session->pevent, type); - - if (!strcmp(event->name, "lock_acquire")) - process_lock_acquire_event(data, event, cpu, timestamp, thread); - if (!strcmp(event->name, "lock_acquired")) - process_lock_acquired_event(data, event, cpu, timestamp, thread); - if (!strcmp(event->name, "lock_contended")) - process_lock_contended_event(data, event, cpu, timestamp, thread); - if (!strcmp(event->name, "lock_release")) - process_lock_release_event(data, event, cpu, timestamp, thread); + if (trace_handler->release_event) + return trace_handler->release_event(evsel, sample); + return 0; } static void print_bad_events(int bad, int total) @@ -836,20 +782,29 @@ static void dump_map(void) } } -static void dump_info(void) +static int dump_info(void) { + int rc = 0; + if (info_threads) dump_threads(); else if (info_map) dump_map(); - else - die("Unknown type of information\n"); + else { + rc = -1; + pr_err("Unknown type of information\n"); + } + + return rc; } -static int process_sample_event(struct perf_tool *tool __used, +typedef int (*tracepoint_handler)(struct perf_evsel *evsel, + struct perf_sample *sample); + +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, sample->tid); @@ -860,7 +815,10 @@ static int process_sample_event(struct perf_tool *tool __used, return -1; } - process_raw_event(sample->raw_data, sample->cpu, sample->time, thread); + if (evsel->handler.func != NULL) { + tracepoint_handler f = evsel->handler.func; + return f(evsel, sample); + } return 0; } @@ -871,11 +829,25 @@ static struct perf_tool eops = { .ordered_samples = true, }; +static const struct perf_evsel_str_handler lock_tracepoints[] = { + { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ +}; + static int read_events(void) { session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); - if (!session) - die("Initializing perf session failed\n"); + if (!session) { + pr_err("Initializing perf session failed\n"); + return -1; + } + + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + return -1; + } return perf_session__process_events(session, &eops); } @@ -892,13 +864,18 @@ static void sort_result(void) } } -static void __cmd_report(void) +static int __cmd_report(void) { setup_pager(); - select_key(); - read_events(); + + if ((select_key() != 0) || + (read_events() != 0)) + return -1; + sort_result(); print_result(); + + return 0; } static const char * const report_usage[] = { @@ -944,10 +921,6 @@ static const char *record_args[] = { "-f", "-m", "1024", "-c", "1", - "-e", "lock:lock_acquire", - "-e", "lock:lock_acquired", - "-e", "lock:lock_contended", - "-e", "lock:lock_release", }; static int __cmd_record(int argc, const char **argv) @@ -955,15 +928,31 @@ static int __cmd_record(int argc, const char **argv) unsigned int rec_argc, i, j; const char **rec_argv; + for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { + if (!is_valid_tracepoint(lock_tracepoints[i].name)) { + pr_err("tracepoint %s is not enabled. " + "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", + lock_tracepoints[i].name); + return 1; + } + } + rec_argc = ARRAY_SIZE(record_args) + argc - 1; - rec_argv = calloc(rec_argc + 1, sizeof(char *)); + /* factor of 2 is for -e in front of each tracepoint */ + rec_argc += 2 * ARRAY_SIZE(lock_tracepoints); + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) { + rec_argv[i++] = "-e"; + rec_argv[i++] = strdup(lock_tracepoints[j].name); + } + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; @@ -972,9 +961,10 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_lock(int argc, const char **argv, const char *prefix __used) +int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) { unsigned int i; + int rc = 0; symbol__init(); for (i = 0; i < LOCKHASH_SIZE; i++) @@ -1009,11 +999,13 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used) /* recycling report_lock_ops */ trace_handler = &report_lock_ops; setup_pager(); - read_events(); - dump_info(); + if (read_events() != 0) + rc = -1; + else + rc = dump_info(); } else { usage_with_options(lock_usage, lock_options); } - return 0; + return rc; } diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e215ae61b2a..118aa894657 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -143,8 +143,8 @@ static int parse_probe_event_argv(int argc, const char **argv) return ret; } -static int opt_add_probe_event(const struct option *opt __used, - const char *str, int unset __used) +static int opt_add_probe_event(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { if (str) { params.mod_events = true; @@ -153,8 +153,8 @@ static int opt_add_probe_event(const struct option *opt __used, return 0; } -static int opt_del_probe_event(const struct option *opt __used, - const char *str, int unset __used) +static int opt_del_probe_event(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { if (str) { params.mod_events = true; @@ -166,7 +166,7 @@ static int opt_del_probe_event(const struct option *opt __used, } static int opt_set_target(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { int ret = -ENOENT; @@ -188,8 +188,8 @@ static int opt_set_target(const struct option *opt, const char *str, } #ifdef DWARF_SUPPORT -static int opt_show_lines(const struct option *opt __used, - const char *str, int unset __used) +static int opt_show_lines(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { int ret = 0; @@ -209,8 +209,8 @@ static int opt_show_lines(const struct option *opt __used, return ret; } -static int opt_show_vars(const struct option *opt __used, - const char *str, int unset __used) +static int opt_show_vars(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { struct perf_probe_event *pev = ¶ms.events[params.nevents]; int ret; @@ -229,8 +229,8 @@ static int opt_show_vars(const struct option *opt __used, } #endif -static int opt_set_filter(const struct option *opt __used, - const char *str, int unset __used) +static int opt_set_filter(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { const char *err; @@ -327,7 +327,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_probe(int argc, const char **argv, const char *prefix __used) +int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) { int ret; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4db6e1ba54e..f14cb5fdb91 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -31,6 +31,15 @@ #include <sched.h> #include <sys/mman.h> +#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " + +#ifdef NO_LIBUNWIND_SUPPORT +static char callchain_help[] = CALLCHAIN_HELP "[fp]"; +#else +static unsigned long default_stack_dump_size = 8192; +static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +#endif + enum write_mode_t { WRITE_FORCE, WRITE_APPEND @@ -62,32 +71,38 @@ static void advance_output(struct perf_record *rec, size_t size) rec->bytes_written += size; } -static void write_output(struct perf_record *rec, void *buf, size_t size) +static int write_output(struct perf_record *rec, void *buf, size_t size) { while (size) { int ret = write(rec->output, buf, size); - if (ret < 0) - die("failed to write"); + if (ret < 0) { + pr_err("failed to write\n"); + return -1; + } size -= ret; buf += ret; rec->bytes_written += ret; } + + return 0; } static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { struct perf_record *rec = container_of(tool, struct perf_record, tool); - write_output(rec, event, event->header.size); + if (write_output(rec, event, event->header.size) < 0) + return -1; + return 0; } -static void perf_record__mmap_read(struct perf_record *rec, +static int perf_record__mmap_read(struct perf_record *rec, struct perf_mmap *md) { unsigned int head = perf_mmap__read_head(md); @@ -95,9 +110,10 @@ static void perf_record__mmap_read(struct perf_record *rec, unsigned char *data = md->base + rec->page_size; unsigned long size; void *buf; + int rc = 0; if (old == head) - return; + return 0; rec->samples++; @@ -108,17 +124,26 @@ static void perf_record__mmap_read(struct perf_record *rec, size = md->mask + 1 - (old & md->mask); old += size; - write_output(rec, buf, size); + if (write_output(rec, buf, size) < 0) { + rc = -1; + goto out; + } } buf = &data[old & md->mask]; size = head - old; old += size; - write_output(rec, buf, size); + if (write_output(rec, buf, size) < 0) { + rc = -1; + goto out; + } md->prev = old; perf_mmap__write_tail(md, old); + +out: + return rc; } static volatile int done = 0; @@ -134,7 +159,7 @@ static void sig_handler(int sig) signr = sig; } -static void perf_record__sig_exit(int exit_status __used, void *arg) +static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg) { struct perf_record *rec = arg; int status; @@ -163,31 +188,32 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, if (evlist->nr_entries != other->nr_entries) return false; - pair = list_entry(other->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(other); list_for_each_entry(pos, &evlist->entries, node) { if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) return false; - pair = list_entry(pair->node.next, struct perf_evsel, node); + pair = perf_evsel__next(pair); } return true; } -static void perf_record__open(struct perf_record *rec) +static int perf_record__open(struct perf_record *rec) { - struct perf_evsel *pos, *first; + struct perf_evsel *pos; struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct perf_record_opts *opts = &rec->opts; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + int rc = 0; perf_evlist__config_attrs(evlist, opts); + if (opts->group) + perf_evlist__set_leader(evlist); + list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; - struct xyarray *group_fd = NULL; /* * Check if parse_single_tracepoint_event has already asked for * PERF_SAMPLE_TIME. @@ -202,24 +228,24 @@ static void perf_record__open(struct perf_record *rec) */ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - if (opts->group && pos != first) - group_fd = first->fd; fallback_missing_features: if (opts->exclude_guest_missing) attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, - opts->group, group_fd) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { ui__error_paranoid(); - exit(EXIT_FAILURE); + rc = -err; + goto out; } else if (err == ENODEV && opts->target.cpu_list) { - die("No such device - did you specify" - " an out-of-range profile CPU?\n"); + pr_err("No such device - did you specify" + " an out-of-range profile CPU?\n"); + rc = -err; + goto out; } else if (err == EINVAL) { if (!opts->exclude_guest_missing && (attr->exclude_guest || attr->exclude_host)) { @@ -266,42 +292,57 @@ try_again: if (err == ENOENT) { ui__error("The %s event is not supported.\n", perf_evsel__name(pos)); - exit(EXIT_FAILURE); + rc = -err; + goto out; } printf("\n"); - error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", - err, strerror(err)); + error("sys_perf_event_open() syscall returned with %d " + "(%s) for event %s. /bin/dmesg may provide " + "additional information.\n", + err, strerror(err), perf_evsel__name(pos)); #if defined(__i386__) || defined(__x86_64__) - if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) - die("No hardware sampling interrupt available." - " No APIC? If so then you can boot the kernel" - " with the \"lapic\" boot parameter to" - " force-enable it.\n"); + if (attr->type == PERF_TYPE_HARDWARE && + err == EOPNOTSUPP) { + pr_err("No hardware sampling interrupt available." + " No APIC? If so then you can boot the kernel" + " with the \"lapic\" boot parameter to" + " force-enable it.\n"); + rc = -err; + goto out; + } #endif - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + rc = -err; + goto out; } } - if (perf_evlist__set_filters(evlist)) { + if (perf_evlist__apply_filters(evlist)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); - exit(-1); + rc = -1; + goto out; } if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { - if (errno == EPERM) - die("Permission error mapping pages.\n" - "Consider increasing " - "/proc/sys/kernel/perf_event_mlock_kb,\n" - "or try again with a smaller value of -m/--mmap_pages.\n" - "(current value: %d)\n", opts->mmap_pages); - else if (!is_power_of_2(opts->mmap_pages)) - die("--mmap_pages/-m value must be a power of two."); - - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); + if (errno == EPERM) { + pr_err("Permission error mapping pages.\n" + "Consider increasing " + "/proc/sys/kernel/perf_event_mlock_kb,\n" + "or try again with a smaller value of -m/--mmap_pages.\n" + "(current value: %d)\n", opts->mmap_pages); + rc = -errno; + } else if (!is_power_of_2(opts->mmap_pages)) { + pr_err("--mmap_pages/-m value must be a power of two."); + rc = -EINVAL; + } else { + pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno)); + rc = -errno; + } + goto out; } if (rec->file_new) @@ -309,11 +350,14 @@ try_again: else { if (!perf_evlist__equal(session->evlist, evlist)) { fprintf(stderr, "incompatible append\n"); - exit(-1); + rc = -1; + goto out; } } perf_session__set_id_hdr_size(session); +out: + return rc; } static int process_buildids(struct perf_record *rec) @@ -329,10 +373,13 @@ static int process_buildids(struct perf_record *rec) size, &build_id__mark_dso_hit_ops); } -static void perf_record__exit(int status __used, void *arg) +static void perf_record__exit(int status, void *arg) { struct perf_record *rec = arg; + if (status != 0) + return; + if (!rec->opts.pipe_output) { rec->session->header.data_size += rec->bytes_written; @@ -387,17 +434,26 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; -static void perf_record__mmap_read_all(struct perf_record *rec) +static int perf_record__mmap_read_all(struct perf_record *rec) { int i; + int rc = 0; for (i = 0; i < rec->evlist->nr_mmaps; i++) { - if (rec->evlist->mmap[i].base) - perf_record__mmap_read(rec, &rec->evlist->mmap[i]); + if (rec->evlist->mmap[i].base) { + if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) { + rc = -1; + goto out; + } + } } if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA)) - write_output(rec, &finished_round_event, sizeof(finished_round_event)); + rc = write_output(rec, &finished_round_event, + sizeof(finished_round_event)); + +out: + return rc; } static int __cmd_record(struct perf_record *rec, int argc, const char **argv) @@ -457,7 +513,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) output = open(output_name, flags, S_IRUSR | S_IWUSR); if (output < 0) { perror("failed to create output file"); - exit(-1); + return -1; } rec->output = output; @@ -497,7 +553,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) } } - perf_record__open(rec); + if (perf_record__open(rec) != 0) { + err = -1; + goto out_delete_session; + } /* * perf_session__delete(session) will be called at perf_record__exit() @@ -507,19 +566,20 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (opts->pipe_output) { err = perf_header__write_pipe(output); if (err < 0) - return err; + goto out_delete_session; } else if (rec->file_new) { err = perf_session__write_header(session, evsel_list, output, false); if (err < 0) - return err; + goto out_delete_session; } if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - return -1; + err = -1; + goto out_delete_session; } rec->post_processing_offset = lseek(output, 0, SEEK_CUR); @@ -527,7 +587,8 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) machine = perf_session__find_host_machine(session); if (!machine) { pr_err("Couldn't find native kernel information.\n"); - return -1; + err = -1; + goto out_delete_session; } if (opts->pipe_output) { @@ -535,14 +596,14 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); - return err; + goto out_delete_session; } err = perf_event__synthesize_event_types(tool, process_synthesized_event, machine); if (err < 0) { pr_err("Couldn't synthesize event_types.\n"); - return err; + goto out_delete_session; } if (have_tracepoints(&evsel_list->entries)) { @@ -558,7 +619,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); - return err; + goto out_delete_session; } advance_output(rec, err); } @@ -586,20 +647,24 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) perf_event__synthesize_guest_os); if (!opts->target.system_wide) - perf_event__synthesize_thread_map(tool, evsel_list->threads, + err = perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, machine); else - perf_event__synthesize_threads(tool, process_synthesized_event, + err = perf_event__synthesize_threads(tool, process_synthesized_event, machine); + if (err != 0) + goto out_delete_session; + if (rec->realtime_prio) { struct sched_param param; param.sched_priority = rec->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { pr_err("Could not set realtime priority.\n"); - exit(-1); + err = -1; + goto out_delete_session; } } @@ -614,7 +679,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) for (;;) { int hits = rec->samples; - perf_record__mmap_read_all(rec); + if (perf_record__mmap_read_all(rec) < 0) { + err = -1; + goto out_delete_session; + } if (hits == rec->samples) { if (done) @@ -732,6 +800,106 @@ error: return ret; } +#ifndef NO_LIBUNWIND_SUPPORT +static int get_stack_size(char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} +#endif /* !NO_LIBUNWIND_SUPPORT */ + +static int +parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, + int unset) +{ + struct perf_record *rec = (struct perf_record *)opt->value; + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* --no-call-graph */ + if (unset) + return 0; + + /* We specified default option if none is provided. */ + BUG_ON(!arg); + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? : (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + rec->opts.call_graph = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for -g fp\n"); + break; + +#ifndef NO_LIBUNWIND_SUPPORT + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + ret = 0; + rec->opts.call_graph = CALLCHAIN_DWARF; + rec->opts.stack_dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + rec->opts.stack_dump_size = size; + } + + if (!ret) + pr_debug("callchain: stack dump size %d\n", + rec->opts.stack_dump_size); +#endif /* !NO_LIBUNWIND_SUPPORT */ + } else { + pr_err("callchain: Unknown -g option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + + if (!ret) + pr_debug("callchain: type %d\n", rec->opts.call_graph); + + return ret; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -803,8 +971,9 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, - "do call-graph (stack chain/backtrace) recording"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", + callchain_help, &parse_callchain_opt, + "fp"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), @@ -836,7 +1005,7 @@ const struct option record_options[] = { OPT_END() }; -int cmd_record(int argc, const char **argv, const char *prefix __used) +int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) { int err = -ENOMEM; struct perf_evsel *pos; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7c88a243b5d..1da243dfbc3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -69,8 +69,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -93,7 +93,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, struct annotation *notes; err = -ENOMEM; bx = he->branch_info; - if (bx->from.sym && use_browser > 0) { + if (bx->from.sym && use_browser == 1 && sort__has_sym) { notes = symbol__annotation(bx->from.sym); if (!notes->src && symbol__alloc_hist(bx->from.sym) < 0) @@ -107,7 +107,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, goto out; } - if (bx->to.sym && use_browser > 0) { + if (bx->to.sym && use_browser == 1 && sort__has_sym) { notes = symbol__annotation(bx->to.sym); if (!notes->src && symbol__alloc_hist(bx->to.sym) < 0) @@ -140,8 +140,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, * so we don't allocated the extra space needed because the stdio * code will not use it. */ - if (he->ms.sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser == 1 && sort__has_sym) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); @@ -223,9 +223,9 @@ static int process_sample_event(struct perf_tool *tool, static int process_read_event(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel, - struct machine *machine __used) + struct machine *machine __maybe_unused) { struct perf_report *rep = container_of(tool, struct perf_report, tool); @@ -287,7 +287,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) extern volatile int session_done; -static void sig_handler(int sig __used) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -397,17 +397,17 @@ static int __cmd_report(struct perf_report *rep) desc); } - if (dump_trace) { - perf_session__fprintf_nr_events(session, stdout); - goto out_delete; - } - if (verbose > 3) perf_session__fprintf(session, stdout); if (verbose > 2) perf_session__fprintf_dsos(session, stdout); + if (dump_trace) { + perf_session__fprintf_nr_events(session, stdout); + goto out_delete; + } + nr_samples = 0; list_for_each_entry(pos, &session->evlist->entries, node) { struct hists *hists = &pos->hists; @@ -533,13 +533,14 @@ setup: } static int -parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +parse_branch_mode(const struct option *opt __maybe_unused, + const char *str __maybe_unused, int unset) { sort__branch_mode = !unset; return 0; } -int cmd_report(int argc, const char **argv, const char *prefix __used) +int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; struct stat st; @@ -638,6 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Show a column with the sum of periods"), OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", "use branch records for histogram filling", parse_branch_mode), + OPT_STRING(0, "objdump", &objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; @@ -686,15 +689,19 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (strcmp(report.input_name, "-") != 0) setup_browser(true); - else + else { use_browser = 0; + perf_hpp__init(false, false); + } + + setup_sorting(report_usage, options); /* * Only in the newt browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (use_browser > 0) { + if (use_browser == 1 && sort__has_sym) { symbol_conf.priv_size = sizeof(struct annotation); report.annotate_init = symbol__annotate_init; /* @@ -717,8 +724,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) goto error; - setup_sorting(report_usage, options); - if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) goto error; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7a9ad2b1ee7..9b9e32eaa80 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,31 +23,12 @@ #include <pthread.h> #include <math.h> -static const char *input_name; - -static char default_sort_order[] = "avg, max, switch, runtime"; -static const char *sort_order = default_sort_order; - -static int profile_cpu = -1; - #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 - -static u64 run_measurement_overhead; -static u64 sleep_measurement_overhead; - #define COMM_LEN 20 #define SYM_LEN 129 - #define MAX_PID 65536 -static unsigned long nr_tasks; - -struct perf_sched { - struct perf_tool tool; - struct perf_session *session; -}; - struct sched_atom; struct task_desc { @@ -85,44 +66,6 @@ struct sched_atom { struct task_desc *wakee; }; -static struct task_desc *pid_to_task[MAX_PID]; - -static struct task_desc **tasks; - -static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; -static u64 start_time; - -static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; - -static unsigned long nr_run_events; -static unsigned long nr_sleep_events; -static unsigned long nr_wakeup_events; - -static unsigned long nr_sleep_corrections; -static unsigned long nr_run_events_optimized; - -static unsigned long targetless_wakeups; -static unsigned long multitarget_wakeups; - -static u64 cpu_usage; -static u64 runavg_cpu_usage; -static u64 parent_cpu_usage; -static u64 runavg_parent_cpu_usage; - -static unsigned long nr_runs; -static u64 sum_runtime; -static u64 sum_fluct; -static u64 run_avg; - -static unsigned int replay_repeat = 10; -static unsigned long nr_timestamps; -static unsigned long nr_unordered_timestamps; -static unsigned long nr_state_machine_bugs; -static unsigned long nr_context_switch_bugs; -static unsigned long nr_events; -static unsigned long nr_lost_chunks; -static unsigned long nr_lost_events; - #define TASK_STATE_TO_CHAR_STR "RSDTtZX" enum thread_state { @@ -154,11 +97,79 @@ struct work_atoms { typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); -static struct rb_root atom_root, sorted_atom_root; +struct perf_sched; + +struct trace_sched_handler { + int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); -static u64 all_runtime; -static u64 all_count; + int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); + int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine); + + int (*fork_event)(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample); + + int (*migrate_task_event)(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine); +}; + +struct perf_sched { + struct perf_tool tool; + const char *input_name; + const char *sort_order; + unsigned long nr_tasks; + struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **tasks; + const struct trace_sched_handler *tp_handler; + pthread_mutex_t start_work_mutex; + pthread_mutex_t work_done_wait_mutex; + int profile_cpu; +/* + * Track the current task - that way we can know whether there's any + * weird events, such as a task being switched away that is not current. + */ + int max_cpu; + u32 curr_pid[MAX_CPUS]; + struct thread *curr_thread[MAX_CPUS]; + char next_shortname1; + char next_shortname2; + unsigned int replay_repeat; + unsigned long nr_run_events; + unsigned long nr_sleep_events; + unsigned long nr_wakeup_events; + unsigned long nr_sleep_corrections; + unsigned long nr_run_events_optimized; + unsigned long targetless_wakeups; + unsigned long multitarget_wakeups; + unsigned long nr_runs; + unsigned long nr_timestamps; + unsigned long nr_unordered_timestamps; + unsigned long nr_state_machine_bugs; + unsigned long nr_context_switch_bugs; + unsigned long nr_events; + unsigned long nr_lost_chunks; + unsigned long nr_lost_events; + u64 run_measurement_overhead; + u64 sleep_measurement_overhead; + u64 start_time; + u64 cpu_usage; + u64 runavg_cpu_usage; + u64 parent_cpu_usage; + u64 runavg_parent_cpu_usage; + u64 sum_runtime; + u64 sum_fluct; + u64 run_avg; + u64 all_runtime; + u64 all_count; + u64 cpu_last_switched[MAX_CPUS]; + struct rb_root atom_root, sorted_atom_root; + struct list_head sort_list, cmp_pid; +}; static u64 get_nsecs(void) { @@ -169,13 +180,13 @@ static u64 get_nsecs(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } -static void burn_nsecs(u64 nsecs) +static void burn_nsecs(struct perf_sched *sched, u64 nsecs) { u64 T0 = get_nsecs(), T1; do { T1 = get_nsecs(); - } while (T1 + run_measurement_overhead < T0 + nsecs); + } while (T1 + sched->run_measurement_overhead < T0 + nsecs); } static void sleep_nsecs(u64 nsecs) @@ -188,24 +199,24 @@ static void sleep_nsecs(u64 nsecs) nanosleep(&ts, NULL); } -static void calibrate_run_measurement_overhead(void) +static void calibrate_run_measurement_overhead(struct perf_sched *sched) { u64 T0, T1, delta, min_delta = 1000000000ULL; int i; for (i = 0; i < 10; i++) { T0 = get_nsecs(); - burn_nsecs(0); + burn_nsecs(sched, 0); T1 = get_nsecs(); delta = T1-T0; min_delta = min(min_delta, delta); } - run_measurement_overhead = min_delta; + sched->run_measurement_overhead = min_delta; printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta); } -static void calibrate_sleep_measurement_overhead(void) +static void calibrate_sleep_measurement_overhead(struct perf_sched *sched) { u64 T0, T1, delta, min_delta = 1000000000ULL; int i; @@ -218,7 +229,7 @@ static void calibrate_sleep_measurement_overhead(void) min_delta = min(min_delta, delta); } min_delta -= 10000; - sleep_measurement_overhead = min_delta; + sched->sleep_measurement_overhead = min_delta; printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta); } @@ -251,8 +262,8 @@ static struct sched_atom *last_event(struct task_desc *task) return task->atoms[task->nr_events - 1]; } -static void -add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) +static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, u64 duration) { struct sched_atom *event, *curr_event = last_event(task); @@ -261,7 +272,7 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) * to it: */ if (curr_event && curr_event->type == SCHED_EVENT_RUN) { - nr_run_events_optimized++; + sched->nr_run_events_optimized++; curr_event->duration += duration; return; } @@ -271,12 +282,11 @@ add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) event->type = SCHED_EVENT_RUN; event->duration = duration; - nr_run_events++; + sched->nr_run_events++; } -static void -add_sched_event_wakeup(struct task_desc *task, u64 timestamp, - struct task_desc *wakee) +static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, struct task_desc *wakee) { struct sched_atom *event, *wakee_event; @@ -286,11 +296,11 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, wakee_event = last_event(wakee); if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { - targetless_wakeups++; + sched->targetless_wakeups++; return; } if (wakee_event->wait_sem) { - multitarget_wakeups++; + sched->multitarget_wakeups++; return; } @@ -299,89 +309,89 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; - nr_wakeup_events++; + sched->nr_wakeup_events++; } -static void -add_sched_event_sleep(struct task_desc *task, u64 timestamp, - u64 task_state __used) +static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, + u64 timestamp, u64 task_state __maybe_unused) { struct sched_atom *event = get_new_event(task, timestamp); event->type = SCHED_EVENT_SLEEP; - nr_sleep_events++; + sched->nr_sleep_events++; } -static struct task_desc *register_pid(unsigned long pid, const char *comm) +static struct task_desc *register_pid(struct perf_sched *sched, + unsigned long pid, const char *comm) { struct task_desc *task; BUG_ON(pid >= MAX_PID); - task = pid_to_task[pid]; + task = sched->pid_to_task[pid]; if (task) return task; task = zalloc(sizeof(*task)); task->pid = pid; - task->nr = nr_tasks; + task->nr = sched->nr_tasks; strcpy(task->comm, comm); /* * every task starts in sleeping state - this gets ignored * if there's no wakeup pointing to this sleep state: */ - add_sched_event_sleep(task, 0, 0); + add_sched_event_sleep(sched, task, 0, 0); - pid_to_task[pid] = task; - nr_tasks++; - tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *)); - BUG_ON(!tasks); - tasks[task->nr] = task; + sched->pid_to_task[pid] = task; + sched->nr_tasks++; + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + BUG_ON(!sched->tasks); + sched->tasks[task->nr] = task; if (verbose) - printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm); + printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); return task; } -static void print_task_traces(void) +static void print_task_traces(struct perf_sched *sched) { struct task_desc *task; unsigned long i; - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; printf("task %6ld (%20s:%10ld), nr_events: %ld\n", task->nr, task->comm, task->pid, task->nr_events); } } -static void add_cross_task_wakeups(void) +static void add_cross_task_wakeups(struct perf_sched *sched) { struct task_desc *task1, *task2; unsigned long i, j; - for (i = 0; i < nr_tasks; i++) { - task1 = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task1 = sched->tasks[i]; j = i + 1; - if (j == nr_tasks) + if (j == sched->nr_tasks) j = 0; - task2 = tasks[j]; - add_sched_event_wakeup(task1, 0, task2); + task2 = sched->tasks[j]; + add_sched_event_wakeup(sched, task1, 0, task2); } } -static void -process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) +static void perf_sched__process_event(struct perf_sched *sched, + struct sched_atom *atom) { int ret = 0; switch (atom->type) { case SCHED_EVENT_RUN: - burn_nsecs(atom->duration); + burn_nsecs(sched, atom->duration); break; case SCHED_EVENT_SLEEP: if (atom->wait_sem) @@ -428,8 +438,8 @@ static int self_open_counters(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) - die("Error: sys_perf_event_open() syscall returned" - "with %d (%s)\n", fd, strerror(errno)); + pr_err("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); return fd; } @@ -444,31 +454,41 @@ static u64 get_cpu_usage_nsec_self(int fd) return runtime; } +struct sched_thread_parms { + struct task_desc *task; + struct perf_sched *sched; +}; + static void *thread_func(void *ctx) { - struct task_desc *this_task = ctx; + struct sched_thread_parms *parms = ctx; + struct task_desc *this_task = parms->task; + struct perf_sched *sched = parms->sched; u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; int fd; + free(parms); + sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); fd = self_open_counters(); - + if (fd < 0) + return NULL; again: ret = sem_post(&this_task->ready_for_work); BUG_ON(ret); - ret = pthread_mutex_lock(&start_work_mutex); + ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); - ret = pthread_mutex_unlock(&start_work_mutex); + ret = pthread_mutex_unlock(&sched->start_work_mutex); BUG_ON(ret); cpu_usage_0 = get_cpu_usage_nsec_self(fd); for (i = 0; i < this_task->nr_events; i++) { this_task->curr_event = i; - process_sched_event(this_task, this_task->atoms[i]); + perf_sched__process_event(sched, this_task->atoms[i]); } cpu_usage_1 = get_cpu_usage_nsec_self(fd); @@ -476,15 +496,15 @@ again: ret = sem_post(&this_task->work_done_sem); BUG_ON(ret); - ret = pthread_mutex_lock(&work_done_wait_mutex); + ret = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(ret); - ret = pthread_mutex_unlock(&work_done_wait_mutex); + ret = pthread_mutex_unlock(&sched->work_done_wait_mutex); BUG_ON(ret); goto again; } -static void create_tasks(void) +static void create_tasks(struct perf_sched *sched) { struct task_desc *task; pthread_attr_t attr; @@ -496,128 +516,129 @@ static void create_tasks(void) err = pthread_attr_setstacksize(&attr, (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); BUG_ON(err); - err = pthread_mutex_lock(&start_work_mutex); + err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); - err = pthread_mutex_lock(&work_done_wait_mutex); + err = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(err); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + struct sched_thread_parms *parms = malloc(sizeof(*parms)); + BUG_ON(parms == NULL); + parms->task = task = sched->tasks[i]; + parms->sched = sched; sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); task->curr_event = 0; - err = pthread_create(&task->thread, &attr, thread_func, task); + err = pthread_create(&task->thread, &attr, thread_func, parms); BUG_ON(err); } } -static void wait_for_tasks(void) +static void wait_for_tasks(struct perf_sched *sched) { u64 cpu_usage_0, cpu_usage_1; struct task_desc *task; unsigned long i, ret; - start_time = get_nsecs(); - cpu_usage = 0; - pthread_mutex_unlock(&work_done_wait_mutex); + sched->start_time = get_nsecs(); + sched->cpu_usage = 0; + pthread_mutex_unlock(&sched->work_done_wait_mutex); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; ret = sem_wait(&task->ready_for_work); BUG_ON(ret); sem_init(&task->ready_for_work, 0, 0); } - ret = pthread_mutex_lock(&work_done_wait_mutex); + ret = pthread_mutex_lock(&sched->work_done_wait_mutex); BUG_ON(ret); cpu_usage_0 = get_cpu_usage_nsec_parent(); - pthread_mutex_unlock(&start_work_mutex); + pthread_mutex_unlock(&sched->start_work_mutex); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; ret = sem_wait(&task->work_done_sem); BUG_ON(ret); sem_init(&task->work_done_sem, 0, 0); - cpu_usage += task->cpu_usage; + sched->cpu_usage += task->cpu_usage; task->cpu_usage = 0; } cpu_usage_1 = get_cpu_usage_nsec_parent(); - if (!runavg_cpu_usage) - runavg_cpu_usage = cpu_usage; - runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10; + if (!sched->runavg_cpu_usage) + sched->runavg_cpu_usage = sched->cpu_usage; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; - parent_cpu_usage = cpu_usage_1 - cpu_usage_0; - if (!runavg_parent_cpu_usage) - runavg_parent_cpu_usage = parent_cpu_usage; - runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 + - parent_cpu_usage)/10; + sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; + if (!sched->runavg_parent_cpu_usage) + sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + + sched->parent_cpu_usage)/10; - ret = pthread_mutex_lock(&start_work_mutex); + ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); - for (i = 0; i < nr_tasks; i++) { - task = tasks[i]; + for (i = 0; i < sched->nr_tasks; i++) { + task = sched->tasks[i]; sem_init(&task->sleep_sem, 0, 0); task->curr_event = 0; } } -static void run_one_test(void) +static void run_one_test(struct perf_sched *sched) { u64 T0, T1, delta, avg_delta, fluct; T0 = get_nsecs(); - wait_for_tasks(); + wait_for_tasks(sched); T1 = get_nsecs(); delta = T1 - T0; - sum_runtime += delta; - nr_runs++; + sched->sum_runtime += delta; + sched->nr_runs++; - avg_delta = sum_runtime / nr_runs; + avg_delta = sched->sum_runtime / sched->nr_runs; if (delta < avg_delta) fluct = avg_delta - delta; else fluct = delta - avg_delta; - sum_fluct += fluct; - if (!run_avg) - run_avg = delta; - run_avg = (run_avg*9 + delta)/10; + sched->sum_fluct += fluct; + if (!sched->run_avg) + sched->run_avg = delta; + sched->run_avg = (sched->run_avg * 9 + delta) / 10; - printf("#%-3ld: %0.3f, ", - nr_runs, (double)delta/1000000.0); + printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); - printf("ravg: %0.2f, ", - (double)run_avg/1e6); + printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6); printf("cpu: %0.2f / %0.2f", - (double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6); + (double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6); #if 0 /* * rusage statistics done by the parent, these are less - * accurate than the sum_exec_runtime based statistics: + * accurate than the sched->sum_exec_runtime based statistics: */ printf(" [%0.2f / %0.2f]", - (double)parent_cpu_usage/1e6, - (double)runavg_parent_cpu_usage/1e6); + (double)sched->parent_cpu_usage/1e6, + (double)sched->runavg_parent_cpu_usage/1e6); #endif printf("\n"); - if (nr_sleep_corrections) - printf(" (%ld sleep corrections)\n", nr_sleep_corrections); - nr_sleep_corrections = 0; + if (sched->nr_sleep_corrections) + printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections); + sched->nr_sleep_corrections = 0; } -static void test_calibrations(void) +static void test_calibrations(struct perf_sched *sched) { u64 T0, T1; T0 = get_nsecs(); - burn_nsecs(1e6); + burn_nsecs(sched, 1e6); T1 = get_nsecs(); printf("the run test took %" PRIu64 " nsecs\n", T1 - T0); @@ -629,236 +650,92 @@ static void test_calibrations(void) printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0); } -#define FILL_FIELD(ptr, field, event, data) \ - ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data) - -#define FILL_ARRAY(ptr, array, event, data) \ -do { \ - void *__array = raw_field_ptr(event, #array, data); \ - memcpy(ptr.array, __array, sizeof(ptr.array)); \ -} while(0) - -#define FILL_COMMON_FIELDS(ptr, event, data) \ -do { \ - FILL_FIELD(ptr, common_type, event, data); \ - FILL_FIELD(ptr, common_flags, event, data); \ - FILL_FIELD(ptr, common_preempt_count, event, data); \ - FILL_FIELD(ptr, common_pid, event, data); \ - FILL_FIELD(ptr, common_tgid, event, data); \ -} while (0) - - - -struct trace_switch_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char prev_comm[16]; - u32 prev_pid; - u32 prev_prio; - u64 prev_state; - char next_comm[16]; - u32 next_pid; - u32 next_prio; -}; - -struct trace_runtime_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - u64 runtime; - u64 vruntime; -}; - -struct trace_wakeup_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - - u32 prio; - u32 success; - u32 cpu; -}; - -struct trace_fork_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char parent_comm[16]; - u32 parent_pid; - char child_comm[16]; - u32 child_pid; -}; - -struct trace_migrate_task_event { - u32 size; - - u16 common_type; - u8 common_flags; - u8 common_preempt_count; - u32 common_pid; - u32 common_tgid; - - char comm[16]; - u32 pid; - - u32 prio; - u32 cpu; -}; - -struct trace_sched_handler { - void (*switch_event)(struct trace_switch_event *, - struct machine *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*runtime_event)(struct trace_runtime_event *, - struct machine *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*wakeup_event)(struct trace_wakeup_event *, - struct machine *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*fork_event)(struct trace_fork_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); - - void (*migrate_task_event)(struct trace_migrate_task_event *, - struct machine *machine, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); -}; - - -static void -replay_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine __used, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int +replay_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, + struct machine *machine __maybe_unused) { + const char *comm = perf_evsel__strval(evsel, sample, "comm"); + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); struct task_desc *waker, *wakee; if (verbose) { - printf("sched_wakeup event %p\n", event); + printf("sched_wakeup event %p\n", evsel); - printf(" ... pid %d woke up %s/%d\n", - wakeup_event->common_pid, - wakeup_event->comm, - wakeup_event->pid); + printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); } - waker = register_pid(wakeup_event->common_pid, "<unknown>"); - wakee = register_pid(wakeup_event->pid, wakeup_event->comm); + waker = register_pid(sched, sample->tid, "<unknown>"); + wakee = register_pid(sched, pid, comm); - add_sched_event_wakeup(waker, timestamp, wakee); + add_sched_event_wakeup(sched, waker, sample->time, wakee); + return 0; } -static u64 cpu_last_switched[MAX_CPUS]; - -static void -replay_switch_event(struct trace_switch_event *switch_event, - struct machine *machine __used, - struct event_format *event, - int cpu, - u64 timestamp, - struct thread *thread __used) +static int replay_switch_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) { - struct task_desc *prev, __used *next; - u64 timestamp0; + const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), + *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + struct task_desc *prev, __maybe_unused *next; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; if (verbose) - printf("sched_switch event %p\n", event); + printf("sched_switch event %p\n", evsel); if (cpu >= MAX_CPUS || cpu < 0) - return; + return 0; - timestamp0 = cpu_last_switched[cpu]; + timestamp0 = sched->cpu_last_switched[cpu]; if (timestamp0) delta = timestamp - timestamp0; else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - - if (verbose) { - printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", - switch_event->prev_comm, switch_event->prev_pid, - switch_event->next_comm, switch_event->next_pid, - delta); + if (delta < 0) { + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; } - prev = register_pid(switch_event->prev_pid, switch_event->prev_comm); - next = register_pid(switch_event->next_pid, switch_event->next_comm); + pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n", + prev_comm, prev_pid, next_comm, next_pid, delta); - cpu_last_switched[cpu] = timestamp; + prev = register_pid(sched, prev_pid, prev_comm); + next = register_pid(sched, next_pid, next_comm); - add_sched_event_run(prev, timestamp, delta); - add_sched_event_sleep(prev, timestamp, switch_event->prev_state); -} + sched->cpu_last_switched[cpu] = timestamp; + add_sched_event_run(sched, prev, timestamp, delta); + add_sched_event_sleep(sched, prev, timestamp, prev_state); -static void -replay_fork_event(struct trace_fork_event *fork_event, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + return 0; +} + +static int replay_fork_event(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample) { + const char *parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"), + *child_comm = perf_evsel__strval(evsel, sample, "child_comm"); + const u32 parent_pid = perf_evsel__intval(evsel, sample, "parent_pid"), + child_pid = perf_evsel__intval(evsel, sample, "child_pid"); + if (verbose) { - printf("sched_fork event %p\n", event); - printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid); - printf("... child: %s/%d\n", fork_event->child_comm, fork_event->child_pid); + printf("sched_fork event %p\n", evsel); + printf("... parent: %s/%d\n", parent_comm, parent_pid); + printf("... child: %s/%d\n", child_comm, child_pid); } - register_pid(fork_event->parent_pid, fork_event->parent_comm); - register_pid(fork_event->child_pid, fork_event->child_comm); -} -static struct trace_sched_handler replay_ops = { - .wakeup_event = replay_wakeup_event, - .switch_event = replay_switch_event, - .fork_event = replay_fork_event, -}; + register_pid(sched, parent_pid, parent_comm); + register_pid(sched, child_pid, child_comm); + return 0; +} struct sort_dimension { const char *name; @@ -866,8 +743,6 @@ struct sort_dimension { struct list_head list; }; -static LIST_HEAD(cmp_pid); - static int thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r) { @@ -936,43 +811,45 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, rb_insert_color(&data->node, root); } -static void thread_atoms_insert(struct thread *thread) +static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) { struct work_atoms *atoms = zalloc(sizeof(*atoms)); - if (!atoms) - die("No memory"); + if (!atoms) { + pr_err("No memory at %s\n", __func__); + return -1; + } atoms->thread = thread; INIT_LIST_HEAD(&atoms->work_list); - __thread_latency_insert(&atom_root, atoms, &cmp_pid); + __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); + return 0; } -static void -latency_fork_event(struct trace_fork_event *fork_event __used, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static int latency_fork_event(struct perf_sched *sched __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct perf_sample *sample __maybe_unused) { /* should insert the newcomer */ + return 0; } -__used -static char sched_out_state(struct trace_switch_event *switch_event) +static char sched_out_state(u64 prev_state) { const char *str = TASK_STATE_TO_CHAR_STR; - return str[switch_event->prev_state]; + return str[prev_state]; } -static void +static int add_sched_out_event(struct work_atoms *atoms, char run_state, u64 timestamp) { struct work_atom *atom = zalloc(sizeof(*atom)); - if (!atom) - die("Non memory"); + if (!atom) { + pr_err("Non memory at %s", __func__); + return -1; + } atom->sched_out_time = timestamp; @@ -982,10 +859,12 @@ add_sched_out_event(struct work_atoms *atoms, } list_add_tail(&atom->list, &atoms->work_list); + return 0; } static void -add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used) +add_runtime_event(struct work_atoms *atoms, u64 delta, + u64 timestamp __maybe_unused) { struct work_atom *atom; @@ -1028,106 +907,128 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } -static void -latency_switch_event(struct trace_switch_event *switch_event, - struct machine *machine, - struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *thread __used) +static int latency_switch_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; BUG_ON(cpu >= MAX_CPUS || cpu < 0); - timestamp0 = cpu_last_switched[cpu]; - cpu_last_switched[cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[cpu]; + sched->cpu_last_switched[cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - + if (delta < 0) { + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; + } - sched_out = machine__findnew_thread(machine, switch_event->prev_pid); - sched_in = machine__findnew_thread(machine, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, prev_pid); + sched_in = machine__findnew_thread(machine, next_pid); - out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); + out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { - thread_atoms_insert(sched_out); - out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); - if (!out_events) - die("out-event: Internal tree error"); + if (thread_atoms_insert(sched, sched_out)) + return -1; + out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); + if (!out_events) { + pr_err("out-event: Internal tree error"); + return -1; + } } - add_sched_out_event(out_events, sched_out_state(switch_event), timestamp); + if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) + return -1; - in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); + in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); if (!in_events) { - thread_atoms_insert(sched_in); - in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid); - if (!in_events) - die("in-event: Internal tree error"); + if (thread_atoms_insert(sched, sched_in)) + return -1; + in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); + if (!in_events) { + pr_err("in-event: Internal tree error"); + return -1; + } /* * Take came in we have not heard about yet, * add in an initial atom in runnable state: */ - add_sched_out_event(in_events, 'R', timestamp); + if (add_sched_out_event(in_events, 'R', timestamp)) + return -1; } add_sched_in_event(in_events, timestamp); + + return 0; } -static void -latency_runtime_event(struct trace_runtime_event *runtime_event, - struct machine *machine, - struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *this_thread __used) +static int latency_runtime_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); - struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + struct thread *thread = machine__findnew_thread(machine, pid); + struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); + u64 timestamp = sample->time; + int cpu = sample->cpu; BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { - thread_atoms_insert(thread); - atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); - if (!atoms) - die("in-event: Internal tree error"); - add_sched_out_event(atoms, 'R', timestamp); + if (thread_atoms_insert(sched, thread)) + return -1; + atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); + if (!atoms) { + pr_err("in-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'R', timestamp)) + return -1; } - add_runtime_event(atoms, runtime_event->runtime, timestamp); + add_runtime_event(atoms, runtime, timestamp); + return 0; } -static void -latency_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) +static int latency_wakeup_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 pid = perf_evsel__intval(evsel, sample, "pid"), + success = perf_evsel__intval(evsel, sample, "success"); struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; + u64 timestamp = sample->time; /* Note for later, it may be interesting to observe the failing cases */ - if (!wakeup_event->success) - return; + if (!success) + return 0; - wakee = machine__findnew_thread(machine, wakeup_event->pid); - atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); + wakee = machine__findnew_thread(machine, pid); + atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { - thread_atoms_insert(wakee); - atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); - if (!atoms) - die("wakeup-event: Internal tree error"); - add_sched_out_event(atoms, 'S', timestamp); + if (thread_atoms_insert(sched, wakee)) + return -1; + atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); + if (!atoms) { + pr_err("wakeup-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'S', timestamp)) + return -1; } BUG_ON(list_empty(&atoms->work_list)); @@ -1139,27 +1040,27 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, * one CPU, or are only looking at only one, so don't * make useless noise. */ - if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) - nr_state_machine_bugs++; + if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) + sched->nr_state_machine_bugs++; - nr_timestamps++; + sched->nr_timestamps++; if (atom->sched_out_time > timestamp) { - nr_unordered_timestamps++; - return; + sched->nr_unordered_timestamps++; + return 0; } atom->state = THREAD_WAIT_CPU; atom->wake_up_time = timestamp; + return 0; } -static void -latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) +static int latency_migrate_task_event(struct perf_sched *sched, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { + const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; struct thread *migrant; @@ -1167,18 +1068,22 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, /* * Only need to worry about migration when profiling one CPU. */ - if (profile_cpu == -1) - return; + if (sched->profile_cpu == -1) + return 0; - migrant = machine__findnew_thread(machine, migrate_task_event->pid); - atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + migrant = machine__findnew_thread(machine, pid); + atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { - thread_atoms_insert(migrant); - register_pid(migrant->pid, migrant->comm); - atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); - if (!atoms) - die("migration-event: Internal tree error"); - add_sched_out_event(atoms, 'R', timestamp); + if (thread_atoms_insert(sched, migrant)) + return -1; + register_pid(sched, migrant->pid, migrant->comm); + atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); + if (!atoms) { + pr_err("migration-event: Internal tree error"); + return -1; + } + if (add_sched_out_event(atoms, 'R', timestamp)) + return -1; } BUG_ON(list_empty(&atoms->work_list)); @@ -1186,21 +1091,15 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, atom = list_entry(atoms->work_list.prev, struct work_atom, list); atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; - nr_timestamps++; + sched->nr_timestamps++; if (atom->sched_out_time > timestamp) - nr_unordered_timestamps++; -} + sched->nr_unordered_timestamps++; -static struct trace_sched_handler lat_ops = { - .wakeup_event = latency_wakeup_event, - .switch_event = latency_switch_event, - .runtime_event = latency_runtime_event, - .fork_event = latency_fork_event, - .migrate_task_event = latency_migrate_task_event, -}; + return 0; +} -static void output_lat_thread(struct work_atoms *work_list) +static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) { int i; int ret; @@ -1214,8 +1113,8 @@ static void output_lat_thread(struct work_atoms *work_list) if (!strcmp(work_list->thread->comm, "swapper")) return; - all_runtime += work_list->total_runtime; - all_count += work_list->nb_atoms; + sched->all_runtime += work_list->total_runtime; + sched->all_count += work_list->nb_atoms; ret = printf(" %s:%d ", work_list->thread->comm, work_list->thread->pid); @@ -1241,11 +1140,6 @@ static int pid_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension pid_sort_dimension = { - .name = "pid", - .cmp = pid_cmp, -}; - static int avg_cmp(struct work_atoms *l, struct work_atoms *r) { u64 avgl, avgr; @@ -1267,11 +1161,6 @@ static int avg_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension avg_sort_dimension = { - .name = "avg", - .cmp = avg_cmp, -}; - static int max_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->max_lat < r->max_lat) @@ -1282,11 +1171,6 @@ static int max_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension max_sort_dimension = { - .name = "max", - .cmp = max_cmp, -}; - static int switch_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->nb_atoms < r->nb_atoms) @@ -1297,11 +1181,6 @@ static int switch_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension switch_sort_dimension = { - .name = "switch", - .cmp = switch_cmp, -}; - static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) { if (l->total_runtime < r->total_runtime) @@ -1312,28 +1191,38 @@ static int runtime_cmp(struct work_atoms *l, struct work_atoms *r) return 0; } -static struct sort_dimension runtime_sort_dimension = { - .name = "runtime", - .cmp = runtime_cmp, -}; - -static struct sort_dimension *available_sorts[] = { - &pid_sort_dimension, - &avg_sort_dimension, - &max_sort_dimension, - &switch_sort_dimension, - &runtime_sort_dimension, -}; - -#define NB_AVAILABLE_SORTS (int)(sizeof(available_sorts) / sizeof(struct sort_dimension *)) - -static LIST_HEAD(sort_list); - static int sort_dimension__add(const char *tok, struct list_head *list) { - int i; + size_t i; + static struct sort_dimension avg_sort_dimension = { + .name = "avg", + .cmp = avg_cmp, + }; + static struct sort_dimension max_sort_dimension = { + .name = "max", + .cmp = max_cmp, + }; + static struct sort_dimension pid_sort_dimension = { + .name = "pid", + .cmp = pid_cmp, + }; + static struct sort_dimension runtime_sort_dimension = { + .name = "runtime", + .cmp = runtime_cmp, + }; + static struct sort_dimension switch_sort_dimension = { + .name = "switch", + .cmp = switch_cmp, + }; + struct sort_dimension *available_sorts[] = { + &pid_sort_dimension, + &avg_sort_dimension, + &max_sort_dimension, + &switch_sort_dimension, + &runtime_sort_dimension, + }; - for (i = 0; i < NB_AVAILABLE_SORTS; i++) { + for (i = 0; i < ARRAY_SIZE(available_sorts); i++) { if (!strcmp(available_sorts[i]->name, tok)) { list_add_tail(&available_sorts[i]->list, list); @@ -1344,126 +1233,97 @@ static int sort_dimension__add(const char *tok, struct list_head *list) return -1; } -static void setup_sorting(void); - -static void sort_lat(void) +static void perf_sched__sort_lat(struct perf_sched *sched) { struct rb_node *node; for (;;) { struct work_atoms *data; - node = rb_first(&atom_root); + node = rb_first(&sched->atom_root); if (!node) break; - rb_erase(node, &atom_root); + rb_erase(node, &sched->atom_root); data = rb_entry(node, struct work_atoms, node); - __thread_latency_insert(&sorted_atom_root, data, &sort_list); + __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); } } -static struct trace_sched_handler *trace_handler; - -static void -process_sched_wakeup_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread) +static int process_sched_wakeup_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - void *data = sample->raw_data; - struct trace_wakeup_event wakeup_event; - - FILL_COMMON_FIELDS(wakeup_event, event, data); + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - FILL_ARRAY(wakeup_event, comm, event, data); - FILL_FIELD(wakeup_event, pid, event, data); - FILL_FIELD(wakeup_event, prio, event, data); - FILL_FIELD(wakeup_event, success, event, data); - FILL_FIELD(wakeup_event, cpu, event, data); + if (sched->tp_handler->wakeup_event) + return sched->tp_handler->wakeup_event(sched, evsel, sample, machine); - if (trace_handler->wakeup_event) - trace_handler->wakeup_event(&wakeup_event, machine, event, - sample->cpu, sample->time, thread); + return 0; } -/* - * Track the current task - that way we can know whether there's any - * weird events, such as a task being switched away that is not current. - */ -static int max_cpu; - -static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 }; - -static struct thread *curr_thread[MAX_CPUS]; - -static char next_shortname1 = 'A'; -static char next_shortname2 = '0'; - -static void -map_switch_event(struct trace_switch_event *switch_event, - struct machine *machine, - struct event_format *event __used, - int this_cpu, - u64 timestamp, - struct thread *thread __used) +static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, + struct perf_sample *sample, struct machine *machine) { - struct thread *sched_out __used, *sched_in; + const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + struct thread *sched_out __maybe_unused, *sched_in; int new_shortname; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; s64 delta; - int cpu; + int cpu, this_cpu = sample->cpu; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); - if (this_cpu > max_cpu) - max_cpu = this_cpu; + if (this_cpu > sched->max_cpu) + sched->max_cpu = this_cpu; - timestamp0 = cpu_last_switched[this_cpu]; - cpu_last_switched[this_cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu]; + sched->cpu_last_switched[this_cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else delta = 0; - if (delta < 0) - die("hm, delta: %" PRIu64 " < 0 ?\n", delta); - + if (delta < 0) { + pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta); + return -1; + } - sched_out = machine__findnew_thread(machine, switch_event->prev_pid); - sched_in = machine__findnew_thread(machine, switch_event->next_pid); + sched_out = machine__findnew_thread(machine, prev_pid); + sched_in = machine__findnew_thread(machine, next_pid); - curr_thread[this_cpu] = sched_in; + sched->curr_thread[this_cpu] = sched_in; printf(" "); new_shortname = 0; if (!sched_in->shortname[0]) { - sched_in->shortname[0] = next_shortname1; - sched_in->shortname[1] = next_shortname2; + sched_in->shortname[0] = sched->next_shortname1; + sched_in->shortname[1] = sched->next_shortname2; - if (next_shortname1 < 'Z') { - next_shortname1++; + if (sched->next_shortname1 < 'Z') { + sched->next_shortname1++; } else { - next_shortname1='A'; - if (next_shortname2 < '9') { - next_shortname2++; + sched->next_shortname1='A'; + if (sched->next_shortname2 < '9') { + sched->next_shortname2++; } else { - next_shortname2='0'; + sched->next_shortname2='0'; } } new_shortname = 1; } - for (cpu = 0; cpu <= max_cpu; cpu++) { + for (cpu = 0; cpu <= sched->max_cpu; cpu++) { if (cpu != this_cpu) printf(" "); else printf("*"); - if (curr_thread[cpu]) { - if (curr_thread[cpu]->pid) - printf("%2s ", curr_thread[cpu]->shortname); + if (sched->curr_thread[cpu]) { + if (sched->curr_thread[cpu]->pid) + printf("%2s ", sched->curr_thread[cpu]->shortname); else printf(". "); } else @@ -1477,134 +1337,97 @@ map_switch_event(struct trace_switch_event *switch_event, } else { printf("\n"); } + + return 0; } -static void -process_sched_switch_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread) +static int process_sched_switch_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - int this_cpu = sample->cpu; - void *data = sample->raw_data; - struct trace_switch_event switch_event; - - FILL_COMMON_FIELDS(switch_event, event, data); - - FILL_ARRAY(switch_event, prev_comm, event, data); - FILL_FIELD(switch_event, prev_pid, event, data); - FILL_FIELD(switch_event, prev_prio, event, data); - FILL_FIELD(switch_event, prev_state, event, data); - FILL_ARRAY(switch_event, next_comm, event, data); - FILL_FIELD(switch_event, next_pid, event, data); - FILL_FIELD(switch_event, next_prio, event, data); + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); + int this_cpu = sample->cpu, err = 0; + u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), + next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - if (curr_pid[this_cpu] != (u32)-1) { + if (sched->curr_pid[this_cpu] != (u32)-1) { /* * Are we trying to switch away a PID that is * not current? */ - if (curr_pid[this_cpu] != switch_event.prev_pid) - nr_context_switch_bugs++; + if (sched->curr_pid[this_cpu] != prev_pid) + sched->nr_context_switch_bugs++; } - if (trace_handler->switch_event) - trace_handler->switch_event(&switch_event, machine, event, - this_cpu, sample->time, thread); - curr_pid[this_cpu] = switch_event.next_pid; + if (sched->tp_handler->switch_event) + err = sched->tp_handler->switch_event(sched, evsel, sample, machine); + + sched->curr_pid[this_cpu] = next_pid; + return err; } -static void -process_sched_runtime_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread) +static int process_sched_runtime_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - void *data = sample->raw_data; - struct trace_runtime_event runtime_event; + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - FILL_ARRAY(runtime_event, comm, event, data); - FILL_FIELD(runtime_event, pid, event, data); - FILL_FIELD(runtime_event, runtime, event, data); - FILL_FIELD(runtime_event, vruntime, event, data); + if (sched->tp_handler->runtime_event) + return sched->tp_handler->runtime_event(sched, evsel, sample, machine); - if (trace_handler->runtime_event) - trace_handler->runtime_event(&runtime_event, machine, event, - sample->cpu, sample->time, thread); + return 0; } -static void -process_sched_fork_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine __used, - struct thread *thread) +static int process_sched_fork_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine __maybe_unused) { - void *data = sample->raw_data; - struct trace_fork_event fork_event; - - FILL_COMMON_FIELDS(fork_event, event, data); + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - FILL_ARRAY(fork_event, parent_comm, event, data); - FILL_FIELD(fork_event, parent_pid, event, data); - FILL_ARRAY(fork_event, child_comm, event, data); - FILL_FIELD(fork_event, child_pid, event, data); + if (sched->tp_handler->fork_event) + return sched->tp_handler->fork_event(sched, evsel, sample); - if (trace_handler->fork_event) - trace_handler->fork_event(&fork_event, event, - sample->cpu, sample->time, thread); + return 0; } -static void -process_sched_exit_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample __used, - struct machine *machine __used, - struct thread *thread __used) +static int process_sched_exit_event(struct perf_tool *tool __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { - if (verbose) - printf("sched_exit event %p\n", event); + pr_debug("sched_exit event %p\n", evsel); + return 0; } -static void -process_sched_migrate_task_event(struct perf_tool *tool __used, - struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread) +static int process_sched_migrate_task_event(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine) { - void *data = sample->raw_data; - struct trace_migrate_task_event migrate_task_event; - - FILL_COMMON_FIELDS(migrate_task_event, event, data); + struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - FILL_ARRAY(migrate_task_event, comm, event, data); - FILL_FIELD(migrate_task_event, pid, event, data); - FILL_FIELD(migrate_task_event, prio, event, data); - FILL_FIELD(migrate_task_event, cpu, event, data); + if (sched->tp_handler->migrate_task_event) + return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine); - if (trace_handler->migrate_task_event) - trace_handler->migrate_task_event(&migrate_task_event, machine, - event, sample->cpu, - sample->time, thread); + return 0; } -typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event, - struct perf_sample *sample, - struct machine *machine, - struct thread *thread); +typedef int (*tracepoint_handler)(struct perf_tool *tool, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct machine *machine); -static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, - union perf_event *event __used, +static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine) { - struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - struct pevent *pevent = sched->session->pevent; struct thread *thread = machine__findnew_thread(machine, sample->pid); + int err = 0; if (thread == NULL) { pr_debug("problem processing %s event, skipping it.\n", @@ -1617,30 +1440,15 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; - - if (evsel->handler.data == NULL) - evsel->handler.data = pevent_find_event(pevent, - evsel->attr.config); - - f(tool, evsel->handler.data, sample, machine, thread); + err = f(tool, evsel, sample, machine); } - return 0; + return err; } -static struct perf_sched perf_sched = { - .tool = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, - }, -}; - -static void read_events(bool destroy, struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched, bool destroy, + struct perf_session **psession) { - int err = -EINVAL; const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, { "sched:sched_stat_runtime", process_sched_runtime_event, }, @@ -1652,24 +1460,25 @@ static void read_events(bool destroy, struct perf_session **psession) }; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_sched.tool); - if (session == NULL) - die("No Memory"); - - perf_sched.session = session; + session = perf_session__new(sched->input_name, O_RDONLY, 0, false, &sched->tool); + if (session == NULL) { + pr_debug("No Memory for session\n"); + return -1; + } - err = perf_session__set_tracepoints_handlers(session, handlers); - assert(err == 0); + if (perf_session__set_tracepoints_handlers(session, handlers)) + goto out_delete; if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &perf_sched.tool); - if (err) - die("Failed to process events, error %d", err); + int err = perf_session__process_events(session, &sched->tool); + if (err) { + pr_err("Failed to process events, error %d", err); + goto out_delete; + } - nr_events = session->hists.stats.nr_events[0]; - nr_lost_events = session->hists.stats.total_lost; - nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->hists.stats.nr_events[0]; + sched->nr_lost_events = session->hists.stats.total_lost; + sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; } if (destroy) @@ -1677,208 +1486,166 @@ static void read_events(bool destroy, struct perf_session **psession) if (psession) *psession = session; + + return 0; + +out_delete: + perf_session__delete(session); + return -1; } -static void print_bad_events(void) +static void print_bad_events(struct perf_sched *sched) { - if (nr_unordered_timestamps && nr_timestamps) { + if (sched->nr_unordered_timestamps && sched->nr_timestamps) { printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n", - (double)nr_unordered_timestamps/(double)nr_timestamps*100.0, - nr_unordered_timestamps, nr_timestamps); + (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0, + sched->nr_unordered_timestamps, sched->nr_timestamps); } - if (nr_lost_events && nr_events) { + if (sched->nr_lost_events && sched->nr_events) { printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n", - (double)nr_lost_events/(double)nr_events*100.0, - nr_lost_events, nr_events, nr_lost_chunks); + (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, + sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); } - if (nr_state_machine_bugs && nr_timestamps) { + if (sched->nr_state_machine_bugs && sched->nr_timestamps) { printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)", - (double)nr_state_machine_bugs/(double)nr_timestamps*100.0, - nr_state_machine_bugs, nr_timestamps); - if (nr_lost_events) + (double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0, + sched->nr_state_machine_bugs, sched->nr_timestamps); + if (sched->nr_lost_events) printf(" (due to lost events?)"); printf("\n"); } - if (nr_context_switch_bugs && nr_timestamps) { + if (sched->nr_context_switch_bugs && sched->nr_timestamps) { printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", - (double)nr_context_switch_bugs/(double)nr_timestamps*100.0, - nr_context_switch_bugs, nr_timestamps); - if (nr_lost_events) + (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, + sched->nr_context_switch_bugs, sched->nr_timestamps); + if (sched->nr_lost_events) printf(" (due to lost events?)"); printf("\n"); } } -static void __cmd_lat(void) +static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; struct perf_session *session; setup_pager(); - read_events(false, &session); - sort_lat(); + if (perf_sched__read_events(sched, false, &session)) + return -1; + perf_sched__sort_lat(sched); printf("\n ---------------------------------------------------------------------------------------------------------------\n"); printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); printf(" ---------------------------------------------------------------------------------------------------------------\n"); - next = rb_first(&sorted_atom_root); + next = rb_first(&sched->sorted_atom_root); while (next) { struct work_atoms *work_list; work_list = rb_entry(next, struct work_atoms, node); - output_lat_thread(work_list); + output_lat_thread(sched, work_list); next = rb_next(next); } printf(" -----------------------------------------------------------------------------------------\n"); printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", - (double)all_runtime/1e6, all_count); + (double)sched->all_runtime / 1e6, sched->all_count); printf(" ---------------------------------------------------\n"); - print_bad_events(); + print_bad_events(sched); printf("\n"); perf_session__delete(session); + return 0; } -static struct trace_sched_handler map_ops = { - .wakeup_event = NULL, - .switch_event = map_switch_event, - .runtime_event = NULL, - .fork_event = NULL, -}; - -static void __cmd_map(void) +static int perf_sched__map(struct perf_sched *sched) { - max_cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - read_events(true, NULL); - print_bad_events(); + if (perf_sched__read_events(sched, true, NULL)) + return -1; + print_bad_events(sched); + return 0; } -static void __cmd_replay(void) +static int perf_sched__replay(struct perf_sched *sched) { unsigned long i; - calibrate_run_measurement_overhead(); - calibrate_sleep_measurement_overhead(); + calibrate_run_measurement_overhead(sched); + calibrate_sleep_measurement_overhead(sched); - test_calibrations(); + test_calibrations(sched); - read_events(true, NULL); + if (perf_sched__read_events(sched, true, NULL)) + return -1; - printf("nr_run_events: %ld\n", nr_run_events); - printf("nr_sleep_events: %ld\n", nr_sleep_events); - printf("nr_wakeup_events: %ld\n", nr_wakeup_events); + printf("nr_run_events: %ld\n", sched->nr_run_events); + printf("nr_sleep_events: %ld\n", sched->nr_sleep_events); + printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events); - if (targetless_wakeups) - printf("target-less wakeups: %ld\n", targetless_wakeups); - if (multitarget_wakeups) - printf("multi-target wakeups: %ld\n", multitarget_wakeups); - if (nr_run_events_optimized) + if (sched->targetless_wakeups) + printf("target-less wakeups: %ld\n", sched->targetless_wakeups); + if (sched->multitarget_wakeups) + printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups); + if (sched->nr_run_events_optimized) printf("run atoms optimized: %ld\n", - nr_run_events_optimized); + sched->nr_run_events_optimized); - print_task_traces(); - add_cross_task_wakeups(); + print_task_traces(sched); + add_cross_task_wakeups(sched); - create_tasks(); + create_tasks(sched); printf("------------------------------------------------------------\n"); - for (i = 0; i < replay_repeat; i++) - run_one_test(); -} - - -static const char * const sched_usage[] = { - "perf sched [<options>] {record|latency|map|replay|script}", - NULL -}; - -static const struct option sched_options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; - -static const char * const latency_usage[] = { - "perf sched latency [<options>]", - NULL -}; - -static const struct option latency_options[] = { - OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): runtime, switch, avg, max"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_INTEGER('C', "CPU", &profile_cpu, - "CPU to profile on"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; - -static const char * const replay_usage[] = { - "perf sched replay [<options>]", - NULL -}; + for (i = 0; i < sched->replay_repeat; i++) + run_one_test(sched); -static const struct option replay_options[] = { - OPT_UINTEGER('r', "repeat", &replay_repeat, - "repeat the workload replay N times (-1: infinite)"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_END() -}; + return 0; +} -static void setup_sorting(void) +static void setup_sorting(struct perf_sched *sched, const struct option *options, + const char * const usage_msg[]) { - char *tmp, *tok, *str = strdup(sort_order); + char *tmp, *tok, *str = strdup(sched->sort_order); for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - if (sort_dimension__add(tok, &sort_list) < 0) { + if (sort_dimension__add(tok, &sched->sort_list) < 0) { error("Unknown --sort key: `%s'", tok); - usage_with_options(latency_usage, latency_options); + usage_with_options(usage_msg, options); } } free(str); - sort_dimension__add("pid", &cmp_pid); + sort_dimension__add("pid", &sched->cmp_pid); } -static const char *record_args[] = { - "record", - "-a", - "-R", - "-f", - "-m", "1024", - "-c", "1", - "-e", "sched:sched_switch", - "-e", "sched:sched_stat_wait", - "-e", "sched:sched_stat_sleep", - "-e", "sched:sched_stat_iowait", - "-e", "sched:sched_stat_runtime", - "-e", "sched:sched_process_exit", - "-e", "sched:sched_process_fork", - "-e", "sched:sched_wakeup", - "-e", "sched:sched_migrate_task", -}; - static int __cmd_record(int argc, const char **argv) { unsigned int rec_argc, i, j; const char **rec_argv; + const char * const record_args[] = { + "record", + "-a", + "-R", + "-f", + "-m", "1024", + "-c", "1", + "-e", "sched:sched_switch", + "-e", "sched:sched_stat_wait", + "-e", "sched:sched_stat_sleep", + "-e", "sched:sched_stat_iowait", + "-e", "sched:sched_stat_runtime", + "-e", "sched:sched_process_exit", + "-e", "sched:sched_process_fork", + "-e", "sched:sched_wakeup", + "-e", "sched:sched_migrate_task", + }; rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -1897,8 +1664,85 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -int cmd_sched(int argc, const char **argv, const char *prefix __used) +int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) { + const char default_sort_order[] = "avg, max, switch, runtime"; + struct perf_sched sched = { + .tool = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, + }, + .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid), + .sort_list = LIST_HEAD_INIT(sched.sort_list), + .start_work_mutex = PTHREAD_MUTEX_INITIALIZER, + .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER, + .curr_pid = { [0 ... MAX_CPUS - 1] = -1 }, + .sort_order = default_sort_order, + .replay_repeat = 10, + .profile_cpu = -1, + .next_shortname1 = 'A', + .next_shortname2 = '0', + }; + const struct option latency_options[] = { + OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", + "sort by key(s): runtime, switch, avg, max"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_INTEGER('C', "CPU", &sched.profile_cpu, + "CPU to profile on"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const struct option replay_options[] = { + OPT_UINTEGER('r', "repeat", &sched.replay_repeat, + "repeat the workload replay N times (-1: infinite)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const struct option sched_options[] = { + OPT_STRING('i', "input", &sched.input_name, "file", + "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_END() + }; + const char * const latency_usage[] = { + "perf sched latency [<options>]", + NULL + }; + const char * const replay_usage[] = { + "perf sched replay [<options>]", + NULL + }; + const char * const sched_usage[] = { + "perf sched [<options>] {record|latency|map|replay|script}", + NULL + }; + struct trace_sched_handler lat_ops = { + .wakeup_event = latency_wakeup_event, + .switch_event = latency_switch_event, + .runtime_event = latency_runtime_event, + .fork_event = latency_fork_event, + .migrate_task_event = latency_migrate_task_event, + }; + struct trace_sched_handler map_ops = { + .switch_event = map_switch_event, + }; + struct trace_sched_handler replay_ops = { + .wakeup_event = replay_wakeup_event, + .switch_event = replay_switch_event, + .fork_event = replay_fork_event, + }; + argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) @@ -1914,26 +1758,26 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used) if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); } else if (!strncmp(argv[0], "lat", 3)) { - trace_handler = &lat_ops; + sched.tp_handler = &lat_ops; if (argc > 1) { argc = parse_options(argc, argv, latency_options, latency_usage, 0); if (argc) usage_with_options(latency_usage, latency_options); } - setup_sorting(); - __cmd_lat(); + setup_sorting(&sched, latency_options, latency_usage); + return perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { - trace_handler = &map_ops; - setup_sorting(); - __cmd_map(); + sched.tp_handler = &map_ops; + setup_sorting(&sched, latency_options, latency_usage); + return perf_sched__map(&sched); } else if (!strncmp(argv[0], "rep", 3)) { - trace_handler = &replay_ops; + sched.tp_handler = &replay_ops; if (argc) { argc = parse_options(argc, argv, replay_options, replay_usage, 0); if (argc) usage_with_options(replay_usage, replay_options); } - __cmd_replay(); + return perf_sched__replay(&sched); } else { usage_with_options(sched_usage, sched_options); } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1e60ab70b2b..1be843aa154 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -14,6 +14,7 @@ #include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/sort.h" #include <linux/bitmap.h> static char const *script_name; @@ -28,11 +29,6 @@ static bool system_wide; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); -struct perf_script { - struct perf_tool tool; - struct perf_session *session; -}; - enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -262,14 +258,11 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static void print_sample_start(struct pevent *pevent, - struct perf_sample *sample, +static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) { - int type; struct perf_event_attr *attr = &evsel->attr; - struct event_format *event; const char *evname = NULL; unsigned long secs; unsigned long usecs; @@ -307,20 +300,7 @@ static void print_sample_start(struct pevent *pevent, } if (PRINT_FIELD(EVNAME)) { - if (attr->type == PERF_TYPE_TRACEPOINT) { - /* - * XXX Do we really need this here? - * perf_evlist__set_tracepoint_names should have done - * this already - */ - type = trace_parse_common_type(pevent, - sample->raw_data); - event = pevent_find_event(pevent, type); - if (event) - evname = event->name; - } else - evname = perf_evsel__name(evsel); - + evname = perf_evsel__name(evsel); printf("%s: ", evname ? evname : "[unknown]"); } } @@ -401,7 +381,7 @@ static void print_sample_bts(union perf_event *event, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } @@ -415,19 +395,17 @@ static void print_sample_bts(union perf_event *event, printf("\n"); } -static void process_event(union perf_event *event __unused, - struct pevent *pevent, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine, - struct thread *thread) +static void process_event(union perf_event *event, struct perf_sample *sample, + struct perf_evsel *evsel, struct machine *machine, + struct addr_location *al) { struct perf_event_attr *attr = &evsel->attr; + struct thread *thread = al->thread; if (output[attr->type].fields == 0) return; - print_sample_start(pevent, sample, thread, evsel); + print_sample_start(sample, thread, evsel); if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); @@ -435,9 +413,8 @@ static void process_event(union perf_event *event __unused, } if (PRINT_FIELD(TRACE)) - print_trace_event(pevent, sample->cpu, sample->raw_data, - sample->raw_size); - + event_format__print(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) print_sample_addr(event, sample, machine, thread, attr); @@ -446,7 +423,7 @@ static void process_event(union perf_event *event __unused, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } @@ -454,9 +431,9 @@ static void process_event(union perf_event *event __unused, printf("\n"); } -static int default_start_script(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int default_start_script(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { return 0; } @@ -466,8 +443,8 @@ static int default_stop_script(void) return 0; } -static int default_generate_script(struct pevent *pevent __unused, - const char *outfile __unused) +static int default_generate_script(struct pevent *pevent __maybe_unused, + const char *outfile __maybe_unused) { return 0; } @@ -498,14 +475,13 @@ static int cleanup_scripting(void) static const char *input_name; -static int process_sample_event(struct perf_tool *tool __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine) { struct addr_location al; - struct perf_script *scr = container_of(tool, struct perf_script, tool); struct thread *thread = machine__findnew_thread(machine, event->ip.tid); if (thread == NULL) { @@ -537,32 +513,29 @@ static int process_sample_event(struct perf_tool *tool __used, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, scr->session->pevent, - sample, evsel, machine, thread); + scripting_ops->process_event(event, sample, evsel, machine, &al); evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_script perf_script = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, - }, +static struct perf_tool perf_script = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; -static void sig_handler(int sig __unused) +static void sig_handler(int sig __maybe_unused) { session_done = 1; } @@ -573,7 +546,7 @@ static int __cmd_script(struct perf_session *session) signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &perf_script.tool); + ret = perf_session__process_events(session, &perf_script); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -672,8 +645,8 @@ static void list_available_languages(void) fprintf(stderr, "\n"); } -static int parse_scriptname(const struct option *opt __used, - const char *str, int unset __used) +static int parse_scriptname(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) { char spec[PATH_MAX]; const char *script, *ext; @@ -718,8 +691,8 @@ static int parse_scriptname(const struct option *opt __used, return 0; } -static int parse_output_fields(const struct option *opt __used, - const char *arg, int unset __used) +static int parse_output_fields(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) { char *tok; int i, imax = sizeof(all_output_options) / sizeof(struct output_option); @@ -1010,8 +983,9 @@ static char *get_script_root(struct dirent *script_dirent, const char *suffix) return script_root; } -static int list_available_scripts(const struct option *opt __used, - const char *s __used, int unset __used) +static int list_available_scripts(const struct option *opt __maybe_unused, + const char *s __maybe_unused, + int unset __maybe_unused) { struct dirent *script_next, *lang_next, script_dirent, lang_dirent; char scripts_path[MAXPATHLEN]; @@ -1058,6 +1032,61 @@ static int list_available_scripts(const struct option *opt __used, exit(0); } +/* + * Return -1 if none is found, otherwise the actual scripts number. + * + * Currently the only user of this function is the script browser, which + * will list all statically runnable scripts, select one, execute it and + * show the output in a perf browser. + */ +int find_scripts(char **scripts_array, char **scripts_path_array) +{ + struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + char scripts_path[MAXPATHLEN]; + DIR *scripts_dir, *lang_dir; + char lang_path[MAXPATHLEN]; + char *temp; + int i = 0; + + snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); + + scripts_dir = opendir(scripts_path); + if (!scripts_dir) + return -1; + + for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, + lang_dirent.d_name); +#ifdef NO_LIBPERL + if (strstr(lang_path, "perl")) + continue; +#endif +#ifdef NO_LIBPYTHON + if (strstr(lang_path, "python")) + continue; +#endif + + lang_dir = opendir(lang_path); + if (!lang_dir) + continue; + + for_each_script(lang_path, lang_dir, script_dirent, script_next) { + /* Skip those real time scripts: xxxtop.p[yl] */ + if (strstr(script_dirent.d_name, "top.")) + continue; + sprintf(scripts_path_array[i], "%s/%s", lang_path, + script_dirent.d_name); + temp = strchr(script_dirent.d_name, '.'); + snprintf(scripts_array[i], + (temp - script_dirent.d_name) + 1, + "%s", script_dirent.d_name); + i++; + } + } + + return i; +} + static char *get_script_path(const char *script_root, const char *suffix) { struct dirent *script_next, *lang_next, script_dirent, lang_dirent; @@ -1170,6 +1199,8 @@ static const struct option options[] = { parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), + OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only display events for these comms"), @@ -1181,21 +1212,26 @@ static const struct option options[] = { OPT_END() }; -static bool have_cmd(int argc, const char **argv) +static int have_cmd(int argc, const char **argv) { char **__argv = malloc(sizeof(const char *) * argc); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + return -1; + } + memcpy(__argv, argv, sizeof(const char *) * argc); argc = parse_options(argc, (const char **)__argv, record_options, NULL, PARSE_OPT_STOP_AT_NON_OPTION); free(__argv); - return argc != 0; + system_wide = (argc == 0); + + return 0; } -int cmd_script(int argc, const char **argv, const char *prefix __used) +int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { char *rec_script_path = NULL; char *rep_script_path = NULL; @@ -1259,13 +1295,13 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (pipe(live_pipe) < 0) { perror("failed to create pipe"); - exit(-1); + return -1; } pid = fork(); if (pid < 0) { perror("failed to fork"); - exit(-1); + return -1; } if (!pid) { @@ -1277,13 +1313,18 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (is_top_script(argv[0])) { system_wide = true; } else if (!system_wide) { - system_wide = !have_cmd(argc - rep_args, - &argv[rep_args]); + if (have_cmd(argc - rep_args, &argv[rep_args]) != 0) { + err = -1; + goto out; + } } __argv = malloc((argc + 6) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } __argv[j++] = "/bin/sh"; __argv[j++] = rec_script_path; @@ -1305,8 +1346,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) close(live_pipe[1]); __argv = malloc((argc + 4) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } + j = 0; __argv[j++] = "/bin/sh"; __argv[j++] = rep_script_path; @@ -1331,12 +1376,20 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) if (!rec_script_path) system_wide = false; - else if (!system_wide) - system_wide = !have_cmd(argc - 1, &argv[1]); + else if (!system_wide) { + if (have_cmd(argc - 1, &argv[1]) != 0) { + err = -1; + goto out; + } + } __argv = malloc((argc + 2) * sizeof(const char *)); - if (!__argv) - die("malloc"); + if (!__argv) { + pr_err("malloc failed\n"); + err = -ENOMEM; + goto out; + } + __argv[j++] = "/bin/sh"; __argv[j++] = script_path; if (system_wide) @@ -1356,12 +1409,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) setup_pager(); session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_script.tool); + &perf_script); if (session == NULL) return -ENOMEM; - perf_script.session = session; - if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; @@ -1387,18 +1438,18 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) input = open(session->filename, O_RDONLY); /* input_name */ if (input < 0) { perror("failed to open file"); - exit(-1); + return -1; } err = fstat(input, &perf_stat); if (err < 0) { perror("failed to stat file"); - exit(-1); + return -1; } if (!perf_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); + return 0; } scripting_ops = script_spec__lookup(generate_script_lang); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 861f0aec77a..e8cd4d81b06 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -51,13 +51,13 @@ #include "util/evsel.h" #include "util/debug.h" #include "util/color.h" +#include "util/stat.h" #include "util/header.h" #include "util/cpumap.h" #include "util/thread.h" #include "util/thread_map.h" #include <sys/prctl.h> -#include <math.h> #include <locale.h> #define DEFAULT_SEPARATOR " " @@ -199,11 +199,6 @@ static int output_fd; static volatile int done = 0; -struct stats -{ - double n, mean, M2; -}; - struct perf_stat { struct stats res_stats[3]; }; @@ -220,48 +215,14 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) evsel->priv = NULL; } -static void update_stats(struct stats *stats, u64 val) -{ - double delta; - - stats->n++; - delta = val - stats->mean; - stats->mean += delta / stats->n; - stats->M2 += delta*(val - stats->mean); -} - -static double avg_stats(struct stats *stats) +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) { - return stats->mean; + return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; } -/* - * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - * - * (\Sum n_i^2) - ((\Sum n_i)^2)/n - * s^2 = ------------------------------- - * n - 1 - * - * http://en.wikipedia.org/wiki/Stddev - * - * The std dev of the mean is related to the std dev by: - * - * s - * s_mean = ------- - * sqrt(n) - * - */ -static double stddev_stats(struct stats *stats) +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) { - double variance, variance_mean; - - if (!stats->n) - return 0.0; - - variance = stats->M2 / (stats->n - 1); - variance_mean = variance / stats->n; - - return sqrt(variance_mean); + return perf_evsel__cpus(evsel)->nr; } static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; @@ -281,13 +242,9 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_evsel *first) { struct perf_event_attr *attr = &evsel->attr; - struct xyarray *group_fd = NULL; bool exclude_guest_missing = false; int ret; - if (group && evsel != first) - group_fd = first->fd; - if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; @@ -299,8 +256,7 @@ retry: evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; if (perf_target__has_cpu(&target)) { - ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus, - group, group_fd); + ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); if (ret) goto check_ret; return 0; @@ -311,8 +267,7 @@ retry: attr->enable_on_exec = 1; } - ret = perf_evsel__open_per_thread(evsel, evsel_list->threads, - group, group_fd); + ret = perf_evsel__open_per_thread(evsel, evsel_list->threads); if (!ret) return 0; /* fall through */ @@ -382,7 +337,7 @@ static int read_counter_aggr(struct perf_evsel *counter) u64 *count = counter->counts->aggr.values; int i; - if (__perf_evsel__read(counter, evsel_list->cpus->nr, + if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), evsel_list->threads->nr, scale) < 0) return -1; @@ -411,7 +366,7 @@ static int read_counter(struct perf_evsel *counter) u64 *count; int cpu; - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) return -1; @@ -423,7 +378,7 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static int run_perf_stat(int argc __used, const char **argv) +static int run_perf_stat(int argc __maybe_unused, const char **argv) { unsigned long long t0, t1; struct perf_evsel *counter, *first; @@ -434,7 +389,7 @@ static int run_perf_stat(int argc __used, const char **argv) if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { perror("failed to create pipes"); - exit(1); + return -1; } if (forks) { @@ -483,7 +438,10 @@ static int run_perf_stat(int argc __used, const char **argv) close(child_ready_pipe[0]); } - first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + if (group) + perf_evlist__set_leader(evsel_list); + + first = perf_evlist__first(evsel_list); list_for_each_entry(counter, &evsel_list->entries, node) { if (create_perf_stat_counter(counter, first) < 0) { @@ -513,13 +471,14 @@ static int run_perf_stat(int argc __used, const char **argv) } if (child_pid != -1) kill(child_pid, SIGTERM); - die("Not all events could be opened.\n"); + + pr_err("Not all events could be opened.\n"); return -1; } counter->supported = true; } - if (perf_evlist__set_filters(evsel_list)) { + if (perf_evlist__apply_filters(evsel_list)) { error("failed to set filter with %d (%s)\n", errno, strerror(errno)); return -1; @@ -546,12 +505,12 @@ static int run_perf_stat(int argc __used, const char **argv) if (no_aggr) { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter(counter); - perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1); + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); } } else { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter_aggr(counter); - perf_evsel__close_fd(counter, evsel_list->cpus->nr, + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), evsel_list->threads->nr); } } @@ -561,10 +520,7 @@ static int run_perf_stat(int argc __used, const char **argv) static void print_noise_pct(double total, double avg) { - double pct = 0.0; - - if (avg) - pct = 100.0*total/avg; + double pct = rel_stddev_stats(total, avg); if (csv_output) fprintf(output, "%s%.2f%%", csv_sep, pct); @@ -592,7 +548,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, - evsel_list->cpus->map[cpu], csv_sep); + perf_evsel__cpus(evsel)->map[cpu], csv_sep); fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); @@ -636,7 +592,9 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_stalled_cycles_frontend(int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) { double total, ratio = 0.0; const char *color; @@ -653,7 +611,9 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us fprintf(output, " frontend cycles idle "); } -static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_stalled_cycles_backend(int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) { double total, ratio = 0.0; const char *color; @@ -670,7 +630,9 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use fprintf(output, " backend cycles idle "); } -static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_branch_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -687,7 +649,9 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double fprintf(output, " of all branches "); } -static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_l1_dcache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -704,7 +668,9 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(output, " of all L1-dcache hits "); } -static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_l1_icache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -721,7 +687,9 @@ static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(output, " of all L1-icache hits "); } -static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_dtlb_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -738,7 +706,9 @@ static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do fprintf(output, " of all dTLB cache hits "); } -static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_itlb_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -755,7 +725,9 @@ static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do fprintf(output, " of all iTLB cache hits "); } -static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) +static void print_ll_cache_misses(int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) { double total, ratio = 0.0; const char *color; @@ -788,7 +760,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, - evsel_list->cpus->map[cpu], csv_sep); + perf_evsel__cpus(evsel)->map[cpu], csv_sep); else cpu = 0; @@ -949,14 +921,14 @@ static void print_counter(struct perf_evsel *counter) u64 ena, run, val; int cpu; - for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { val = counter->counts->cpu[cpu].val; ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; if (run == 0 || ena == 0) { fprintf(output, "CPU%*d%s%*s%s%*s", csv_output ? 0 : -4, - evsel_list->cpus->map[cpu], csv_sep, + perf_evsel__cpus(counter)->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep, @@ -1061,8 +1033,8 @@ static const char * const stat_usage[] = { NULL }; -static int stat__set_big_num(const struct option *opt __used, - const char *s __used, int unset) +static int stat__set_big_num(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) { big_num_opt = unset ? 0 : 1; return 0; @@ -1156,7 +1128,7 @@ static int add_default_attributes(void) return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } -int cmd_stat(int argc, const char **argv, const char *prefix __used) +int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_evsel *pos; int status = -ENOMEM; @@ -1192,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) output = fopen(output_name, mode); if (!output) { perror("failed to create output file"); - exit(-1); + return -1; } clock_gettime(CLOCK_REALTIME, &tm); fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); @@ -1255,7 +1227,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_stat_priv(pos) < 0 || - perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0) + perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) goto out_free_fd; } diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 1d592f5cbea..484f26cc0c0 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -14,11 +14,13 @@ #include "util/symbol.h" #include "util/thread_map.h" #include "util/pmu.h" +#include "event-parse.h" #include "../../include/linux/hw_breakpoint.h" #include <sys/mman.h> -static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) +static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused, + struct symbol *sym) { bool *visited = symbol__priv(sym); *visited = true; @@ -294,7 +296,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0) { + if (perf_evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -369,7 +371,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -533,7 +535,7 @@ static int test__basic_mmap(void) perf_evlist__add(evlist, evsels[i]); - if (perf_evsel__open(evsels[i], cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsels[i], cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -562,7 +564,7 @@ static int test__basic_mmap(void) goto out_munmap; } - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { pr_err("Can't parse sample, err = %d\n", err); goto out_munmap; @@ -710,7 +712,7 @@ static int test__PERF_RECORD(void) /* * Config the evsels, setting attr->comm on the first one, etc. */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + evsel = perf_evlist__first(evlist); evsel->attr.sample_type |= PERF_SAMPLE_CPU; evsel->attr.sample_type |= PERF_SAMPLE_TID; evsel->attr.sample_type |= PERF_SAMPLE_TIME; @@ -737,7 +739,7 @@ static int test__PERF_RECORD(void) * Call sys_perf_event_open on all the fds on all the evsels, * grouping them if asked to. */ - err = perf_evlist__open(evlist, opts.group); + err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", strerror(errno)); goto out_delete_evlist; @@ -779,7 +781,7 @@ static int test__PERF_RECORD(void) if (type < PERF_RECORD_MAX) nr_events[type]++; - err = perf_evlist__parse_sample(evlist, event, &sample, false); + err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose) perf_event__fprintf(event, stderr); @@ -996,7 +998,9 @@ static u64 mmap_read_self(void *addr) /* * If the RDPMC instruction faults then signal this back to the test parent task: */ -static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) +static void segfault_handler(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) { exit(-1); } @@ -1023,14 +1027,16 @@ static int __test__rdpmc(void) fd = sys_perf_event_open(&attr, 0, -1, -1, 0); if (fd < 0) { - die("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, strerror(errno)); + pr_err("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); + return -1; } addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); if (addr == (void *)(-1)) { - die("Error: mmap() syscall returned " - "with (%s)\n", strerror(errno)); + pr_err("Error: mmap() syscall returned with (%s)\n", + strerror(errno)); + goto out_close; } for (n = 0; n < 6; n++) { @@ -1051,9 +1057,9 @@ static int __test__rdpmc(void) } munmap(addr, page_size); - close(fd); - pr_debug(" "); +out_close: + close(fd); if (!delta_sum) return -1; @@ -1092,6 +1098,309 @@ static int test__perf_pmu(void) return perf_pmu__test(); } +static int perf_evsel__roundtrip_cache_name_test(void) +{ + char name[128]; + int type, op, err = 0, ret = 0, i, idx; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + + if (evlist == NULL) + return -ENOMEM; + + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!perf_evsel__is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); + err = parse_events(evlist, name, 0); + if (err) + ret = err; + } + } + } + + idx = 0; + evsel = perf_evlist__first(evlist); + + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!perf_evsel__is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + __perf_evsel__hw_cache_type_op_res_name(type, op, i, + name, sizeof(name)); + if (evsel->idx != idx) + continue; + + ++idx; + + if (strcmp(perf_evsel__name(evsel), name)) { + pr_debug("%s != %s\n", perf_evsel__name(evsel), name); + ret = -1; + } + + evsel = perf_evsel__next(evsel); + } + } + } + + perf_evlist__delete(evlist); + return ret; +} + +static int __perf_evsel__name_array_test(const char *names[], int nr_names) +{ + int i, err; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + + if (evlist == NULL) + return -ENOMEM; + + for (i = 0; i < nr_names; ++i) { + err = parse_events(evlist, names[i], 0); + if (err) { + pr_debug("failed to parse event '%s', err %d\n", + names[i], err); + goto out_delete_evlist; + } + } + + err = 0; + list_for_each_entry(evsel, &evlist->entries, node) { + if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) { + --err; + pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]); + } + } + +out_delete_evlist: + perf_evlist__delete(evlist); + return err; +} + +#define perf_evsel__name_array_test(names) \ + __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) + +static int perf_evsel__roundtrip_name_test(void) +{ + int err = 0, ret = 0; + + err = perf_evsel__name_array_test(perf_evsel__hw_names); + if (err) + ret = err; + + err = perf_evsel__name_array_test(perf_evsel__sw_names); + if (err) + ret = err; + + err = perf_evsel__roundtrip_cache_name_test(); + if (err) + ret = err; + + return ret; +} + +static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, + int size, bool should_be_signed) +{ + struct format_field *field = perf_evsel__field(evsel, name); + int is_signed; + int ret = 0; + + if (field == NULL) { + pr_debug("%s: \"%s\" field not found!\n", evsel->name, name); + return -1; + } + + is_signed = !!(field->flags | FIELD_IS_SIGNED); + if (should_be_signed && !is_signed) { + pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", + evsel->name, name, is_signed, should_be_signed); + ret = -1; + } + + if (field->size != size) { + pr_debug("%s: \"%s\" size (%d) should be %d!\n", + evsel->name, name, field->size, size); + ret = -1; + } + + return ret; +} + +static int perf_evsel__tp_sched_test(void) +{ + struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch", 0); + int ret = 0; + + if (evsel == NULL) { + pr_debug("perf_evsel__new\n"); + return -1; + } + + if (perf_evsel__test_field(evsel, "prev_comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_prio", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prev_state", 8, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "next_comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "next_pid", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "next_prio", 4, true)) + ret = -1; + + perf_evsel__delete(evsel); + + evsel = perf_evsel__newtp("sched", "sched_wakeup", 0); + + if (perf_evsel__test_field(evsel, "comm", 16, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "pid", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "prio", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "success", 4, true)) + ret = -1; + + if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) + ret = -1; + + return ret; +} + +static int test__syscall_open_tp_fields(void) +{ + struct perf_record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .no_delay = true, + .freq = 1, + .mmap_pages = 256, + .raw_samples = true, + }; + const char *filename = "/etc/passwd"; + int flags = O_RDONLY | O_DIRECTORY; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel; + int err = -1, i, nr_events = 0, nr_polls = 0; + + if (evlist == NULL) { + pr_debug("%s: perf_evlist__new\n", __func__); + goto out; + } + + evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0); + if (evsel == NULL) { + pr_debug("%s: perf_evsel__newtp\n", __func__); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel); + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("%s: perf_evlist__create_maps\n", __func__); + goto out_delete_evlist; + } + + perf_evsel__config(evsel, &opts, evsel); + + evlist->threads->map[0] = getpid(); + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); + + /* + * Generate the event: + */ + open(filename, flags); + + while (1) { + int before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + int tp_flags; + struct perf_sample sample; + + ++nr_events; + + if (type != PERF_RECORD_SAMPLE) + continue; + + err = perf_evsel__parse_sample(evsel, event, &sample); + if (err) { + pr_err("Can't parse sample, err = %d\n", err); + goto out_munmap; + } + + tp_flags = perf_evsel__intval(evsel, &sample, "flags"); + + if (flags != tp_flags) { + pr_debug("%s: Expected flags=%#x, got %#x\n", + __func__, flags, tp_flags); + goto out_munmap; + } + + goto out_ok; + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, 10); + + if (++nr_polls > 5) { + pr_debug("%s: no events!\n", __func__); + goto out_munmap; + } + } +out_ok: + err = 0; +out_munmap: + perf_evlist__munmap(evlist); +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + static struct test { const char *desc; int (*func)(void); @@ -1135,6 +1444,18 @@ static struct test { .func = dso__test_data, }, { + .desc = "roundtrip evsel->name check", + .func = perf_evsel__roundtrip_name_test, + }, + { + .desc = "Check parsing of sched tracepoints fields", + .func = perf_evsel__tp_sched_test, + }, + { + .desc = "Generate and check syscalls:sys_enter_open event fields", + .func = test__syscall_open_tp_fields, + }, + { .func = NULL, }, }; @@ -1199,7 +1520,7 @@ static int perf_test__list(int argc, const char **argv) return 0; } -int cmd_test(int argc, const char **argv, const char *prefix __used) +int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const test_usage[] = { "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 3b75b2e21ea..b1a8a3b841c 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -168,9 +168,8 @@ static struct per_pid *find_create_pid(int pid) return cursor; cursor = cursor->next; } - cursor = malloc(sizeof(struct per_pid)); + cursor = zalloc(sizeof(*cursor)); assert(cursor != NULL); - memset(cursor, 0, sizeof(struct per_pid)); cursor->pid = pid; cursor->next = all_data; all_data = cursor; @@ -195,9 +194,8 @@ static void pid_set_comm(int pid, char *comm) } c = c->next; } - c = malloc(sizeof(struct per_pidcomm)); + c = zalloc(sizeof(*c)); assert(c != NULL); - memset(c, 0, sizeof(struct per_pidcomm)); c->comm = strdup(comm); p->current = c; c->next = p->all; @@ -239,17 +237,15 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end) p = find_create_pid(pid); c = p->current; if (!c) { - c = malloc(sizeof(struct per_pidcomm)); + c = zalloc(sizeof(*c)); assert(c != NULL); - memset(c, 0, sizeof(struct per_pidcomm)); p->current = c; c->next = p->all; p->all = c; } - sample = malloc(sizeof(struct cpu_sample)); + sample = zalloc(sizeof(*sample)); assert(sample != NULL); - memset(sample, 0, sizeof(struct cpu_sample)); sample->start_time = start; sample->end_time = end; sample->type = type; @@ -275,28 +271,28 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(struct perf_tool *tool __used, +static int process_comm_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(struct perf_tool *tool __used, +static int process_fork_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(struct perf_tool *tool __used, +static int process_exit_event(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { pid_exit(event->fork.pid, event->fork.time); return 0; @@ -373,11 +369,10 @@ static void c_state_start(int cpu, u64 timestamp, int state) static void c_state_end(int cpu, u64 timestamp) { - struct power_event *pwr; - pwr = malloc(sizeof(struct power_event)); + struct power_event *pwr = zalloc(sizeof(*pwr)); + if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_cstate_state[cpu]; pwr->start_time = cpus_cstate_start_times[cpu]; @@ -392,14 +387,13 @@ static void c_state_end(int cpu, u64 timestamp) static void p_state_change(int cpu, u64 timestamp, u64 new_freq) { struct power_event *pwr; - pwr = malloc(sizeof(struct power_event)); if (new_freq > 8000000) /* detect invalid data */ return; + pwr = zalloc(sizeof(*pwr)); if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_pstate_state[cpu]; pwr->start_time = cpus_pstate_start_times[cpu]; @@ -429,15 +423,13 @@ static void p_state_change(int cpu, u64 timestamp, u64 new_freq) static void sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te) { - struct wake_event *we; struct per_pid *p; struct wakeup_entry *wake = (void *)te; + struct wake_event *we = zalloc(sizeof(*we)); - we = malloc(sizeof(struct wake_event)); if (!we) return; - memset(we, 0, sizeof(struct wake_event)); we->time = timestamp; we->waker = pid; @@ -491,11 +483,11 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) } -static int process_sample_event(struct perf_tool *tool __used, - union perf_event *event __used, +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __used) + struct machine *machine __maybe_unused) { struct trace_entry *te; @@ -579,13 +571,12 @@ static void end_sample_processing(void) struct power_event *pwr; for (cpu = 0; cpu <= numcpus; cpu++) { - pwr = malloc(sizeof(struct power_event)); + /* C state */ +#if 0 + pwr = zalloc(sizeof(*pwr)); if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); - /* C state */ -#if 0 pwr->state = cpus_cstate_state[cpu]; pwr->start_time = cpus_cstate_start_times[cpu]; pwr->end_time = last_time; @@ -597,10 +588,9 @@ static void end_sample_processing(void) #endif /* P state */ - pwr = malloc(sizeof(struct power_event)); + pwr = zalloc(sizeof(*pwr)); if (!pwr) return; - memset(pwr, 0, sizeof(struct power_event)); pwr->state = cpus_pstate_state[cpu]; pwr->start_time = cpus_pstate_start_times[cpu]; @@ -830,11 +820,9 @@ static void draw_process_bars(void) static void add_process_filter(const char *string) { - struct process_filter *filt; - int pid; + int pid = strtoull(string, NULL, 10); + struct process_filter *filt = malloc(sizeof(*filt)); - pid = strtoull(string, NULL, 10); - filt = malloc(sizeof(struct process_filter)); if (!filt) return; @@ -1081,7 +1069,8 @@ static int __cmd_record(int argc, const char **argv) } static int -parse_process(const struct option *opt __used, const char *arg, int __used unset) +parse_process(const struct option *opt __maybe_unused, const char *arg, + int __maybe_unused unset) { if (arg) add_process_filter(arg); @@ -1106,7 +1095,8 @@ static const struct option options[] = { }; -int cmd_timechart(int argc, const char **argv, const char *prefix __used) +int cmd_timechart(int argc, const char **argv, + const char *prefix __maybe_unused) { argc = parse_options(argc, argv, options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68cd61ef6ac..e434a16bb5a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -95,7 +95,8 @@ static void perf_top__update_print_entries(struct perf_top *top) top->print_entries -= 9; } -static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg) +static void perf_top__sig_winch(int sig __maybe_unused, + siginfo_t *info __maybe_unused, void *arg) { struct perf_top *top = arg; @@ -509,7 +510,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->nr_entries) { - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; @@ -518,7 +519,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) if (top->sym_evsel->idx == counter) break; } else - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); break; case 'f': prompt_integer(&top->count_filter, "Enter display event count filter"); @@ -663,7 +664,7 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct map *map __used, struct symbol *sym) +static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) { const char *name = sym->name; int i; @@ -783,8 +784,10 @@ static void perf_event__process_sample(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al.thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, + al.thread, sample, + &parent); + if (err) return; } @@ -820,7 +823,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { - ret = perf_evlist__parse_sample(top->evlist, event, &sample, false); + ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); continue; @@ -884,17 +887,14 @@ static void perf_top__mmap_read(struct perf_top *top) static void perf_top__start_counters(struct perf_top *top) { - struct perf_evsel *counter, *first; + struct perf_evsel *counter; struct perf_evlist *evlist = top->evlist; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + if (top->group) + perf_evlist__set_leader(evlist); list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; - struct xyarray *group_fd = NULL; - - if (top->group && counter != first) - group_fd = first->fd; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -925,8 +925,7 @@ retry_sample_id: attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(counter, top->evlist->cpus, - top->evlist->threads, top->group, - group_fd) < 0) { + top->evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { @@ -1165,7 +1164,7 @@ static const char * const top_usage[] = { NULL }; -int cmd_top(int argc, const char **argv, const char *prefix __used) +int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_evsel *pos; int status; @@ -1328,7 +1327,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = top.default_interval; } - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); + top.sym_evsel = perf_evlist__first(top.evlist); symbol_conf.priv_size = sizeof(struct annotation); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c new file mode 100644 index 00000000000..8f113dab8bf --- /dev/null +++ b/tools/perf/builtin-trace.c @@ -0,0 +1,310 @@ +#include "builtin.h" +#include "util/evlist.h" +#include "util/parse-options.h" +#include "util/thread_map.h" +#include "event-parse.h" + +#include <libaudit.h> +#include <stdlib.h> + +static struct syscall_fmt { + const char *name; + const char *alias; + bool errmsg; + bool timeout; +} syscall_fmts[] = { + { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, + { .name = "futex", .errmsg = true, }, + { .name = "poll", .errmsg = true, .timeout = true, }, + { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "read", .errmsg = true, }, + { .name = "recvfrom", .errmsg = true, }, + { .name = "select", .errmsg = true, .timeout = true, }, + { .name = "stat", .errmsg = true, .alias = "newstat", }, +}; + +static int syscall_fmt__cmp(const void *name, const void *fmtp) +{ + const struct syscall_fmt *fmt = fmtp; + return strcmp(name, fmt->name); +} + +static struct syscall_fmt *syscall_fmt__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(syscall_fmts); + return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); +} + +struct syscall { + struct event_format *tp_format; + const char *name; + struct syscall_fmt *fmt; +}; + +struct trace { + int audit_machine; + struct { + int max; + struct syscall *table; + } syscalls; + struct perf_record_opts opts; +}; + +static int trace__read_syscall_info(struct trace *trace, int id) +{ + char tp_name[128]; + struct syscall *sc; + + if (id > trace->syscalls.max) { + struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); + + if (nsyscalls == NULL) + return -1; + + if (trace->syscalls.max != -1) { + memset(nsyscalls + trace->syscalls.max + 1, 0, + (id - trace->syscalls.max) * sizeof(*sc)); + } else { + memset(nsyscalls, 0, (id + 1) * sizeof(*sc)); + } + + trace->syscalls.table = nsyscalls; + trace->syscalls.max = id; + } + + sc = trace->syscalls.table + id; + sc->name = audit_syscall_to_name(id, trace->audit_machine); + if (sc->name == NULL) + return -1; + + sc->fmt = syscall_fmt__find(sc->name); + + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); + sc->tp_format = event_format__new("syscalls", tp_name); + + if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { + snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); + sc->tp_format = event_format__new("syscalls", tp_name); + } + + return sc->tp_format != NULL ? 0 : -1; +} + +static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp) +{ + int i = 0; + size_t printed = 0; + + if (sc->tp_format != NULL) { + struct format_field *field; + + for (field = sc->tp_format->format.fields->next; field; field = field->next) { + printed += fprintf(fp, "%s%s: %ld", printed ? ", " : "", + field->name, args[i++]); + } + } else { + while (i < 6) { + printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]); + ++i; + } + } + + return printed; +} + +static int trace__run(struct trace *trace) +{ + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + struct perf_evsel *evsel, *evsel_enter, *evsel_exit; + int err = -1, i, nr_events = 0, before; + + if (evlist == NULL) { + printf("Not enough memory to run!\n"); + goto out; + } + + evsel_enter = perf_evsel__newtp("raw_syscalls", "sys_enter", 0); + if (evsel_enter == NULL) { + printf("Couldn't read the raw_syscalls:sys_enter tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_enter); + + evsel_exit = perf_evsel__newtp("raw_syscalls", "sys_exit", 1); + if (evsel_exit == NULL) { + printf("Couldn't read the raw_syscalls:sys_exit tracepoint information!\n"); + goto out_delete_evlist; + } + + perf_evlist__add(evlist, evsel_exit); + + err = perf_evlist__create_maps(evlist, &trace->opts.target); + if (err < 0) { + printf("Problems parsing the target to trace, check your options!\n"); + goto out_delete_evlist; + } + + perf_evlist__config_attrs(evlist, &trace->opts); + + err = perf_evlist__open(evlist); + if (err < 0) { + printf("Couldn't create the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, UINT_MAX, false); + if (err < 0) { + printf("Couldn't mmap the events: %s\n", strerror(errno)); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); +again: + before = nr_events; + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + struct syscall *sc; + struct perf_sample sample; + int id; + + ++nr_events; + + switch (type) { + case PERF_RECORD_SAMPLE: + break; + case PERF_RECORD_LOST: + printf("LOST %" PRIu64 " events!\n", event->lost.lost); + continue; + default: + printf("Unexpected %s event, skipping...\n", + perf_event__name(type)); + continue; + } + + err = perf_evlist__parse_sample(evlist, event, &sample); + if (err) { + printf("Can't parse sample, err = %d, skipping...\n", err); + continue; + } + + evsel = perf_evlist__id2evsel(evlist, sample.id); + if (evsel == NULL) { + printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); + continue; + } + + id = perf_evsel__intval(evsel, &sample, "id"); + if (id < 0) { + printf("Invalid syscall %d id, skipping...\n", id); + continue; + } + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) && + trace__read_syscall_info(trace, id)) + continue; + + if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)) + continue; + + sc = &trace->syscalls.table[id]; + + if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1) + printf("%d ", sample.tid); + + if (evsel == evsel_enter) { + void *args = perf_evsel__rawptr(evsel, &sample, "args"); + + printf("%s(", sc->name); + syscall__fprintf_args(sc, args, stdout); + } else if (evsel == evsel_exit) { + int ret = perf_evsel__intval(evsel, &sample, "ret"); + + if (ret < 0 && sc->fmt && sc->fmt->errmsg) { + char bf[256]; + const char *emsg = strerror_r(-ret, bf, sizeof(bf)), + *e = audit_errno_to_name(-ret); + + printf(") = -1 %s %s", e, emsg); + } else if (ret == 0 && sc->fmt && sc->fmt->timeout) + printf(") = 0 Timeout"); + else + printf(") = %d", ret); + + putchar('\n'); + } + } + } + + if (nr_events == before) + poll(evlist->pollfd, evlist->nr_fds, -1); + + goto again; + +out_delete_evlist: + perf_evlist__delete(evlist); +out: + return err; +} + +int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const char * const trace_usage[] = { + "perf trace [<options>]", + NULL + }; + struct trace trace = { + .audit_machine = audit_detect_machine(), + .syscalls = { + . max = -1, + }, + .opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .no_delay = true, + .mmap_pages = 1024, + }, + }; + const struct option trace_options[] = { + OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", + "trace events on existing process id"), + OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", + "trace events on existing thread id"), + OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", + "list of cpus to monitor"), + OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, + "child tasks do not inherit counters"), + OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, + "number of mmap data pages"), + OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", + "user to profile"), + OPT_END() + }; + int err; + + argc = parse_options(argc, argv, trace_options, trace_usage, 0); + if (argc) + usage_with_options(trace_usage, trace_options); + + err = perf_target__parse_uid(&trace.opts.target); + if (err) { + char bf[BUFSIZ]; + perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); + printf("%s", bf); + return err; + } + + return trace__run(&trace); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b382bd551aa..08143bd854c 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -34,6 +34,8 @@ extern int cmd_kmem(int argc, const char **argv, const char *prefix); extern int cmd_lock(int argc, const char **argv, const char *prefix); extern int cmd_kvm(int argc, const char **argv, const char *prefix); extern int cmd_test(int argc, const char **argv, const char *prefix); +extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); +extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d695fe40fbf..3e86bbd8c2d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -17,8 +17,9 @@ perf-report mainporcelain common perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common +perf-trace mainporcelain common perf-script mainporcelain common -perf-probe mainporcelain common +perf-probe mainporcelain full perf-kmem mainporcelain common perf-lock mainporcelain common perf-kvm mainporcelain common diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 6c18785a641..4add41bb0c7 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -154,3 +154,53 @@ int main(void) return 0; } endef + +ifndef NO_LIBUNWIND +define SOURCE_LIBUNWIND +#include <libunwind.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +int main(void) +{ + unw_addr_space_t addr_space; + addr_space = unw_create_addr_space(NULL, 0); + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL); + return 0; +} +endef +endif + +ifndef NO_BACKTRACE +define SOURCE_BACKTRACE +#include <execinfo.h> +#include <stdio.h> + +int main(void) +{ + backtrace(NULL, 0); + backtrace_symbols(NULL, 0); + return 0; +} +endef +endif + +ifndef NO_LIBAUDIT +define SOURCE_LIBAUDIT +#include <libaudit.h> + +int main(void) +{ + return audit_open(); +} +endef +endif
\ No newline at end of file diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 95b6f8b6177..e9193062026 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -24,7 +24,7 @@ NOBUILDID=0000000000000000000000000000000000000000 perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS if [ ! -s $BUILDIDS ] ; then echo "perf archive: no build-ids found" - rm -f $BUILDIDS + rm $BUILDIDS || true exit 1 fi @@ -39,8 +39,8 @@ while read build_id ; do echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done -tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST -rm -f $MANIFEST $BUILDIDS +tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST +rm $MANIFEST $BUILDIDS || true echo -e "Now please run:\n" echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" echo "wherever you need to run 'perf report' on." diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2b2e225a4d4..fc2f770e302 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -14,6 +14,7 @@ #include "util/run-command.h" #include "util/parse-events.h" #include "util/debugfs.h" +#include <pthread.h> const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; @@ -24,6 +25,42 @@ const char perf_more_info_string[] = int use_browser = -1; static int use_pager = -1; +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +static struct cmd_struct commands[] = { + { "buildid-cache", cmd_buildid_cache, 0 }, + { "buildid-list", cmd_buildid_list, 0 }, + { "diff", cmd_diff, 0 }, + { "evlist", cmd_evlist, 0 }, + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "script", cmd_script, 0 }, + { "sched", cmd_sched, 0 }, +#ifndef NO_LIBELF_SUPPORT + { "probe", cmd_probe, 0 }, +#endif + { "kmem", cmd_kmem, 0 }, + { "lock", cmd_lock, 0 }, + { "kvm", cmd_kvm, 0 }, + { "test", cmd_test, 0 }, +#ifndef NO_LIBAUDIT_SUPPORT + { "trace", cmd_trace, 0 }, +#endif + { "inject", cmd_inject, 0 }, +}; + struct pager_config { const char *cmd; int val; @@ -160,6 +197,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "dir: %s\n", debugfs_mountpoint); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--list-cmds")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands+i; + printf("%s ", p->cmd); + } + exit(0); } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -245,12 +290,6 @@ const char perf_version_string[] = PERF_VERSION; */ #define NEED_WORK_TREE (1<<2) -struct cmd_struct { - const char *cmd; - int (*fn)(int, const char **, const char *); - int option; -}; - static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status; @@ -296,30 +335,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; - static struct cmd_struct commands[] = { - { "buildid-cache", cmd_buildid_cache, 0 }, - { "buildid-list", cmd_buildid_list, 0 }, - { "diff", cmd_diff, 0 }, - { "evlist", cmd_evlist, 0 }, - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "bench", cmd_bench, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "script", cmd_script, 0 }, - { "sched", cmd_sched, 0 }, - { "probe", cmd_probe, 0 }, - { "kmem", cmd_kmem, 0 }, - { "lock", cmd_lock, 0 }, - { "kvm", cmd_kvm, 0 }, - { "test", cmd_test, 0 }, - { "inject", cmd_inject, 0 }, - }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f960ccb2edc..87f4ec6d1f3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -209,9 +209,15 @@ void pthread__unblock_sigwinch(void); #include "util/target.h" +enum perf_call_graph_mode { + CALLCHAIN_NONE, + CALLCHAIN_FP, + CALLCHAIN_DWARF +}; + struct perf_record_opts { struct perf_target target; - bool call_graph; + int call_graph; bool group; bool inherit_stat; bool no_delay; @@ -230,6 +236,7 @@ struct perf_record_opts { u64 branch_stack; u64 default_interval; u64 user_interval; + u16 stack_dump_size; }; #endif diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py new file mode 100755 index 00000000000..9e0985794e2 --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -0,0 +1,94 @@ +# EventClass.py +# +# This is a library defining some events types classes, which could +# be used by other scripts to analyzing the perf samples. +# +# Currently there are just a few classes defined for examples, +# PerfEvent is the base class for all perf event sample, PebsEvent +# is a HW base Intel x86 PEBS event, and user could add more SW/HW +# event classes based on requirements. + +import struct + +# Event types, user could add more here +EVTYPE_GENERIC = 0 +EVTYPE_PEBS = 1 # Basic PEBS event +EVTYPE_PEBS_LL = 2 # PEBS event with load latency info +EVTYPE_IBS = 3 + +# +# Currently we don't have good way to tell the event type, but by +# the size of raw buffer, raw PEBS event with load latency data's +# size is 176 bytes, while the pure PEBS event's size is 144 bytes. +# +def create_event(name, comm, dso, symbol, raw_buf): + if (len(raw_buf) == 144): + event = PebsEvent(name, comm, dso, symbol, raw_buf) + elif (len(raw_buf) == 176): + event = PebsNHM(name, comm, dso, symbol, raw_buf) + else: + event = PerfEvent(name, comm, dso, symbol, raw_buf) + + return event + +class PerfEvent(object): + event_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC): + self.name = name + self.comm = comm + self.dso = dso + self.symbol = symbol + self.raw_buf = raw_buf + self.ev_type = ev_type + PerfEvent.event_num += 1 + + def show(self): + print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + +# +# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer +# contains the context info when that event happened: the EFLAGS and +# linear IP info, as well as all the registers. +# +class PebsEvent(PerfEvent): + pebs_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS): + tmp_buf=raw_buf[0:80] + flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf) + self.flags = flags + self.ip = ip + self.ax = ax + self.bx = bx + self.cx = cx + self.dx = dx + self.si = si + self.di = di + self.bp = bp + self.sp = sp + + PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsEvent.pebs_num += 1 + del tmp_buf + +# +# Intel Nehalem and Westmere support PEBS plus Load Latency info which lie +# in the four 64 bit words write after the PEBS data: +# Status: records the IA32_PERF_GLOBAL_STATUS register value +# DLA: Data Linear Address (EIP) +# DSE: Data Source Encoding, where the latency happens, hit or miss +# in L1/L2/L3 or IO operations +# LAT: the actual latency in cycles +# +class PebsNHM(PebsEvent): + pebs_nhm_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL): + tmp_buf=raw_buf[144:176] + status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf) + self.status = status + self.dla = dla + self.dse = dse + self.lat = lat + + PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsNHM.pebs_nhm_num += 1 + del tmp_buf diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-record b/tools/perf/scripts/python/bin/event_analyzing_sample-record new file mode 100644 index 00000000000..5ce652dabd0 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-record @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# event_analyzing_sample.py can cover all type of perf samples including +# the tracepoints, so no special record requirements, just record what +# you want to analyze. +# +perf record $@ diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-report b/tools/perf/scripts/python/bin/event_analyzing_sample-report new file mode 100644 index 00000000000..0941fc94e15 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: analyze all perf samples +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py new file mode 100644 index 00000000000..163c39fa12d --- /dev/null +++ b/tools/perf/scripts/python/event_analyzing_sample.py @@ -0,0 +1,189 @@ +# event_analyzing_sample.py: general event handler in python +# +# Current perf report is already very powerful with the annotation integrated, +# and this script is not trying to be as powerful as perf report, but +# providing end user/developer a flexible way to analyze the events other +# than trace points. +# +# The 2 database related functions in this script just show how to gather +# the basic information, and users can modify and write their own functions +# according to their specific requirement. +# +# The first function "show_general_events" just does a basic grouping for all +# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is +# for a x86 HW PMU event: PEBS with load latency data. +# + +import os +import sys +import math +import struct +import sqlite3 + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from EventClass import * + +# +# If the perf.data has a big number of samples, then the insert operation +# will be very time consuming (about 10+ minutes for 10000 samples) if the +# .db database is on disk. Move the .db file to RAM based FS to speedup +# the handling, which will cut the time down to several seconds. +# +con = sqlite3.connect("/dev/shm/perf.db") +con.isolation_level = None + +def trace_begin(): + print "In trace_begin:\n" + + # + # Will create several tables at the start, pebs_ll is for PEBS data with + # load latency info, while gen_events is for general event. + # + con.execute(""" + create table if not exists gen_events ( + name text, + symbol text, + comm text, + dso text + );""") + con.execute(""" + create table if not exists pebs_ll ( + name text, + symbol text, + comm text, + dso text, + flags integer, + ip integer, + status integer, + dse integer, + dla integer, + lat integer + );""") + +# +# Create and insert event object to a database so that user could +# do more analysis with simple database commands. +# +def process_event(param_dict): + event_attr = param_dict["attr"] + sample = param_dict["sample"] + raw_buf = param_dict["raw_buf"] + comm = param_dict["comm"] + name = param_dict["ev_name"] + + # Symbol and dso info are not always resolved + if (param_dict.has_key("dso")): + dso = param_dict["dso"] + else: + dso = "Unknown_dso" + + if (param_dict.has_key("symbol")): + symbol = param_dict["symbol"] + else: + symbol = "Unknown_symbol" + + # Create the event object and insert it to the right table in database + event = create_event(name, comm, dso, symbol, raw_buf) + insert_db(event) + +def insert_db(event): + if event.ev_type == EVTYPE_GENERIC: + con.execute("insert into gen_events values(?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso)) + elif event.ev_type == EVTYPE_PEBS_LL: + event.ip &= 0x7fffffffffffffff + event.dla &= 0x7fffffffffffffff + con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso, event.flags, + event.ip, event.status, event.dse, event.dla, event.lat)) + +def trace_end(): + print "In trace_end:\n" + # We show the basic info for the 2 type of event classes + show_general_events() + show_pebs_ll() + con.close() + +# +# As the event number may be very big, so we can't use linear way +# to show the histogram in real number, but use a log2 algorithm. +# + +def num2sym(num): + # Each number will have at least one '#' + snum = '#' * (int)(math.log(num, 2) + 1) + return snum + +def show_general_events(): + + # Check the total record number in the table + count = con.execute("select count(*) from gen_events") + for t in count: + print "There is %d records in gen_events table" % t[0] + if t[0] == 0: + return + + print "Statistics about the general events grouped by thread/symbol/dso: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dso + print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74) + dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)") + for row in dsoq: + print "%40s %8d %s" % (row[0], row[1], num2sym(row[1])) + +# +# This function just shows the basic info, and we could do more with the +# data in the tables, like checking the function parameters when some +# big latency events happen. +# +def show_pebs_ll(): + + count = con.execute("select count(*) from pebs_ll") + for t in count: + print "There is %d records in pebs_ll table" % t[0] + if t[0] == 0: + return + + print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dse + dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)") + print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58) + for row in dseq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by latency + latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat") + print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58) + for row in latq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 1818a531f1d..4aeb7d5df93 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -269,7 +269,7 @@ int ui_browser__show(struct ui_browser *browser, const char *title, return err ? 0 : -1; } -void ui_browser__hide(struct ui_browser *browser __used) +void ui_browser__hide(struct ui_browser *browser __maybe_unused) { pthread_mutex_lock(&ui__lock); ui_helpline__pop(); @@ -518,7 +518,7 @@ static struct ui_browser__colorset { static int ui_browser__color_config(const char *var, const char *value, - void *data __used) + void *data __maybe_unused) { char *fg = NULL, *bg; int i; @@ -602,7 +602,8 @@ void __ui_browser__vline(struct ui_browser *browser, unsigned int column, SLsmg_set_char_set(0); } -void ui_browser__write_graph(struct ui_browser *browser __used, int graph) +void ui_browser__write_graph(struct ui_browser *browser __maybe_unused, + int graph) { SLsmg_set_char_set(1); SLsmg_write_char(graph); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 67a2703e666..8f8cd2d73b3 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -54,7 +54,8 @@ static inline struct browser_disasm_line *disasm_line__browser(struct disasm_lin return (struct browser_disasm_line *)(dl + 1); } -static bool disasm_line__filter(struct ui_browser *browser __used, void *entry) +static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, + void *entry) { if (annotate_browser__opts.hide_src_code) { struct disasm_line *dl = list_entry(entry, struct disasm_line, node); @@ -928,7 +929,8 @@ static int annotate_config__cmp(const void *name, const void *cfgp) return strcmp(name, cfg->name); } -static int annotate__config(const char *var, const char *value, void *data __used) +static int annotate__config(const char *var, const char *value, + void *data __maybe_unused) { struct annotate__config *cfg; const char *name; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 413bd62eedb..a21f40bebba 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -24,9 +24,12 @@ struct hist_browser { struct hist_entry *he_selection; struct map_symbol *selection; int print_seq; + bool show_dso; bool has_symbols; }; +extern void hist_browser__init_hpp(void); + static int hists__browser_title(struct hists *hists, char *bf, size_t size, const char *ev_name); @@ -376,12 +379,19 @@ out: } static char *callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize) + char *bf, size_t bfsize, bool show_dso) { + int printed; + if (cl->ms.sym) - return cl->ms.sym->name; + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + else + printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); + + if (show_dso) + scnprintf(bf + printed, bfsize - printed, " %s", + cl->ms.map ? cl->ms.map->dso->short_name : "unknown"); - snprintf(bf, bfsize, "%#" PRIx64, cl->ip); return bf; } @@ -417,7 +427,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; int color; bool was_first = first; @@ -434,7 +444,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse } alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -493,7 +504,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, char folded_sign = ' '; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; int color; folded_sign = callchain_list__folded(chain); @@ -510,7 +521,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, *is_current_entry = true; } - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); ui_browser__gotorc(&browser->b, row, 0); ui_browser__set_color(&browser->b, color); slsmg_write_nstring(" ", offset); @@ -553,14 +565,47 @@ static int hist_browser__show_callchain(struct hist_browser *browser, return row - first_row; } +#define HPP__COLOR_FN(_name, _field) \ +static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + double percent = 100.0 * he->_field / hpp->total_period; \ + *(double *)hpp->ptr = percent; \ + return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); \ +} + +HPP__COLOR_FN(overhead, period) +HPP__COLOR_FN(overhead_sys, period_sys) +HPP__COLOR_FN(overhead_us, period_us) +HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) +HPP__COLOR_FN(overhead_guest_us, period_guest_us) + +#undef HPP__COLOR_FN + +void hist_browser__init_hpp(void) +{ + perf_hpp__init(false, false); + + perf_hpp__format[PERF_HPP__OVERHEAD].color = + hist_browser__hpp_color_overhead; + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = + hist_browser__hpp_color_overhead_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_US].color = + hist_browser__hpp_color_overhead_us; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = + hist_browser__hpp_color_overhead_guest_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = + hist_browser__hpp_color_overhead_guest_us; +} + static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { char s[256]; double percent; - int printed = 0; - int width = browser->b.width - 6; /* The percentage */ + int i, printed = 0; + int width = browser->b.width; char folded_sign = ' '; bool current_entry = ui_browser__is_current_entry(&browser->b, row); off_t row_offset = entry->row_offset; @@ -576,35 +621,50 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (row_offset == 0) { - hist_entry__snprintf(entry, s, sizeof(s), browser->hists); - percent = (entry->period * 100.0) / browser->hists->stats.total_period; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .total_period = browser->hists->stats.total_period, + }; - ui_browser__set_percent_color(&browser->b, percent, current_entry); ui_browser__gotorc(&browser->b, row, 0); - if (symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); - width -= 2; - } - slsmg_printf(" %5.2f%%", percent); + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; - /* The scroll bar isn't being used */ - if (!browser->b.navkeypressed) - width += 1; + if (i) { + slsmg_printf(" "); + width -= 2; + } - if (!current_entry || !browser->b.navkeypressed) - ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + if (perf_hpp__format[i].color) { + hpp.ptr = &percent; + /* It will set percent for us. See HPP__COLOR_FN above. */ + width -= perf_hpp__format[i].color(&hpp, entry); - if (symbol_conf.show_nr_samples) { - slsmg_printf(" %11u", entry->nr_events); - width -= 12; - } + ui_browser__set_percent_color(&browser->b, percent, current_entry); + + if (i == 0 && symbol_conf.use_callchain) { + slsmg_printf("%c ", folded_sign); + width -= 2; + } + + slsmg_printf("%s", s); - if (symbol_conf.show_total_period) { - slsmg_printf(" %12" PRIu64, entry->period); - width -= 13; + if (!current_entry || !browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + } else { + width -= perf_hpp__format[i].entry(&hpp, entry); + slsmg_printf("%s", s); + } } + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists); slsmg_write_nstring(s, width); ++row; ++printed; @@ -830,7 +890,7 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; bool was_first = first; @@ -842,7 +902,8 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro folded_sign = callchain_list__folded(chain); alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -880,10 +941,10 @@ static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, int printed = 0; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; folded_sign = callchain_list__folded(chain); - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); } @@ -920,7 +981,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) folded_sign = hist_entry__folded(he); - hist_entry__snprintf(he, s, sizeof(s), browser->hists); + hist_entry__sort_snprintf(he, s, sizeof(s), browser->hists); percent = (he->period * 100.0) / browser->hists->stats.total_period; if (symbol_conf.use_callchain) @@ -1133,6 +1194,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; case 'd': goto zoom_dso; + case 'V': + browser->show_dso = !browser->show_dso; + continue; case 't': goto zoom_thread; case '/': @@ -1164,6 +1228,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "d Zoom into current DSO\n" "t Zoom into current Thread\n" "P Print histograms to perf.hist.N\n" + "V Verbose (DSO names in callchains, etc)\n" "/ Filter symbol by name"); continue; case K_ENTER: diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index ec12e0b4ded..7ff99ec1d95 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -3,6 +3,7 @@ #include "../evsel.h" #include "../sort.h" #include "../hist.h" +#include "../helpline.h" #include "gtk.h" #include <signal.h> @@ -35,6 +36,57 @@ static void perf_gtk__resize_window(GtkWidget *window) gtk_window_resize(GTK_WINDOW(window), width, height); } +static const char *perf_gtk__get_percent_color(double percent) +{ + if (percent >= MIN_RED) + return "<span fgcolor='red'>"; + if (percent >= MIN_GREEN) + return "<span fgcolor='dark green'>"; + return NULL; +} + +#define HPP__COLOR_FN(_name, _field) \ +static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + double percent = 100.0 * he->_field / hpp->total_period; \ + const char *markup; \ + int ret = 0; \ + \ + markup = perf_gtk__get_percent_color(percent); \ + if (markup) \ + ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \ + ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); \ + if (markup) \ + ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); \ + \ + return ret; \ +} + +HPP__COLOR_FN(overhead, period) +HPP__COLOR_FN(overhead_sys, period_sys) +HPP__COLOR_FN(overhead_us, period_us) +HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) +HPP__COLOR_FN(overhead_guest_us, period_guest_us) + +#undef HPP__COLOR_FN + +void perf_gtk__init_hpp(void) +{ + perf_hpp__init(false, false); + + perf_hpp__format[PERF_HPP__OVERHEAD].color = + perf_gtk__hpp_color_overhead; + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = + perf_gtk__hpp_color_overhead_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_US].color = + perf_gtk__hpp_color_overhead_us; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = + perf_gtk__hpp_color_overhead_guest_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = + perf_gtk__hpp_color_overhead_guest_us; +} + static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) { GType col_types[MAX_COLUMNS]; @@ -42,15 +94,25 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) struct sort_entry *se; GtkListStore *store; struct rb_node *nd; - u64 total_period; GtkWidget *view; - int col_idx; + int i, col_idx; int nr_cols; + char s[512]; + + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .total_period = hists->stats.total_period, + }; nr_cols = 0; - /* The percentage column */ - col_types[nr_cols++] = G_TYPE_STRING; + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + col_types[nr_cols++] = G_TYPE_STRING; + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) @@ -67,11 +129,17 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) col_idx = 0; - /* The percentage column */ - gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), - -1, "Overhead (%)", - renderer, "text", - col_idx++, NULL); + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + perf_hpp__format[i].header(&hpp); + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, s, + renderer, "markup", + col_idx++, NULL); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) @@ -87,13 +155,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) g_object_unref(GTK_TREE_MODEL(store)); - total_period = hists->stats.total_period; - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; - double percent; - char s[512]; if (h->filtered) continue; @@ -102,11 +166,17 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) col_idx = 0; - percent = (h->period * 100.0) / total_period; + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; - snprintf(s, ARRAY_SIZE(s), "%.2f", percent); + if (perf_hpp__format[i].color) + perf_hpp__format[i].color(&hpp, h); + else + perf_hpp__format[i].entry(&hpp, h); - gtk_list_store_set(store, &iter, col_idx++, s, -1); + gtk_list_store_set(store, &iter, col_idx++, s, -1); + } list_for_each_entry(se, &hist_entry__sort_list, list) { if (se->elide) @@ -166,9 +236,10 @@ static GtkWidget *perf_gtk__setup_statusbar(void) } int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, - const char *help __used, - void (*timer) (void *arg)__used, - void *arg __used, int delay_secs __used) + const char *help, + void (*timer) (void *arg)__maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { struct perf_evsel *pos; GtkWidget *vbox; @@ -233,6 +304,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + ui_helpline__push(help); + gtk_main(); perf_gtk__deactivate_context(&pgctx); diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a4d0f2b4a2d..687af0bba18 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -29,6 +29,9 @@ static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); +void perf_gtk__init_helpline(void); +void perf_gtk__init_hpp(void); + #ifndef HAVE_GTK_INFO_BAR static inline GtkWidget *perf_gtk__setup_info_bar(void) { diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c new file mode 100644 index 00000000000..5db4432ff12 --- /dev/null +++ b/tools/perf/ui/gtk/helpline.c @@ -0,0 +1,56 @@ +#include <stdio.h> +#include <string.h> + +#include "gtk.h" +#include "../ui.h" +#include "../helpline.h" +#include "../../util/debug.h" + +static void gtk_helpline_pop(void) +{ + if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id); +} + +static void gtk_helpline_push(const char *msg) +{ + if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id, msg); +} + +static struct ui_helpline gtk_helpline_fns = { + .pop = gtk_helpline_pop, + .push = gtk_helpline_push, +}; + +void perf_gtk__init_helpline(void) +{ + helpline_fns = >k_helpline_fns; +} + +int perf_gtk__show_helpline(const char *fmt, va_list ap) +{ + int ret; + char *ptr; + static int backlog; + + ret = vscnprintf(ui_helpline__current + backlog, + sizeof(ui_helpline__current) - backlog, fmt, ap); + backlog += ret; + + /* only first line can be displayed */ + ptr = strchr(ui_helpline__current, '\n'); + if (ptr && (ptr - ui_helpline__current) <= backlog) { + *ptr = '\0'; + ui_helpline__puts(ui_helpline__current); + backlog = 0; + } + + return ret; +} diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 92879ce61e2..3c4c6ef7828 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -7,11 +7,15 @@ extern struct perf_error_ops perf_gtk_eops; int perf_gtk__init(void) { perf_error__register(&perf_gtk_eops); + perf_gtk__init_helpline(); + perf_gtk__init_hpp(); return gtk_init_check(NULL, NULL) ? 0 : -1; } -void perf_gtk__exit(bool wait_for_ok __used) +void perf_gtk__exit(bool wait_for_ok __maybe_unused) { + if (!perf_gtk__is_active_context(pgctx)) + return; perf_error__unregister(&perf_gtk_eops); gtk_main_quit(); } diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 0ead373c0df..8aada5b3c04 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -117,13 +117,8 @@ struct perf_error_ops perf_gtk_eops = { * For now, just add stubs for NO_NEWT=1 build. */ #ifdef NO_NEWT_SUPPORT -int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - -void ui_progress__update(u64 curr __used, u64 total __used, - const char *title __used) +void ui_progress__update(u64 curr __maybe_unused, u64 total __maybe_unused, + const char *title __maybe_unused) { } #endif diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 2f950c2641c..a49bcf3c190 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -5,23 +5,32 @@ #include "../debug.h" #include "helpline.h" #include "ui.h" -#include "libslang.h" -void ui_helpline__pop(void) +char ui_helpline__current[512]; + +static void nop_helpline__pop(void) { } -char ui_helpline__current[512]; +static void nop_helpline__push(const char *msg __maybe_unused) +{ +} -void ui_helpline__push(const char *msg) +static struct ui_helpline default_helpline_fns = { + .pop = nop_helpline__pop, + .push = nop_helpline__push, +}; + +struct ui_helpline *helpline_fns = &default_helpline_fns; + +void ui_helpline__pop(void) { - const size_t sz = sizeof(ui_helpline__current); + helpline_fns->pop(); +} - SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); - SLsmg_set_color(0); - SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); - SLsmg_refresh(); - strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; +void ui_helpline__push(const char *msg) +{ + helpline_fns->push(msg); } void ui_helpline__vpush(const char *fmt, va_list ap) @@ -50,30 +59,3 @@ void ui_helpline__puts(const char *msg) ui_helpline__pop(); ui_helpline__push(msg); } - -void ui_helpline__init(void) -{ - ui_helpline__puts(" "); -} - -char ui_helpline__last_msg[1024]; - -int ui_helpline__show_help(const char *format, va_list ap) -{ - int ret; - static int backlog; - - pthread_mutex_lock(&ui__lock); - ret = vscnprintf(ui_helpline__last_msg + backlog, - sizeof(ui_helpline__last_msg) - backlog, format, ap); - backlog += ret; - - if (ui_helpline__last_msg[backlog - 1] == '\n') { - ui_helpline__puts(ui_helpline__last_msg); - SLsmg_refresh(); - backlog = 0; - } - pthread_mutex_unlock(&ui__lock); - - return ret; -} diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index 7bab6b34e35..2b667ee454c 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -4,13 +4,44 @@ #include <stdio.h> #include <stdarg.h> +#include "../util/cache.h" + +struct ui_helpline { + void (*pop)(void); + void (*push)(const char *msg); +}; + +extern struct ui_helpline *helpline_fns; + void ui_helpline__init(void); + void ui_helpline__pop(void); void ui_helpline__push(const char *msg); void ui_helpline__vpush(const char *fmt, va_list ap); void ui_helpline__fpush(const char *fmt, ...); void ui_helpline__puts(const char *msg); -extern char ui_helpline__current[]; +extern char ui_helpline__current[512]; + +#ifdef NO_NEWT_SUPPORT +static inline int ui_helpline__show_help(const char *format __maybe_unused, + va_list ap __maybe_unused) +{ + return 0; +} +#else +extern char ui_helpline__last_msg[]; +int ui_helpline__show_help(const char *format, va_list ap); +#endif /* NO_NEWT_SUPPORT */ + +#ifdef NO_GTK2_SUPPORT +static inline int perf_gtk__show_helpline(const char *format __maybe_unused, + va_list ap __maybe_unused) +{ + return 0; +} +#else +int perf_gtk__show_helpline(const char *format, va_list ap); +#endif /* NO_GTK2_SUPPORT */ #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c new file mode 100644 index 00000000000..e3f8cd46e7d --- /dev/null +++ b/tools/perf/ui/hist.c @@ -0,0 +1,390 @@ +#include <math.h> + +#include "../util/hist.h" +#include "../util/util.h" +#include "../util/sort.h" + + +/* hist period print (hpp) functions */ +static int hpp__header_overhead(struct perf_hpp *hpp) +{ + const char *fmt = hpp->ptr ? "Baseline" : "Overhead"; + + return scnprintf(hpp->buf, hpp->size, fmt); +} + +static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused) +{ + return 8; +} + +static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period / hpp->total_period; + + if (hpp->ptr) { + struct hists *old_hists = hpp->ptr; + u64 total_period = old_hists->stats.total_period; + u64 base_period = he->pair ? he->pair->period : 0; + + if (total_period) + percent = 100.0 * base_period / total_period; + else + percent = 0.0; + } + + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); +} + +static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%"; + + if (hpp->ptr) { + struct hists *old_hists = hpp->ptr; + u64 total_period = old_hists->stats.total_period; + u64 base_period = he->pair ? he->pair->period : 0; + + if (total_period) + percent = 100.0 * base_period / total_period; + else + percent = 0.0; + } + + return scnprintf(hpp->buf, hpp->size, fmt, percent); +} + +static int hpp__header_overhead_sys(struct perf_hpp *hpp) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "sys"); +} + +static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused) +{ + return 7; +} + +static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_sys / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); +} + +static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_sys / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); +} + +static int hpp__header_overhead_us(struct perf_hpp *hpp) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "user"); +} + +static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused) +{ + return 7; +} + +static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_us / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); +} + +static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) +{ + double percent = 100.0 * he->period_us / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); +} + +static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "guest sys"); +} + +static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused) +{ + return 9; +} + +static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_sys / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); +} + +static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_sys / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); +} + +static int hpp__header_overhead_guest_us(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "guest usr"); +} + +static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused) +{ + return 9; +} + +static int hpp__color_overhead_guest_us(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_us / hpp->total_period; + return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); +} + +static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, + struct hist_entry *he) +{ + double percent = 100.0 * he->period_guest_us / hpp->total_period; + const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; + + return scnprintf(hpp->buf, hpp->size, fmt, percent); +} + +static int hpp__header_samples(struct perf_hpp *hpp) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%11s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Samples"); +} + +static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused) +{ + return 11; +} + +static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he) +{ + const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64; + + return scnprintf(hpp->buf, hpp->size, fmt, he->nr_events); +} + +static int hpp__header_period(struct perf_hpp *hpp) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%12s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Period"); +} + +static int hpp__width_period(struct perf_hpp *hpp __maybe_unused) +{ + return 12; +} + +static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he) +{ + const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64; + + return scnprintf(hpp->buf, hpp->size, fmt, he->period); +} + +static int hpp__header_delta(struct perf_hpp *hpp) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; + + return scnprintf(hpp->buf, hpp->size, fmt, "Delta"); +} + +static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused) +{ + return 7; +} + +static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) +{ + struct hists *pair_hists = hpp->ptr; + u64 old_total, new_total; + double old_percent = 0, new_percent = 0; + double diff; + const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s"; + char buf[32] = " "; + + old_total = pair_hists->stats.total_period; + if (old_total > 0 && he->pair) + old_percent = 100.0 * he->pair->period / old_total; + + new_total = hpp->total_period; + if (new_total > 0) + new_percent = 100.0 * he->period / new_total; + + diff = new_percent - old_percent; + if (fabs(diff) >= 0.01) + scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); + + return scnprintf(hpp->buf, hpp->size, fmt, buf); +} + +static int hpp__header_displ(struct perf_hpp *hpp) +{ + return scnprintf(hpp->buf, hpp->size, "Displ."); +} + +static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused) +{ + return 6; +} + +static int hpp__entry_displ(struct perf_hpp *hpp, + struct hist_entry *he __maybe_unused) +{ + const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s"; + char buf[32] = " "; + + if (hpp->displacement) + scnprintf(buf, sizeof(buf), "%+4ld", hpp->displacement); + + return scnprintf(hpp->buf, hpp->size, fmt, buf); +} + +#define HPP__COLOR_PRINT_FNS(_name) \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name + +#define HPP__PRINT_FNS(_name) \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .entry = hpp__entry_ ## _name + +struct perf_hpp_fmt perf_hpp__format[] = { + { .cond = true, HPP__COLOR_PRINT_FNS(overhead) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) }, + { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) }, + { .cond = false, HPP__PRINT_FNS(samples) }, + { .cond = false, HPP__PRINT_FNS(period) }, + { .cond = false, HPP__PRINT_FNS(delta) }, + { .cond = false, HPP__PRINT_FNS(displ) } +}; + +#undef HPP__COLOR_PRINT_FNS +#undef HPP__PRINT_FNS + +void perf_hpp__init(bool need_pair, bool show_displacement) +{ + if (symbol_conf.show_cpu_utilization) { + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true; + perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true; + + if (perf_guest) { + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true; + } + } + + if (symbol_conf.show_nr_samples) + perf_hpp__format[PERF_HPP__SAMPLES].cond = true; + + if (symbol_conf.show_total_period) + perf_hpp__format[PERF_HPP__PERIOD].cond = true; + + if (need_pair) { + perf_hpp__format[PERF_HPP__DELTA].cond = true; + + if (show_displacement) + perf_hpp__format[PERF_HPP__DISPL].cond = true; + } +} + +static inline void advance_hpp(struct perf_hpp *hpp, int inc) +{ + hpp->buf += inc; + hpp->size -= inc; +} + +int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, + bool color) +{ + const char *sep = symbol_conf.field_sep; + char *start = hpp->buf; + int i, ret; + + if (symbol_conf.exclude_other && !he->parent) + return 0; + + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + + if (!sep || i > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); + advance_hpp(hpp, ret); + } + + if (color && perf_hpp__format[i].color) + ret = perf_hpp__format[i].color(hpp, he); + else + ret = perf_hpp__format[i].entry(hpp, he); + + advance_hpp(hpp, ret); + } + + return hpp->buf - start; +} + +int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) +{ + const char *sep = symbol_conf.field_sep; + struct sort_entry *se; + int ret = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); + ret += se->se_snprintf(he, s + ret, size - ret, + hists__col_len(hists, se->se_width_idx)); + } + + return ret; +} + +/* + * See hists__fprintf to match the column widths + */ +unsigned int hists__sort_list_width(struct hists *hists) +{ + struct sort_entry *se; + int i, ret = 0; + + for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { + if (!perf_hpp__format[i].cond) + continue; + if (i) + ret += 2; + + ret += perf_hpp__format[i].width(NULL); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) + if (!se->elide) + ret += 2 + hists__col_len(hists, se->se_width_idx); + + if (verbose) /* Addr + origin */ + ret += 3 + BITS_PER_LONG / 4; + + return ret; +} diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 791fb15ce35..bd7d460f844 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,6 +1,10 @@ -#include "../cache.h" -#include "../debug.h" +#include <pthread.h> +#include "../util/cache.h" +#include "../util/debug.h" +#include "../util/hist.h" + +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; void setup_browser(bool fallback_to_pager) { @@ -25,6 +29,8 @@ void setup_browser(bool fallback_to_pager) use_browser = 0; if (fallback_to_pager) setup_pager(); + + perf_hpp__init(false, false); break; } } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c new file mode 100644 index 00000000000..882461a4283 --- /dev/null +++ b/tools/perf/ui/stdio/hist.c @@ -0,0 +1,498 @@ +#include <stdio.h> + +#include "../../util/util.h" +#include "../../util/hist.h" +#include "../../util/sort.h" + + +static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) +{ + int i; + int ret = fprintf(fp, " "); + + for (i = 0; i < left_margin; i++) + ret += fprintf(fp, " "); + + return ret; +} + +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, + int left_margin) +{ + int i; + size_t ret = callchain__fprintf_left_margin(fp, left_margin); + + for (i = 0; i < depth; i++) + if (depth_mask & (1 << i)) + ret += fprintf(fp, "| "); + else + ret += fprintf(fp, " "); + + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, + int depth, int depth_mask, int period, + u64 total_samples, u64 hits, + int left_margin) +{ + int i; + size_t ret = 0; + + ret += callchain__fprintf_left_margin(fp, left_margin); + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + if (!period && i == depth - 1) { + double percent; + + percent = hits * 100.0 / total_samples; + ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); + } else + ret += fprintf(fp, "%s", " "); + } + if (chain->ms.sym) + ret += fprintf(fp, "%s\n", chain->ms.sym->name); + else + ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); + + return ret; +} + +static struct symbol *rem_sq_bracket; +static struct callchain_list rem_hits; + +static void init_rem_hits(void) +{ + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); + if (!rem_sq_bracket) { + fprintf(stderr, "Not enough memory to display remaining hits\n"); + return; + } + + strcpy(rem_sq_bracket->name, "[...]"); + rem_hits.ms.sym = rem_sq_bracket; +} + +static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int depth, + int depth_mask, int left_margin) +{ + struct rb_node *node, *next; + struct callchain_node *child; + struct callchain_list *chain; + int new_depth_mask = depth_mask; + u64 remaining; + size_t ret = 0; + int i; + uint entries_printed = 0; + + remaining = total_samples; + + node = rb_first(root); + while (node) { + u64 new_total; + u64 cumul; + + child = rb_entry(node, struct callchain_node, rb_node); + cumul = callchain_cumul_hits(child); + remaining -= cumul; + + /* + * The depth mask manages the output of pipes that show + * the depth. We don't want to keep the pipes of the current + * level for the last child of this depth. + * Except if we have remaining filtered hits. They will + * supersede the last child + */ + next = rb_next(node); + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) + new_depth_mask &= ~(1 << (depth - 1)); + + /* + * But we keep the older depth mask for the line separator + * to keep the level link until we reach the last child + */ + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, + left_margin); + i = 0; + list_for_each_entry(chain, &child->val, list) { + ret += ipchain__fprintf_graph(fp, chain, depth, + new_depth_mask, i++, + total_samples, + cumul, + left_margin); + } + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = child->children_hit; + else + new_total = total_samples; + + ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, + depth + 1, + new_depth_mask | (1 << depth), + left_margin); + node = next; + if (++entries_printed == callchain_param.print_limit) + break; + } + + if (callchain_param.mode == CHAIN_GRAPH_REL && + remaining && remaining != total_samples) { + + if (!rem_sq_bracket) + return ret; + + new_depth_mask &= ~(1 << (depth - 1)); + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + new_depth_mask, 0, total_samples, + remaining, left_margin); + } + + return ret; +} + +static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int left_margin) +{ + struct callchain_node *cnode; + struct callchain_list *chain; + u32 entries_printed = 0; + bool printed = false; + struct rb_node *node; + int i = 0; + int ret = 0; + + /* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + */ + node = rb_first(root); + if (node && !rb_next(node)) { + cnode = rb_entry(node, struct callchain_node, rb_node); + list_for_each_entry(chain, &cnode->val, list) { + /* + * If we sort by symbol, the first entry is the same than + * the symbol. No need to print it otherwise it appears as + * displayed twice. + */ + if (!i++ && sort__first_dimension == SORT_SYM) + continue; + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + + if (++entries_printed == callchain_param.print_limit) + break; + } + root = &cnode->rb_root; + } + + ret += __callchain__fprintf_graph(fp, root, total_samples, + 1, 1, left_margin); + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t __callchain__fprintf_flat(FILE *fp, + struct callchain_node *self, + u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; + + ret += __callchain__fprintf_flat(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)(long)chain->ip); + } + + return ret; +} + +static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, + u64 total_samples) +{ + size_t ret = 0; + u32 entries_printed = 0; + struct rb_node *rb_node; + struct callchain_node *chain; + + rb_node = rb_first(self); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + + ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); + ret += __callchain__fprintf_flat(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + if (++entries_printed == callchain_param.print_limit) + break; + + rb_node = rb_next(rb_node); + } + + return ret; +} + +static size_t hist_entry_callchain__fprintf(struct hist_entry *he, + u64 total_samples, int left_margin, + FILE *fp) +{ + switch (callchain_param.mode) { + case CHAIN_GRAPH_REL: + return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, + left_margin); + break; + case CHAIN_GRAPH_ABS: + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + left_margin); + break; + case CHAIN_FLAT: + return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); + break; + case CHAIN_NONE: + break; + default: + pr_err("Bad callchain mode\n"); + } + + return 0; +} + +static size_t hist_entry__callchain_fprintf(struct hist_entry *he, + struct hists *hists, + u64 total_period, FILE *fp) +{ + int left_margin = 0; + + if (sort__first_dimension == SORT_COMM) { + struct sort_entry *se = list_first_entry(&hist_entry__sort_list, + typeof(*se), list); + left_margin = hists__col_len(hists, se->se_width_idx); + left_margin -= thread__comm_len(he->thread); + } + + return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); +} + +static int hist_entry__fprintf(struct hist_entry *he, size_t size, + struct hists *hists, struct hists *pair_hists, + long displacement, u64 total_period, FILE *fp) +{ + char bf[512]; + int ret; + struct perf_hpp hpp = { + .buf = bf, + .size = size, + .total_period = total_period, + .displacement = displacement, + .ptr = pair_hists, + }; + bool color = !symbol_conf.field_sep; + + if (size == 0 || size > sizeof(bf)) + size = hpp.size = sizeof(bf); + + ret = hist_entry__period_snprintf(&hpp, he, color); + hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); + + ret = fprintf(fp, "%s\n", bf); + + if (symbol_conf.use_callchain) + ret += hist_entry__callchain_fprintf(he, hists, + total_period, fp); + + return ret; +} + +size_t hists__fprintf(struct hists *hists, struct hists *pair, + bool show_displacement, bool show_header, int max_rows, + int max_cols, FILE *fp) +{ + struct sort_entry *se; + struct rb_node *nd; + size_t ret = 0; + u64 total_period; + unsigned long position = 1; + long displacement = 0; + unsigned int width; + const char *sep = symbol_conf.field_sep; + const char *col_width = symbol_conf.col_width_list_str; + int idx, nr_rows = 0; + char bf[64]; + struct perf_hpp dummy_hpp = { + .buf = bf, + .size = sizeof(bf), + .ptr = pair, + }; + + init_rem_hits(); + + if (!show_header) + goto print_entries; + + fprintf(fp, "# "); + for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { + if (!perf_hpp__format[idx].cond) + continue; + + if (idx) + fprintf(fp, "%s", sep ?: " "); + + perf_hpp__format[idx].header(&dummy_hpp); + fprintf(fp, "%s", bf); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + if (sep) { + fprintf(fp, "%c%s", *sep, se->se_header); + continue; + } + width = strlen(se->se_header); + if (symbol_conf.col_width_list_str) { + if (col_width) { + hists__set_col_len(hists, se->se_width_idx, + atoi(col_width)); + col_width = strchr(col_width, ','); + if (col_width) + ++col_width; + } + } + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); + fprintf(fp, " %*s", width, se->se_header); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (sep) + goto print_entries; + + fprintf(fp, "# "); + for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { + unsigned int i; + + if (!perf_hpp__format[idx].cond) + continue; + + if (idx) + fprintf(fp, "%s", sep ?: " "); + + width = perf_hpp__format[idx].width(&dummy_hpp); + for (i = 0; i < width; i++) + fprintf(fp, "."); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + unsigned int i; + + if (se->elide) + continue; + + fprintf(fp, " "); + width = hists__col_len(hists, se->se_width_idx); + if (width == 0) + width = strlen(se->se_header); + for (i = 0; i < width; i++) + fprintf(fp, "."); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + fprintf(fp, "#\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + +print_entries: + total_period = hists->stats.total_period; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (h->filtered) + continue; + + if (show_displacement) { + if (h->pair != NULL) + displacement = ((long)h->pair->position - + (long)position); + else + displacement = 0; + ++position; + } + ret += hist_entry__fprintf(h, max_cols, hists, pair, displacement, + total_period, fp); + + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (h->ms.map == NULL && verbose > 1) { + __map_groups__fprintf_maps(&h->thread->mg, + MAP__FUNCTION, verbose, fp); + fprintf(fp, "%.10s end\n", graph_dotted_line); + } + } +out: + free(rem_sq_bracket); + + return ret; +} + +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) +{ + int i; + size_t ret = 0; + + for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { + const char *name; + + if (hists->stats.nr_events[i] == 0) + continue; + + name = perf_event__name(i); + if (!strcmp(name, "UNKNOWN")) + continue; + + ret += fprintf(fp, "%16s events: %10d\n", name, + hists->stats.nr_events[i]); + } + + return ret; +} diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c new file mode 100644 index 00000000000..2884d2f41e3 --- /dev/null +++ b/tools/perf/ui/tui/helpline.c @@ -0,0 +1,57 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> + +#include "../../util/debug.h" +#include "../helpline.h" +#include "../ui.h" +#include "../libslang.h" + +static void tui_helpline__pop(void) +{ +} + +static void tui_helpline__push(const char *msg) +{ + const size_t sz = sizeof(ui_helpline__current); + + SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); + SLsmg_set_color(0); + SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); + SLsmg_refresh(); + strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; +} + +struct ui_helpline tui_helpline_fns = { + .pop = tui_helpline__pop, + .push = tui_helpline__push, +}; + +void ui_helpline__init(void) +{ + helpline_fns = &tui_helpline_fns; + ui_helpline__puts(" "); +} + +char ui_helpline__last_msg[1024]; + +int ui_helpline__show_help(const char *format, va_list ap) +{ + int ret; + static int backlog; + + pthread_mutex_lock(&ui__lock); + ret = vscnprintf(ui_helpline__last_msg + backlog, + sizeof(ui_helpline__last_msg) - backlog, format, ap); + backlog += ret; + + if (ui_helpline__last_msg[backlog - 1] == '\n') { + ui_helpline__puts(ui_helpline__last_msg); + SLsmg_refresh(); + backlog = 0; + } + pthread_mutex_unlock(&ui__lock); + + return ret; +} diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e813c1d1734..60debb81537 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -11,12 +11,12 @@ #include "../libslang.h" #include "../keysyms.h" -pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; - static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern void hist_browser__init_hpp(void); + void ui__refresh_dimensions(bool force) { if (force || ui__need_resize) { @@ -28,7 +28,7 @@ void ui__refresh_dimensions(bool force) } } -static void ui__sigwinch(int sig __used) +static void ui__sigwinch(int sig __maybe_unused) { ui__need_resize = 1; } @@ -88,7 +88,7 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } -static void newt_suspend(void *d __used) +static void newt_suspend(void *d __maybe_unused) { newtSuspend(); raise(SIGTSTP); @@ -126,6 +126,8 @@ int ui__init(void) signal(SIGTERM, ui__signal); perf_error__register(&perf_tui_eops); + + hist_browser__init_hpp(); out: return err; } diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index b8144e80bb1..e6d134773d0 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -3,7 +3,8 @@ static const char *alias_key; static char *alias_val; -static int alias_lookup_cb(const char *k, const char *v, void *cb __used) +static int alias_lookup_cb(const char *k, const char *v, + void *cb __maybe_unused) { if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { if (!v) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3a282c0057d..f0a91037137 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,6 +17,7 @@ #include <pthread.h> const char *disassembler_style; +const char *objdump_path; static struct ins *ins__find(const char *name); static int disasm_line__parse(char *line, char **namep, char **rawp); @@ -312,8 +313,8 @@ static struct ins_ops dec_ops = { .scnprintf = dec__scnprintf, }; -static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size, - struct ins_operands *ops __used) +static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, + struct ins_operands *ops __maybe_unused) { return scnprintf(bf, size, "%-6.6s", "nop"); } @@ -415,7 +416,7 @@ static struct ins *ins__find(const char *name) return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp); } -int symbol__annotate_init(struct map *map __used, struct symbol *sym) +int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); pthread_mutex_init(¬es->lock, NULL); @@ -820,9 +821,10 @@ fallback: dso, dso->long_name, sym, sym->name); snprintf(command, sizeof(command), - "objdump %s%s --start-address=0x%016" PRIx64 + "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -d %s %s -C %s|grep -v %s|expand", + objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", map__rip_2objdump(map, sym->start), @@ -982,7 +984,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, int context) { struct dso *dso = map->dso; - const char *filename = dso->long_name, *d_filename; + char *filename; + const char *d_filename; struct annotation *notes = symbol__annotation(sym); struct disasm_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); @@ -990,6 +993,10 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, int more = 0; u64 len; + filename = strdup(dso->long_name); + if (!filename) + return -ENOMEM; + if (full_paths) d_filename = filename; else @@ -1040,6 +1047,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, } } + free(filename); + return more; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 78a5692dd71..9b5b21e7b03 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -7,6 +7,7 @@ #include "symbol.h" #include <linux/list.h> #include <linux/rbtree.h> +#include <pthread.h> struct ins; @@ -125,7 +126,7 @@ int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); -int symbol__annotate_init(struct map *map __used, struct symbol *sym); +int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, bool full_paths, int min_pcnt, int max_lines, int context); @@ -138,11 +139,12 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, int max_lines); #ifdef NO_NEWT_SUPPORT -static inline int symbol__tui_annotate(struct symbol *sym __used, - struct map *map __used, - int evidx __used, - void(*timer)(void *arg) __used, - void *arg __used, int delay_secs __used) +static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, + struct map *map __maybe_unused, + int evidx __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { return 0; } @@ -152,5 +154,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, #endif extern const char *disassembler_style; +extern const char *objdump_path; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index fd9a5944b62..8e3a740ddbd 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -16,10 +16,10 @@ #include "session.h" #include "tool.h" -static int build_id__mark_dso_hit(struct perf_tool *tool __used, +static int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, struct machine *machine) { struct addr_location al; @@ -41,9 +41,10 @@ static int build_id__mark_dso_hit(struct perf_tool *tool __used, return 0; } -static int perf_event__exit_del_thread(struct perf_tool *tool __used, +static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample + __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->fork.tid); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index cff18c617d1..ab176942654 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -39,7 +39,7 @@ static inline void setup_browser(bool fallback_to_pager) if (fallback_to_pager) setup_pager(); } -static inline void exit_browser(bool wait_for_ok __used) {} +static inline void exit_browser(bool wait_for_ok __maybe_unused) {} #else void setup_browser(bool fallback_to_pager); void exit_browser(bool wait_for_ok); @@ -49,7 +49,7 @@ static inline int ui__init(void) { return -1; } -static inline void ui__exit(bool wait_for_ok __used) {} +static inline void ui__exit(bool wait_for_ok __maybe_unused) {} #else int ui__init(void); void ui__exit(bool wait_for_ok); @@ -60,7 +60,7 @@ static inline int perf_gtk__init(void) { return -1; } -static inline void perf_gtk__exit(bool wait_for_ok __used) {} +static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {} #else int perf_gtk__init(void); void perf_gtk__exit(bool wait_for_ok); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3a6bff47614..d3b3f5d8213 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -93,7 +93,7 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, */ static void sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, - u64 min_hit, struct callchain_param *param __used) + u64 min_hit, struct callchain_param *param __maybe_unused) { __sort_chain_flat(rb_root, &root->node, min_hit); } @@ -115,7 +115,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, static void sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, - u64 min_hit, struct callchain_param *param __used) + u64 min_hit, struct callchain_param *param __maybe_unused) { __sort_chain_graph_abs(&chain_root->node, min_hit); rb_root->rb_node = chain_root->node.rb_root.rb_node; @@ -140,7 +140,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node, static void sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, - u64 min_hit __used, struct callchain_param *param) + u64 min_hit __maybe_unused, struct callchain_param *param) { __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); rb_root->rb_node = chain_root->node.rb_root.rb_node; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3bdb407f9cd..eb340571e7d 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -58,7 +58,7 @@ struct callchain_list { /* * A callchain cursor is a single linked list that * let one feed a callchain progressively. - * It keeps persitent allocated entries to minimize + * It keeps persistent allocated entries to minimize * allocations. */ struct callchain_cursor_node { diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index dbe2f16b1a1..96bbda1ddb8 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -138,8 +138,8 @@ void close_cgroup(struct cgroup_sel *cgrp) } } -int parse_cgroups(const struct option *opt __used, const char *str, - int unset __used) +int parse_cgroups(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; const char *p, *e, *eos = str + strlen(str); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6faa3a18bfb..3e0fdd369cc 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -342,13 +342,15 @@ const char *perf_config_dirname(const char *name, const char *value) return value; } -static int perf_default_core_config(const char *var __used, const char *value __used) +static int perf_default_core_config(const char *var __maybe_unused, + const char *value __maybe_unused) { /* Add other config variables here. */ return 0; } -int perf_default_config(const char *var, const char *value, void *dummy __used) +int perf_default_config(const char *var, const char *value, + void *dummy __maybe_unused) { if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index adc72f09914..2b32ffa9ebd 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -38,24 +38,19 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) return cpus; } -static struct cpu_map *cpu_map__read_all_cpu_map(void) +struct cpu_map *cpu_map__read(FILE *file) { struct cpu_map *cpus = NULL; - FILE *onlnf; int nr_cpus = 0; int *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - sep = 0; prev = -1; for (;;) { - n = fscanf(onlnf, "%u%c", &cpu, &sep); + n = fscanf(file, "%u%c", &cpu, &sep); if (n <= 0) break; if (prev >= 0) { @@ -95,6 +90,19 @@ static struct cpu_map *cpu_map__read_all_cpu_map(void) cpus = cpu_map__default_new(); out_free_tmp: free(tmp_cpus); + return cpus; +} + +static struct cpu_map *cpu_map__read_all_cpu_map(void) +{ + struct cpu_map *cpus = NULL; + FILE *onlnf; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (!onlnf) + return cpu_map__default_new(); + + cpus = cpu_map__read(onlnf); fclose(onlnf); return cpus; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index c41518573c6..2f68a3b8c28 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,6 +2,7 @@ #define __PERF_CPUMAP_H #include <stdio.h> +#include <stdbool.h> struct cpu_map { int nr; @@ -11,7 +12,17 @@ struct cpu_map { struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); - +struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); +static inline int cpu_map__nr(const struct cpu_map *map) +{ + return map ? map->nr : 1; +} + +static inline bool cpu_map__all(const struct cpu_map *map) +{ + return map ? map->map[0] == -1 : true; +} + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 4dfe0bb3c32..66eb3828ceb 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -23,8 +23,10 @@ int eprintf(int level, const char *fmt, ...) if (verbose >= level) { va_start(args, fmt); - if (use_browser > 0) + if (use_browser == 1) ret = ui_helpline__show_help(fmt, args); + else if (use_browser == 2) + ret = perf_gtk__show_helpline(fmt, args); else ret = vfprintf(stderr, fmt, args); va_end(args); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 015c91dbc09..bb2e7d1007a 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include "event.h" +#include "../ui/helpline.h" extern int verbose; extern bool quiet, dump_trace; @@ -15,32 +16,26 @@ struct ui_progress; struct perf_error_ops; #if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) -static inline int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - -static inline void ui_progress__update(u64 curr __used, u64 total __used, - const char *title __used) {} +static inline void ui_progress__update(u64 curr __maybe_unused, + u64 total __maybe_unused, + const char *title __maybe_unused) {} #define ui__error(format, arg...) ui__warning(format, ##arg) static inline int -perf_error__register(struct perf_error_ops *eops __used) +perf_error__register(struct perf_error_ops *eops __maybe_unused) { return 0; } static inline int -perf_error__unregister(struct perf_error_ops *eops __used) +perf_error__unregister(struct perf_error_ops *eops __maybe_unused) { return 0; } #else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ -extern char ui_helpline__last_msg[]; -int ui_helpline__show_help(const char *format, va_list ap); #include "../ui/progress.h" int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); #include "../ui/util.h" diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c index 541cdc72c7d..c6caedeb1d6 100644 --- a/tools/perf/util/dso-test-data.c +++ b/tools/perf/util/dso-test-data.c @@ -23,7 +23,7 @@ static char *test_file(int size) int fd, i; unsigned char *buf; - fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC); + fd = mkstemp(templ); buf = malloc(size); if (!buf) { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index ee51e9b4dc0..3e5f5430a28 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -804,6 +804,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) tmp = "union "; else if (tag == DW_TAG_structure_type) tmp = "struct "; + else if (tag == DW_TAG_enumeration_type) + tmp = "enum "; /* Write a base name */ ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); return (ret >= len) ? -E2BIG : ret; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2a6f33cd888..6715b193872 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -112,7 +112,7 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, event->comm.header.type = PERF_RECORD_COMM; size = strlen(event->comm.comm) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); memset(event->comm.comm + size, 0, machine->id_hdr_size); event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + @@ -120,7 +120,9 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, if (!full) { event->comm.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) + return -1; + goto out; } @@ -143,7 +145,7 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, sizeof(event->comm.comm)); size = strlen(event->comm.comm) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); memset(event->comm.comm + size, 0, machine->id_hdr_size); event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + @@ -151,7 +153,10 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, event->comm.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + tgid = -1; + break; + } } closedir(tasks); @@ -167,6 +172,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, { char filename[PATH_MAX]; FILE *fp; + int rc = 0; snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); @@ -222,7 +228,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(event->mmap.filename, execname, size); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); event->mmap.len -= event->mmap.start; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); @@ -231,18 +237,22 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, event->mmap.pid = tgid; event->mmap.tid = pid; - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + rc = -1; + break; + } } } fclose(fp); - return 0; + return rc; } int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { + int rc = 0; struct rb_node *nd; struct map_groups *kmaps = &machine->kmaps; union perf_event *event = zalloc((sizeof(event->mmap) + @@ -272,7 +282,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, if (pos->dso->kernel) continue; - size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); + size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size)); @@ -284,11 +294,14 @@ int perf_event__synthesize_modules(struct perf_tool *tool, memcpy(event->mmap.filename, pos->dso->long_name, pos->dso->long_name_len + 1); - process(tool, event, &synth_sample, machine); + if (process(tool, event, &synth_sample, machine) != 0) { + rc = -1; + break; + } } free(event); - return 0; + return rc; } static int __event__synthesize_thread(union perf_event *comm_event, @@ -392,12 +405,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (*end) /* only interested in proper numerical dirents */ continue; - __event__synthesize_thread(comm_event, mmap_event, pid, 1, - process, tool, machine); + if (__event__synthesize_thread(comm_event, mmap_event, pid, 1, + process, tool, machine) != 0) { + err = -1; + goto out_closedir; + } } - closedir(proc); err = 0; +out_closedir: + closedir(proc); out_free_mmap: free(mmap_event); out_free_comm: @@ -412,7 +429,7 @@ struct process_symbol_args { }; static int find_symbol_cb(void *arg, const char *name, char type, - u64 start, u64 end __used) + u64 start) { struct process_symbol_args *args = arg; @@ -477,7 +494,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, map = machine->vmlinux_maps[MAP__FUNCTION]; size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", mmap_name, symbol_name) + 1; - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); @@ -497,9 +514,9 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid); } -int perf_event__process_comm(struct perf_tool *tool __used, +int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->comm.tid); @@ -515,10 +532,10 @@ int perf_event__process_comm(struct perf_tool *tool __used, return 0; } -int perf_event__process_lost(struct perf_tool *tool __used, +int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, - struct machine *machine __used) + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", event->lost.id, event->lost.lost); @@ -538,7 +555,8 @@ static void perf_event__set_kernel_mmap_len(union perf_event *event, maps[MAP__FUNCTION]->end = ~0ULL; } -static int perf_event__process_kernel_mmap(struct perf_tool *tool __used, +static int perf_event__process_kernel_mmap(struct perf_tool *tool + __maybe_unused, union perf_event *event, struct machine *machine) { @@ -640,7 +658,7 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread; @@ -684,9 +702,9 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) event->fork.ppid, event->fork.ptid); } -int perf_event__process_task(struct perf_tool *tool __used, +int perf_event__process_task(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __used, + struct perf_sample *sample __maybe_unused, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->fork.tid); @@ -886,8 +904,9 @@ int perf_event__preprocess_sample(const union perf_event *event, al->sym = map__find_symbol(al->map, al->addr, filter); } - if (symbol_conf.sym_list && al->sym && - !strlist__has_entry(symbol_conf.sym_list, al->sym->name)) + if (symbol_conf.sym_list && + (!al->sym || !strlist__has_entry(symbol_conf.sym_list, + al->sym->name))) goto out_filtered; return 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d84870b0642..21b99e741a8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -69,6 +69,16 @@ struct sample_event { u64 array[]; }; +struct regs_dump { + u64 *regs; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -82,6 +92,8 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct stack_dump user_stack; }; #define BUILD_ID_SIZE 20 @@ -89,7 +101,7 @@ struct perf_sample { struct build_id_event { struct perf_event_header header; pid_t pid; - u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))]; char filename[]; }; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9b38681add9..ae89686102f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -57,7 +57,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist, if (evlist->cpus->map[0] < 0) opts->no_inherit = true; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + first = perf_evlist__first(evlist); list_for_each_entry(evsel, &evlist->entries, node) { perf_evsel__config(evsel, opts, first); @@ -108,6 +108,25 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, evlist->nr_entries += nr_entries; } +void __perf_evlist__set_leader(struct list_head *list) +{ + struct perf_evsel *evsel, *leader; + + leader = list_entry(list->next, struct perf_evsel, node); + leader->leader = NULL; + + list_for_each_entry(evsel, list, node) { + if (evsel != leader) + evsel->leader = leader; + } +} + +void perf_evlist__set_leader(struct perf_evlist *evlist) +{ + if (evlist->nr_entries) + __perf_evlist__set_leader(&evlist->entries); +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ -285,7 +304,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) int cpu, thread; struct perf_evsel *pos; - for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) ioctl(FD(pos, cpu, thread), @@ -296,7 +315,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; + int nfds = cpu_map__nr(evlist->cpus) * evlist->threads->nr * evlist->nr_entries; evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); return evlist->pollfd != NULL ? 0 : -ENOMEM; } @@ -357,7 +376,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) int hash; if (evlist->nr_entries == 1) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; @@ -367,7 +386,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return sid->evsel; if (!perf_evlist__sample_id_all(evlist)) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); return NULL; } @@ -456,8 +475,8 @@ void perf_evlist__munmap(struct perf_evlist *evlist) static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { - evlist->nr_mmaps = evlist->cpus->nr; - if (evlist->cpus->map[0] == -1) + evlist->nr_mmaps = cpu_map__nr(evlist->cpus); + if (cpu_map__all(evlist->cpus)) evlist->nr_mmaps = evlist->threads->nr; evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); return evlist->mmap != NULL ? 0 : -ENOMEM; @@ -603,11 +622,11 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; } - if (evlist->cpus->map[0] == -1) + if (cpu_map__all(cpus)) return perf_evlist__mmap_per_thread(evlist, prot, mask); return perf_evlist__mmap_per_cpu(evlist, prot, mask); @@ -647,39 +666,44 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) evlist->threads = NULL; } -int perf_evlist__set_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist) { - const struct thread_map *threads = evlist->threads; - const struct cpu_map *cpus = evlist->cpus; struct perf_evsel *evsel; - char *filter; - int thread; - int cpu; - int err; - int fd; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; list_for_each_entry(evsel, &evlist->entries, node) { - filter = evsel->filter; - if (!filter) + if (evsel->filter == NULL) continue; - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - fd = FD(evsel, cpu, thread); - err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); - if (err) - return err; - } - } + + err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); + if (err) + break; } - return 0; + return err; } -bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) { - struct perf_evsel *pos, *first; + struct perf_evsel *evsel; + int err = 0; + const int ncpus = cpu_map__nr(evlist->cpus), + nthreads = evlist->threads->nr; + + list_for_each_entry(evsel, &evlist->entries, node) { + err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); + if (err) + break; + } + + return err; +} - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) +{ + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_type != pos->attr.sample_type) @@ -689,23 +713,19 @@ bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) return true; } -u64 perf_evlist__sample_type(const struct perf_evlist *evlist) +u64 perf_evlist__sample_type(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_type; } -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist) +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) { - struct perf_evsel *first; + struct perf_evsel *first = perf_evlist__first(evlist); struct perf_sample *data; u64 sample_type; u16 size = 0; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - if (!first->attr.sample_id_all) goto out; @@ -729,11 +749,9 @@ out: return size; } -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *pos, *first; - - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_id_all != pos->attr.sample_id_all) @@ -743,11 +761,9 @@ bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) return true; } -bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_id_all; } @@ -757,21 +773,13 @@ void perf_evlist__set_selected(struct perf_evlist *evlist, evlist->selected = evsel; } -int perf_evlist__open(struct perf_evlist *evlist, bool group) +int perf_evlist__open(struct perf_evlist *evlist) { - struct perf_evsel *evsel, *first; + struct perf_evsel *evsel; int err, ncpus, nthreads; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - list_for_each_entry(evsel, &evlist->entries, node) { - struct xyarray *group_fd = NULL; - - if (group && evsel != first) - group_fd = first->fd; - - err = perf_evsel__open(evsel, evlist->cpus, evlist->threads, - group, group_fd); + err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); if (err < 0) goto out_err; } @@ -883,8 +891,21 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) } int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped) + struct perf_sample *sample) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + return perf_evsel__parse_sample(evsel, event, sample); +} + +size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { - struct perf_evsel *e = list_entry(evlist->entries.next, struct perf_evsel, node); - return perf_evsel__parse_sample(e, event, sample, swapped); + struct perf_evsel *evsel; + size_t printed = 0; + + list_for_each_entry(evsel, &evlist->entries, node) { + printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", + perf_evsel__name(evsel)); + } + + return printed + fprintf(fp, "\n");; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 528c1acd929..3f1fb66be02 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -5,6 +5,7 @@ #include <stdio.h> #include "../perf.h" #include "event.h" +#include "evsel.h" #include "util.h" #include <unistd.h> @@ -41,8 +42,6 @@ struct perf_evsel_str_handler { void *handler; }; -struct perf_evsel; - struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, struct thread_map *threads); void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, @@ -73,6 +72,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__set_tracepoints_handlers_array(evlist, array) \ perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) +int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); + struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); @@ -85,7 +86,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); -int perf_evlist__open(struct perf_evlist *evlist, bool group); +int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__config_attrs(struct perf_evlist *evlist, struct perf_record_opts *opts); @@ -116,20 +117,34 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, int perf_evlist__create_maps(struct perf_evlist *evlist, struct perf_target *target); void perf_evlist__delete_maps(struct perf_evlist *evlist); -int perf_evlist__set_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist); + +void __perf_evlist__set_leader(struct list_head *list); +void perf_evlist__set_leader(struct perf_evlist *evlist); -u64 perf_evlist__sample_type(const struct perf_evlist *evlist); -bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); +u64 perf_evlist__sample_type(struct perf_evlist *evlist); +bool perf_evlist__sample_id_all(struct perf_evlist *evlist); +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); -bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries); +static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.next, struct perf_evsel, node); +} + +static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.prev, struct perf_evsel, node); +} + +size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2eaae140def..ffdd94e9c9c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,7 +8,10 @@ */ #include <byteswap.h> +#include <linux/bitops.h> #include "asm/bug.h" +#include "debugfs.h" +#include "event-parse.h" #include "evsel.h" #include "evlist.h" #include "util.h" @@ -16,9 +19,10 @@ #include "thread_map.h" #include "target.h" #include "../../../include/linux/hw_breakpoint.h" +#include "../../include/linux/perf_event.h" +#include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) static int __perf_evsel__sample_size(u64 sample_type) { @@ -66,7 +70,80 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) return evsel; } -static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { +struct event_format *event_format__new(const char *sys, const char *name) +{ + int fd, n; + char *filename; + void *bf = NULL, *nbf; + size_t size = 0, alloc_size = 0; + struct event_format *format = NULL; + + if (asprintf(&filename, "%s/%s/%s/format", tracing_events_path, sys, name) < 0) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out_free_filename; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (nbf == NULL) + goto out_free_bf; + bf = nbf; + } + + n = read(fd, bf + size, BUFSIZ); + if (n < 0) + goto out_free_bf; + size += n; + } while (n > 0); + + pevent_parse_format(&format, bf, size, sys); + +out_free_bf: + free(bf); + close(fd); +out_free_filename: + free(filename); +out: + return format; +} + +struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx) +{ + struct perf_evsel *evsel = zalloc(sizeof(*evsel)); + + if (evsel != NULL) { + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), + }; + + if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) + goto out_free; + + evsel->tp_format = event_format__new(sys, name); + if (evsel->tp_format == NULL) + goto out_free; + + event_attr_init(&attr); + attr.config = evsel->tp_format->id; + attr.sample_period = 1; + perf_evsel__init(evsel, &attr, idx); + } + + return evsel; + +out_free: + free(evsel->name); + free(evsel); + return NULL; +} + +const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "cycles", "instructions", "cache-references", @@ -129,12 +206,12 @@ static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size) return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { +const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { "cpu-clock", "task-clock", "page-faults", "context-switches", - "CPU-migrations", + "cpu-migrations", "minor-faults", "major-faults", "alignment-faults", @@ -317,7 +394,8 @@ const char *perf_evsel__name(struct perf_evsel *evsel) break; default: - scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); + scnprintf(bf, sizeof(bf), "unknown attr type: %d", + evsel->attr.type); break; } @@ -367,9 +445,18 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, attr->mmap_data = track; } - if (opts->call_graph) + if (opts->call_graph) { attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + if (opts->call_graph == CALLCHAIN_DWARF) { + attr->sample_type |= PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER; + attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_stack_user = opts->stack_dump_size; + attr->exclude_callchain_user = 1; + } + } + if (perf_target__has_cpu(&opts->target)) attr->sample_type |= PERF_SAMPLE_CPU; @@ -421,6 +508,24 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 0 : -ENOMEM; } +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter) +{ + int cpu, thread; + + for (cpu = 0; cpu < ncpus; cpu++) { + for (thread = 0; thread < nthreads; thread++) { + int fd = FD(evsel, cpu, thread), + err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); + + if (err) + return err; + } + } + + return 0; +} + int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); @@ -481,6 +586,9 @@ void perf_evsel__delete(struct perf_evsel *evsel) { perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); + free(evsel->group_name); + if (evsel->tp_format) + pevent_free_format(evsel->tp_format); free(evsel->name); free(evsel); } @@ -556,9 +664,28 @@ int __perf_evsel__read(struct perf_evsel *evsel, return 0; } +static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) +{ + struct perf_evsel *leader = evsel->leader; + int fd; + + if (!leader) + return -1; + + /* + * Leader must be already processed/open, + * if not it's a bug. + */ + BUG_ON(!leader->fd); + + fd = FD(leader, cpu, thread); + BUG_ON(fd == -1); + + return fd; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds) + struct thread_map *threads) { int cpu, thread; unsigned long flags = 0; @@ -574,13 +701,15 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } for (cpu = 0; cpu < cpus->nr; cpu++) { - int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1; for (thread = 0; thread < threads->nr; thread++) { + int group_fd; if (!evsel->cgrp) pid = threads->map[thread]; + group_fd = get_group_fd(evsel, cpu, thread); + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], @@ -589,9 +718,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, err = -errno; goto out_close; } - - if (group && group_fd == -1) - group_fd = FD(evsel, cpu, thread); } } @@ -635,8 +761,7 @@ static struct { }; int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ @@ -646,30 +771,28 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads, group, group_fd); + return __perf_evsel__open(evsel, cpus, threads); } int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fd) + struct cpu_map *cpus) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, - group_fd); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); } int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, - group_fd); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } -static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample, - bool swapped) +static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, + const union perf_event *event, + struct perf_sample *sample) { + u64 type = evsel->attr.sample_type; const u64 *array = event->sample.array; + bool swapped = evsel->needs_swap; union u64_swap u; array += ((event->header.size - @@ -730,9 +853,11 @@ static bool sample_overlap(const union perf_event *event, } int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *data, bool swapped) + struct perf_sample *data) { u64 type = evsel->attr.sample_type; + u64 regs_user = evsel->attr.sample_regs_user; + bool swapped = evsel->needs_swap; const u64 *array; /* @@ -749,7 +874,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data, swapped); + return perf_evsel__parse_id_sample(evsel, event, data); } array = event->sample.array; @@ -869,6 +994,32 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, sz /= sizeof(u64); array += sz; } + + if (type & PERF_SAMPLE_REGS_USER) { + /* First u64 tells us if we have any regs in sample. */ + u64 avail = *array++; + + if (avail) { + data->user_regs.regs = (u64 *)array; + array += hweight_long(regs_user); + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + u64 size = *array++; + + data->user_stack.offset = ((char *)(array - 1) + - (char *) event); + + if (!size) { + data->user_stack.size = 0; + } else { + data->user_stack.data = (char *)array; + array += size / sizeof(*array); + data->user_stack.size = *array; + } + } + return 0; } @@ -947,3 +1098,72 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, return 0; } + +struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) +{ + return pevent_find_field(evsel->tp_format, name); +} + +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) +{ + struct format_field *field = perf_evsel__field(evsel, name); + int offset; + + if (!field) + return NULL; + + offset = field->offset; + + if (field->flags & FIELD_IS_DYNAMIC) { + offset = *(int *)(sample->raw_data + field->offset); + offset &= 0xffff; + } + + return sample->raw_data + offset; +} + +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) +{ + struct format_field *field = perf_evsel__field(evsel, name); + void *ptr; + u64 value; + + if (!field) + return 0; + + ptr = sample->raw_data + field->offset; + + switch (field->size) { + case 1: + return *(u8 *)ptr; + case 2: + value = *(u16 *)ptr; + break; + case 4: + value = *(u32 *)ptr; + break; + case 8: + value = *(u64 *)ptr; + break; + default: + return 0; + } + + if (!evsel->needs_swap) + return value; + + switch (field->size) { + case 2: + return bswap_16(value); + case 4: + return bswap_32(value); + case 8: + return bswap_64(value); + default: + return 0; + } + + return 0; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b559929983b..3ead0d59c03 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -53,9 +53,10 @@ struct perf_evsel { u64 *id; struct perf_counts *counts; int idx; - int ids; + u32 ids; struct hists hists; char *name; + struct event_format *tp_format; union { void *priv; off_t id_offset; @@ -65,8 +66,14 @@ struct perf_evsel { void *func; void *data; } handler; + struct cpu_map *cpus; unsigned int sample_size; bool supported; + bool needs_swap; + /* parse modifier helper */ + int exclude_GH; + struct perf_evsel *leader; + char *group_name; }; struct cpu_map; @@ -75,6 +82,10 @@ struct perf_evlist; struct perf_record_opts; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); +struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx); + +struct event_format *event_format__new(const char *sys, const char *name); + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); void perf_evsel__exit(struct perf_evsel *evsel); @@ -92,8 +103,10 @@ extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX] [PERF_EVSEL__MAX_ALIASES]; extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX] [PERF_EVSEL__MAX_ALIASES]; -const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] - [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] + [PERF_EVSEL__MAX_ALIASES]; +extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX]; +extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); @@ -105,21 +118,46 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, + const char *filter); + int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fds); + struct cpu_map *cpus); int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); +struct perf_sample; + +void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name); +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name); + +static inline char *perf_evsel__strval(struct perf_evsel *evsel, + struct perf_sample *sample, + const char *name) +{ + return perf_evsel__rawptr(evsel, sample, name); +} + +struct format_field; + +struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); + #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ evsel->attr.config == PERF_COUNT_##c) +static inline bool perf_evsel__match2(struct perf_evsel *e1, + struct perf_evsel *e2) +{ + return (e1->attr.type == e2->attr.type) && + (e1->attr.config == e2->attr.config); +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); @@ -181,5 +219,10 @@ static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, void hists__init(struct hists *hists); int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *sample, bool swapped); + struct perf_sample *sample); + +static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) +{ + return list_entry(evsel->node.next, struct perf_evsel, node); +} #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index f06f6fd148f..389590c1ad2 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -21,4 +21,19 @@ do p }' "Documentation/perf-$cmd.txt" done + +echo "#ifndef NO_LIBELF_SUPPORT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* NO_LIBELF_SUPPORT */" echo "};" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 74ea3c2f813..7daad237dea 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -20,11 +20,14 @@ #include "symbol.h" #include "debug.h" #include "cpumap.h" +#include "pmu.h" +#include "vdso.h" +#include "strbuf.h" static bool no_buildid_cache = false; -static int event_count; -static struct perf_trace_event_type *events; +static int trace_event_count; +static struct perf_trace_event_type *trace_events; static u32 header_argc; static const char **header_argv; @@ -36,24 +39,24 @@ int perf_header__push_event(u64 id, const char *name) if (strlen(name) > MAX_EVENT_NAME) pr_warning("Event %s will be truncated\n", name); - nevents = realloc(events, (event_count + 1) * sizeof(*events)); + nevents = realloc(trace_events, (trace_event_count + 1) * sizeof(*trace_events)); if (nevents == NULL) return -ENOMEM; - events = nevents; + trace_events = nevents; - memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); - events[event_count].event_id = id; - strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1); - event_count++; + memset(&trace_events[trace_event_count], 0, sizeof(struct perf_trace_event_type)); + trace_events[trace_event_count].event_id = id; + strncpy(trace_events[trace_event_count].name, name, MAX_EVENT_NAME - 1); + trace_event_count++; return 0; } char *perf_header__find_event(u64 id) { int i; - for (i = 0 ; i < event_count; i++) { - if (events[i].event_id == id) - return events[i].name; + for (i = 0 ; i < trace_event_count; i++) { + if (trace_events[i].event_id == id) + return trace_events[i].name; } return NULL; } @@ -128,7 +131,7 @@ static int do_write_string(int fd, const char *str) int ret; olen = strlen(str) + 1; - len = ALIGN(olen, NAME_ALIGN); + len = PERF_ALIGN(olen, NAME_ALIGN); /* write len, incl. \0 */ ret = do_write(fd, &len, sizeof(len)); @@ -206,6 +209,29 @@ perf_header__set_cmdline(int argc, const char **argv) continue; \ else +static int write_buildid(char *name, size_t name_len, u8 *build_id, + pid_t pid, u16 misc, int fd) +{ + int err; + struct build_id_event b; + size_t len; + + len = name_len + 1; + len = PERF_ALIGN(len, NAME_ALIGN); + + memset(&b, 0, sizeof(b)); + memcpy(&b.build_id, build_id, BUILD_ID_SIZE); + b.pid = pid; + b.header.misc = misc; + b.header.size = sizeof(b) + len; + + err = do_write(fd, &b, sizeof(b)); + if (err < 0) + return err; + + return write_padded(fd, name, name_len + 1, len); +} + static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, u16 misc, int fd) { @@ -213,24 +239,23 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, dsos__for_each_with_build_id(pos, head) { int err; - struct build_id_event b; - size_t len; + char *name; + size_t name_len; if (!pos->hit) continue; - len = pos->long_name_len + 1; - len = ALIGN(len, NAME_ALIGN); - memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); - b.pid = pid; - b.header.misc = misc; - b.header.size = sizeof(b) + len; - err = do_write(fd, &b, sizeof(b)); - if (err < 0) - return err; - err = write_padded(fd, pos->long_name, - pos->long_name_len + 1, len); - if (err < 0) + + if (is_vdso_map(pos->short_name)) { + name = (char *) VDSO__MAP_NAME; + name_len = sizeof(VDSO__MAP_NAME) + 1; + } else { + name = pos->long_name; + name_len = pos->long_name_len + 1; + } + + err = write_buildid(name, name_len, pos->build_id, + pid, misc, fd); + if (err) return err; } @@ -276,19 +301,20 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd) } int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms) + const char *name, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname, *filename = zalloc(size), *linkname = zalloc(size), *targetname; int len, err = -1; + bool slash = is_kallsyms || is_vdso; if (is_kallsyms) { if (symbol_conf.kptr_restrict) { pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); return 0; } - realname = (char *)name; + realname = (char *) name; } else realname = realpath(name, NULL); @@ -296,7 +322,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, goto out_free; len = scnprintf(filename, size, "%s%s%s", - debugdir, is_kallsyms ? "/" : "", realname); + debugdir, slash ? "/" : "", + is_vdso ? VDSO__MAP_NAME : realname); if (mkdir_p(filename, 0755)) goto out_free; @@ -332,13 +359,14 @@ out_free: static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, const char *name, const char *debugdir, - bool is_kallsyms) + bool is_kallsyms, bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms); + return build_id_cache__add_s(sbuild_id, debugdir, name, + is_kallsyms, is_vdso); } int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) @@ -382,9 +410,11 @@ out_free: static int dso__cache_build_id(struct dso *dso, const char *debugdir) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_vdso = is_vdso_map(dso->short_name); return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), - dso->long_name, debugdir, is_kallsyms); + dso->long_name, debugdir, + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) @@ -446,7 +476,7 @@ static bool perf_session__read_build_ids(struct perf_session *session, bool with return ret; } -static int write_tracing_data(int fd, struct perf_header *h __used, +static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { return read_tracing_data(fd, &evlist->entries); @@ -454,7 +484,7 @@ static int write_tracing_data(int fd, struct perf_header *h __used, static int write_build_id(int fd, struct perf_header *h, - struct perf_evlist *evlist __used) + struct perf_evlist *evlist __maybe_unused) { struct perf_session *session; int err; @@ -475,8 +505,8 @@ static int write_build_id(int fd, struct perf_header *h, return 0; } -static int write_hostname(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_hostname(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -488,8 +518,8 @@ static int write_hostname(int fd, struct perf_header *h __used, return do_write_string(fd, uts.nodename); } -static int write_osrelease(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_osrelease(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -501,8 +531,8 @@ static int write_osrelease(int fd, struct perf_header *h __used, return do_write_string(fd, uts.release); } -static int write_arch(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_arch(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct utsname uts; int ret; @@ -514,14 +544,14 @@ static int write_arch(int fd, struct perf_header *h __used, return do_write_string(fd, uts.machine); } -static int write_version(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_version(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return do_write_string(fd, perf_version_string); } -static int write_cpudesc(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpudesc(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { #ifndef CPUINFO_PROC #define CPUINFO_PROC NULL @@ -579,8 +609,8 @@ done: return ret; } -static int write_nrcpus(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_nrcpus(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { long nr; u32 nrc, nra; @@ -605,15 +635,14 @@ static int write_nrcpus(int fd, struct perf_header *h __used, return do_write(fd, &nra, sizeof(nra)); } -static int write_event_desc(int fd, struct perf_header *h __used, +static int write_event_desc(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { - struct perf_evsel *attr; - u32 nre = 0, nri, sz; + struct perf_evsel *evsel; + u32 nre, nri, sz; int ret; - list_for_each_entry(attr, &evlist->entries, node) - nre++; + nre = evlist->nr_entries; /* * write number of events @@ -625,14 +654,14 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * size of perf_event_attr struct */ - sz = (u32)sizeof(attr->attr); + sz = (u32)sizeof(evsel->attr); ret = do_write(fd, &sz, sizeof(sz)); if (ret < 0) return ret; - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { - ret = do_write(fd, &attr->attr, sz); + ret = do_write(fd, &evsel->attr, sz); if (ret < 0) return ret; /* @@ -642,7 +671,7 @@ static int write_event_desc(int fd, struct perf_header *h __used, * copy into an nri to be independent of the * type of ids, */ - nri = attr->ids; + nri = evsel->ids; ret = do_write(fd, &nri, sizeof(nri)); if (ret < 0) return ret; @@ -650,21 +679,21 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * write event string as passed on cmdline */ - ret = do_write_string(fd, perf_evsel__name(attr)); + ret = do_write_string(fd, perf_evsel__name(evsel)); if (ret < 0) return ret; /* * write unique ids for this event */ - ret = do_write(fd, attr->id, attr->ids * sizeof(u64)); + ret = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (ret < 0) return ret; } return 0; } -static int write_cmdline(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cmdline(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; char proc[32]; @@ -832,8 +861,8 @@ static struct cpu_topo *build_cpu_topology(void) return tp; } -static int write_cpu_topology(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { struct cpu_topo *tp; u32 i; @@ -868,8 +897,8 @@ done: -static int write_total_mem(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_total_mem(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; FILE *fp; @@ -954,8 +983,8 @@ done: return ret; } -static int write_numa_topology(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; size_t len = 0; @@ -1004,16 +1033,56 @@ done: } /* + * File format: + * + * struct pmu_mappings { + * u32 pmu_num; + * struct pmu_map { + * u32 type; + * char name[]; + * }[pmu_num]; + * }; + */ + +static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_pmu *pmu = NULL; + off_t offset = lseek(fd, 0, SEEK_CUR); + __u32 pmu_num = 0; + + /* write real pmu_num later */ + do_write(fd, &pmu_num, sizeof(pmu_num)); + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + do_write(fd, &pmu->type, sizeof(pmu->type)); + do_write_string(fd, pmu->name); + } + + if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { + /* discard all */ + lseek(fd, offset, SEEK_SET); + return -1; + } + + return 0; +} + +/* * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c */ -int __attribute__((weak)) get_cpuid(char *buffer __used, size_t sz __used) +int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused, + size_t sz __maybe_unused) { return -1; } -static int write_cpuid(int fd, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_cpuid(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { char buffer[64]; int ret; @@ -1027,133 +1096,113 @@ write_it: return do_write_string(fd, buffer); } -static int write_branch_stack(int fd __used, struct perf_header *h __used, - struct perf_evlist *evlist __used) +static int write_branch_stack(int fd __maybe_unused, + struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return 0; } -static void print_hostname(struct perf_header *ph, int fd, FILE *fp) +static void print_hostname(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# hostname : %s\n", str); - free(str); + fprintf(fp, "# hostname : %s\n", ph->env.hostname); } -static void print_osrelease(struct perf_header *ph, int fd, FILE *fp) +static void print_osrelease(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# os release : %s\n", str); - free(str); + fprintf(fp, "# os release : %s\n", ph->env.os_release); } -static void print_arch(struct perf_header *ph, int fd, FILE *fp) +static void print_arch(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# arch : %s\n", str); - free(str); + fprintf(fp, "# arch : %s\n", ph->env.arch); } -static void print_cpudesc(struct perf_header *ph, int fd, FILE *fp) +static void print_cpudesc(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# cpudesc : %s\n", str); - free(str); + fprintf(fp, "# cpudesc : %s\n", ph->env.cpu_desc); } -static void print_nrcpus(struct perf_header *ph, int fd, FILE *fp) +static void print_nrcpus(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; - u32 nr; - - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - nr = -1; /* interpreted as error */ - - if (ph->needs_swap) - nr = bswap_32(nr); - - fprintf(fp, "# nrcpus online : %u\n", nr); - - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - nr = -1; /* interpreted as error */ - - if (ph->needs_swap) - nr = bswap_32(nr); - - fprintf(fp, "# nrcpus avail : %u\n", nr); + fprintf(fp, "# nrcpus online : %u\n", ph->env.nr_cpus_online); + fprintf(fp, "# nrcpus avail : %u\n", ph->env.nr_cpus_avail); } -static void print_version(struct perf_header *ph, int fd, FILE *fp) +static void print_version(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# perf version : %s\n", str); - free(str); + fprintf(fp, "# perf version : %s\n", ph->env.version); } -static void print_cmdline(struct perf_header *ph, int fd, FILE *fp) +static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; + int nr, i; char *str; - u32 nr, i; - - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_cmdline; + str = ph->env.cmdline; fprintf(fp, "# cmdline : "); for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "%s ", str); - free(str); + str += strlen(str) + 1; } fputc('\n', fp); } -static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) +static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; - u32 nr, i; + int nr, i; char *str; - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_sibling_cores; + str = ph->env.sibling_cores; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "# sibling cores : %s\n", str); - free(str); + str += strlen(str) + 1; } - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - return; - - if (ph->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_sibling_threads; + str = ph->env.sibling_threads; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); fprintf(fp, "# sibling threads : %s\n", str); - free(str); + str += strlen(str) + 1; } } -static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +static void free_event_desc(struct perf_evsel *events) +{ + struct perf_evsel *evsel; + + if (!events) + return; + + for (evsel = events; evsel->attr.size; evsel++) { + if (evsel->name) + free(evsel->name); + if (evsel->id) + free(evsel->id); + } + + free(events); +} + +static struct perf_evsel * +read_event_desc(struct perf_header *ph, int fd) { - struct perf_event_attr attr; - uint64_t id; + struct perf_evsel *evsel, *events = NULL; + u64 *id; void *buf = NULL; - char *str; u32 nre, sz, nr, i, j; ssize_t ret; size_t msz; @@ -1173,18 +1222,22 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) sz = bswap_32(sz); - memset(&attr, 0, sizeof(attr)); - /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) goto error; - msz = sizeof(attr); + /* the last event terminates with evsel->attr.size == 0: */ + events = calloc(nre + 1, sizeof(*events)); + if (!events) + goto error; + + msz = sizeof(evsel->attr); if (sz < msz) msz = sz; - for (i = 0 ; i < nre; i++) { + for (i = 0, evsel = events; i < nre; evsel++, i++) { + evsel->idx = i; /* * must read entire on-file attr struct to @@ -1197,146 +1250,188 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) perf_event__attr_swap(buf); - memcpy(&attr, buf, msz); + memcpy(&evsel->attr, buf, msz); ret = read(fd, &nr, sizeof(nr)); if (ret != (ssize_t)sizeof(nr)) goto error; - if (ph->needs_swap) + if (ph->needs_swap) { nr = bswap_32(nr); + evsel->needs_swap = true; + } - str = do_read_string(fd, ph); - fprintf(fp, "# event : name = %s, ", str); - free(str); + evsel->name = do_read_string(fd, ph); + + if (!nr) + continue; + + id = calloc(nr, sizeof(*id)); + if (!id) + goto error; + evsel->ids = nr; + evsel->id = id; + + for (j = 0 ; j < nr; j++) { + ret = read(fd, id, sizeof(*id)); + if (ret != (ssize_t)sizeof(*id)) + goto error; + if (ph->needs_swap) + *id = bswap_64(*id); + id++; + } + } +out: + if (buf) + free(buf); + return events; +error: + if (events) + free_event_desc(events); + events = NULL; + goto out; +} + +static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +{ + struct perf_evsel *evsel, *events = read_event_desc(ph, fd); + u32 j; + u64 *id; + + if (!events) { + fprintf(fp, "# event desc: not available or unable to read\n"); + return; + } + + for (evsel = events; evsel->attr.size; evsel++) { + fprintf(fp, "# event : name = %s, ", evsel->name); fprintf(fp, "type = %d, config = 0x%"PRIx64 ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - attr.type, - (u64)attr.config, - (u64)attr.config1, - (u64)attr.config2); + evsel->attr.type, + (u64)evsel->attr.config, + (u64)evsel->attr.config1, + (u64)evsel->attr.config2); fprintf(fp, ", excl_usr = %d, excl_kern = %d", - attr.exclude_user, - attr.exclude_kernel); + evsel->attr.exclude_user, + evsel->attr.exclude_kernel); fprintf(fp, ", excl_host = %d, excl_guest = %d", - attr.exclude_host, - attr.exclude_guest); + evsel->attr.exclude_host, + evsel->attr.exclude_guest); - fprintf(fp, ", precise_ip = %d", attr.precise_ip); + fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - if (nr) + if (evsel->ids) { fprintf(fp, ", id = {"); - - for (j = 0 ; j < nr; j++) { - ret = read(fd, &id, sizeof(id)); - if (ret != (ssize_t)sizeof(id)) - goto error; - - if (ph->needs_swap) - id = bswap_64(id); - - if (j) - fputc(',', fp); - - fprintf(fp, " %"PRIu64, id); - } - if (nr && j == nr) + for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { + if (j) + fputc(',', fp); + fprintf(fp, " %"PRIu64, *id); + } fprintf(fp, " }"); + } + fputc('\n', fp); } - free(buf); - return; -error: - fprintf(fp, "# event desc: not available or unable to read\n"); + + free_event_desc(events); } -static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) +static void print_total_mem(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - uint64_t mem; - ssize_t ret; - - ret = read(fd, &mem, sizeof(mem)); - if (ret != sizeof(mem)) - goto error; - - if (h->needs_swap) - mem = bswap_64(mem); - - fprintf(fp, "# total memory : %"PRIu64" kB\n", mem); - return; -error: - fprintf(fp, "# total memory : unknown\n"); + fprintf(fp, "# total memory : %Lu kB\n", ph->env.total_mem); } -static void print_numa_topology(struct perf_header *h __used, int fd, FILE *fp) +static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) { - ssize_t ret; u32 nr, c, i; - char *str; + char *str, *tmp; uint64_t mem_total, mem_free; /* nr nodes */ - ret = read(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) - goto error; - - if (h->needs_swap) - nr = bswap_32(nr); + nr = ph->env.nr_numa_nodes; + str = ph->env.numa_nodes; for (i = 0; i < nr; i++) { - /* node number */ - ret = read(fd, &c, sizeof(c)); - if (ret != (ssize_t)sizeof(c)) + c = strtoul(str, &tmp, 0); + if (*tmp != ':') goto error; - if (h->needs_swap) - c = bswap_32(c); - - ret = read(fd, &mem_total, sizeof(u64)); - if (ret != sizeof(u64)) + str = tmp + 1; + mem_total = strtoull(str, &tmp, 0); + if (*tmp != ':') goto error; - ret = read(fd, &mem_free, sizeof(u64)); - if (ret != sizeof(u64)) + str = tmp + 1; + mem_free = strtoull(str, &tmp, 0); + if (*tmp != ':') goto error; - if (h->needs_swap) { - mem_total = bswap_64(mem_total); - mem_free = bswap_64(mem_free); - } - fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," " free = %"PRIu64" kB\n", - c, - mem_total, - mem_free); + c, mem_total, mem_free); - str = do_read_string(fd, h); + str = tmp + 1; fprintf(fp, "# node%u cpu list : %s\n", c, str); - free(str); } return; error: fprintf(fp, "# numa topology : not available\n"); } -static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) +static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { - char *str = do_read_string(fd, ph); - fprintf(fp, "# cpuid : %s\n", str); - free(str); + fprintf(fp, "# cpuid : %s\n", ph->env.cpuid); } -static void print_branch_stack(struct perf_header *ph __used, int fd __used, - FILE *fp) +static void print_branch_stack(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp) { fprintf(fp, "# contains samples with branch stack\n"); } +static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) +{ + const char *delimiter = "# pmu mappings: "; + char *str, *tmp; + u32 pmu_num; + u32 type; + + pmu_num = ph->env.nr_pmu_mappings; + if (!pmu_num) { + fprintf(fp, "# pmu mappings: not available\n"); + return; + } + + str = ph->env.pmu_mappings; + + while (pmu_num) { + type = strtoul(str, &tmp, 0); + if (*tmp != ':') + goto error; + + str = tmp + 1; + fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type); + + delimiter = ", "; + str += strlen(str) + 1; + pmu_num--; + } + + fprintf(fp, "\n"); + + if (!pmu_num) + return; +error: + fprintf(fp, "# pmu mappings: unable to read\n"); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1398,7 +1493,7 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, struct perf_session *session = container_of(header, struct perf_session, header); struct { struct perf_event_header header; - u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))]; char filename[0]; } old_bev; struct build_id_event bev; @@ -1487,28 +1582,375 @@ out: return err; } -static int process_tracing_data(struct perf_file_section *section __unused, - struct perf_header *ph __unused, - int feat __unused, int fd, void *data) +static int process_tracing_data(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, + int fd, void *data) { trace_report(fd, data, false); return 0; } static int process_build_id(struct perf_file_section *section, - struct perf_header *ph, - int feat __unused, int fd, void *data __used) + struct perf_header *ph, int fd, + void *data __maybe_unused) { if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) pr_debug("Failed to read buildids, continuing...\n"); return 0; } +static int process_hostname(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.hostname = do_read_string(fd, ph); + return ph->env.hostname ? 0 : -ENOMEM; +} + +static int process_osrelease(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.os_release = do_read_string(fd, ph); + return ph->env.os_release ? 0 : -ENOMEM; +} + +static int process_version(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.version = do_read_string(fd, ph); + return ph->env.version ? 0 : -ENOMEM; +} + +static int process_arch(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.arch = do_read_string(fd, ph); + return ph->env.arch ? 0 : -ENOMEM; +} + +static int process_nrcpus(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret; + u32 nr; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cpus_online = nr; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cpus_avail = nr; + return 0; +} + +static int process_cpudesc(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.cpu_desc = do_read_string(fd, ph); + return ph->env.cpu_desc ? 0 : -ENOMEM; +} + +static int process_cpuid(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + ph->env.cpuid = do_read_string(fd, ph); + return ph->env.cpuid ? 0 : -ENOMEM; +} + +static int process_total_mem(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + uint64_t mem; + size_t ret; + + ret = read(fd, &mem, sizeof(mem)); + if (ret != sizeof(mem)) + return -1; + + if (ph->needs_swap) + mem = bswap_64(mem); + + ph->env.total_mem = mem; + return 0; +} + +static struct perf_evsel * +perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + if (evsel->idx == idx) + return evsel; + } + + return NULL; +} + +static void +perf_evlist__set_event_name(struct perf_evlist *evlist, + struct perf_evsel *event) +{ + struct perf_evsel *evsel; + + if (!event->name) + return; + + evsel = perf_evlist__find_by_index(evlist, event->idx); + if (!evsel) + return; + + if (evsel->name) + return; + + evsel->name = strdup(event->name); +} + +static int +process_event_desc(struct perf_file_section *section __maybe_unused, + struct perf_header *header, int fd, + void *data __maybe_unused) +{ + struct perf_session *session; + struct perf_evsel *evsel, *events = read_event_desc(header, fd); + + if (!events) + return 0; + + session = container_of(header, struct perf_session, header); + for (evsel = events; evsel->attr.size; evsel++) + perf_evlist__set_event_name(session->evlist, evsel); + + free_event_desc(events); + + return 0; +} + +static int process_cmdline(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret; + char *str; + u32 nr, i; + struct strbuf sb; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_cmdline = nr; + strbuf_init(&sb, 128); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.cmdline = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_cpu_topology(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret; + u32 nr, i; + char *str; + struct strbuf sb; + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_sibling_cores = nr; + strbuf_init(&sb, 128); + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.sibling_cores = strbuf_detach(&sb, NULL); + + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + return -1; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_sibling_threads = nr; + + for (i = 0; i < nr; i++) { + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.sibling_threads = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_numa_topology(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret; + u32 nr, node, i; + char *str; + uint64_t mem_total, mem_free; + struct strbuf sb; + + /* nr nodes */ + ret = read(fd, &nr, sizeof(nr)); + if (ret != sizeof(nr)) + goto error; + + if (ph->needs_swap) + nr = bswap_32(nr); + + ph->env.nr_numa_nodes = nr; + strbuf_init(&sb, 256); + + for (i = 0; i < nr; i++) { + /* node number */ + ret = read(fd, &node, sizeof(node)); + if (ret != sizeof(node)) + goto error; + + ret = read(fd, &mem_total, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + ret = read(fd, &mem_free, sizeof(u64)); + if (ret != sizeof(u64)) + goto error; + + if (ph->needs_swap) { + node = bswap_32(node); + mem_total = bswap_64(mem_total); + mem_free = bswap_64(mem_free); + } + + strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":", + node, mem_total, mem_free); + + str = do_read_string(fd, ph); + if (!str) + goto error; + + /* include a NULL character at the end */ + strbuf_add(&sb, str, strlen(str) + 1); + free(str); + } + ph->env.numa_nodes = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + +static int process_pmu_mappings(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret; + char *name; + u32 pmu_num; + u32 type; + struct strbuf sb; + + ret = read(fd, &pmu_num, sizeof(pmu_num)); + if (ret != sizeof(pmu_num)) + return -1; + + if (ph->needs_swap) + pmu_num = bswap_32(pmu_num); + + if (!pmu_num) { + pr_debug("pmu mappings not available\n"); + return 0; + } + + ph->env.nr_pmu_mappings = pmu_num; + strbuf_init(&sb, 128); + + while (pmu_num) { + if (read(fd, &type, sizeof(type)) != sizeof(type)) + goto error; + if (ph->needs_swap) + type = bswap_32(type); + + name = do_read_string(fd, ph); + if (!name) + goto error; + + strbuf_addf(&sb, "%u:%s", type, name); + /* include a NULL character at the end */ + strbuf_add(&sb, "", 1); + + free(name); + pmu_num--; + } + ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + return 0; + +error: + strbuf_release(&sb); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); int (*process)(struct perf_file_section *section, - struct perf_header *h, int feat, int fd, void *data); + struct perf_header *h, int fd, void *data); const char *name; bool full_only; }; @@ -1520,7 +1962,7 @@ struct feature_ops { .process = process_##func } #define FEAT_OPF(n, func) \ [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .full_only = true } + .process = process_##func, .full_only = true } /* feature_ops not implemented: */ #define print_tracing_data NULL @@ -1529,19 +1971,20 @@ struct feature_ops { static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_TRACING_DATA, tracing_data), FEAT_OPP(HEADER_BUILD_ID, build_id), - FEAT_OPA(HEADER_HOSTNAME, hostname), - FEAT_OPA(HEADER_OSRELEASE, osrelease), - FEAT_OPA(HEADER_VERSION, version), - FEAT_OPA(HEADER_ARCH, arch), - FEAT_OPA(HEADER_NRCPUS, nrcpus), - FEAT_OPA(HEADER_CPUDESC, cpudesc), - FEAT_OPA(HEADER_CPUID, cpuid), - FEAT_OPA(HEADER_TOTAL_MEM, total_mem), - FEAT_OPA(HEADER_EVENT_DESC, event_desc), - FEAT_OPA(HEADER_CMDLINE, cmdline), + FEAT_OPP(HEADER_HOSTNAME, hostname), + FEAT_OPP(HEADER_OSRELEASE, osrelease), + FEAT_OPP(HEADER_VERSION, version), + FEAT_OPP(HEADER_ARCH, arch), + FEAT_OPP(HEADER_NRCPUS, nrcpus), + FEAT_OPP(HEADER_CPUDESC, cpudesc), + FEAT_OPP(HEADER_CPUID, cpuid), + FEAT_OPP(HEADER_TOTAL_MEM, total_mem), + FEAT_OPP(HEADER_EVENT_DESC, event_desc), + FEAT_OPP(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), + FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), }; struct header_print_data { @@ -1683,17 +2126,17 @@ int perf_session__write_header(struct perf_session *session, struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header *header = &session->header; - struct perf_evsel *attr, *pair = NULL; + struct perf_evsel *evsel, *pair = NULL; int err; lseek(fd, sizeof(f_header), SEEK_SET); if (session->evlist != evlist) - pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(session->evlist); - list_for_each_entry(attr, &evlist->entries, node) { - attr->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + list_for_each_entry(evsel, &evlist->entries, node) { + evsel->id_offset = lseek(fd, 0, SEEK_CUR); + err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { out_err_write: pr_debug("failed to write perf header\n"); @@ -1703,19 +2146,19 @@ out_err_write: err = do_write(fd, pair->id, pair->ids * sizeof(u64)); if (err < 0) goto out_err_write; - attr->ids += pair->ids; - pair = list_entry(pair->node.next, struct perf_evsel, node); + evsel->ids += pair->ids; + pair = perf_evsel__next(pair); } } header->attr_offset = lseek(fd, 0, SEEK_CUR); - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { f_attr = (struct perf_file_attr){ - .attr = attr->attr, + .attr = evsel->attr, .ids = { - .offset = attr->id_offset, - .size = attr->ids * sizeof(u64), + .offset = evsel->id_offset, + .size = evsel->ids * sizeof(u64), } }; err = do_write(fd, &f_attr, sizeof(f_attr)); @@ -1726,9 +2169,9 @@ out_err_write: } header->event_offset = lseek(fd, 0, SEEK_CUR); - header->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) { - err = do_write(fd, events, header->event_size); + header->event_size = trace_event_count * sizeof(struct perf_trace_event_type); + if (trace_events) { + err = do_write(fd, trace_events, header->event_size); if (err < 0) { pr_debug("failed to write perf header events\n"); return err; @@ -1829,6 +2272,8 @@ out_free: static const int attr_file_abi_sizes[] = { [0] = PERF_ATTR_SIZE_VER0, [1] = PERF_ATTR_SIZE_VER1, + [2] = PERF_ATTR_SIZE_VER2, + [3] = PERF_ATTR_SIZE_VER3, 0, }; @@ -2019,7 +2464,7 @@ static int perf_file_section__process(struct perf_file_section *section, if (!feat_ops[feat].process) return 0; - return feat_ops[feat].process(section, ph, feat, fd, data); + return feat_ops[feat].process(section, ph, fd, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, @@ -2108,32 +2553,39 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } -static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, - struct pevent *pevent) +static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, + struct pevent *pevent) { - struct event_format *event = pevent_find_event(pevent, - evsel->attr.config); + struct event_format *event; char bf[128]; + /* already prepared */ + if (evsel->tp_format) + return 0; + + event = pevent_find_event(pevent, evsel->attr.config); if (event == NULL) return -1; - snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); - evsel->name = strdup(bf); - if (event->name == NULL) - return -1; + if (!evsel->name) { + snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); + evsel->name = strdup(bf); + if (evsel->name == NULL) + return -1; + } + evsel->tp_format = event; return 0; } -static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist, - struct pevent *pevent) +static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, + struct pevent *pevent) { struct perf_evsel *pos; list_for_each_entry(pos, &evlist->entries, node) { if (pos->attr.type == PERF_TYPE_TRACEPOINT && - perf_evsel__set_tracepoint_name(pos, pevent)) + perf_evsel__prepare_tracepoint_event(pos, pevent)) return -1; } @@ -2176,6 +2628,8 @@ int perf_session__read_header(struct perf_session *session, int fd) if (evsel == NULL) goto out_delete_evlist; + + evsel->needs_swap = header->needs_swap; /* * Do it before so that if perf_evsel__alloc_id fails, this * entry gets purged too at perf_evlist__delete(). @@ -2207,13 +2661,13 @@ int perf_session__read_header(struct perf_session *session, int fd) if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); - events = malloc(f_header.event_types.size); - if (events == NULL) + trace_events = malloc(f_header.event_types.size); + if (trace_events == NULL) return -ENOMEM; - if (perf_header__getbuffer64(header, fd, events, + if (perf_header__getbuffer64(header, fd, trace_events, f_header.event_types.size)) goto out_errno; - event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); + trace_event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } perf_header__process_sections(header, fd, &session->pevent, @@ -2221,7 +2675,8 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, header->data_offset, SEEK_SET); - if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent)) + if (perf_evlist__prepare_tracepoint_events(session->evlist, + session->pevent)) goto out_delete_evlist; header->frozen = 1; @@ -2236,7 +2691,7 @@ out_delete_evlist: } int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process) { union perf_event *ev; @@ -2244,7 +2699,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, int err; size = sizeof(struct perf_event_attr); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); size += sizeof(struct perf_event_header); size += ids * sizeof(u64); @@ -2257,9 +2712,12 @@ int perf_event__synthesize_attr(struct perf_tool *tool, memcpy(ev->attr.id, id, ids * sizeof(u64)); ev->attr.header.type = PERF_RECORD_HEADER_ATTR; - ev->attr.header.size = size; + ev->attr.header.size = (u16)size; - err = process(tool, ev, NULL, NULL); + if (ev->attr.header.size == size) + err = process(tool, ev, NULL, NULL); + else + err = -E2BIG; free(ev); @@ -2270,12 +2728,12 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) { - struct perf_evsel *attr; + struct perf_evsel *evsel; int err = 0; - list_for_each_entry(attr, &session->evlist->entries, node) { - err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids, - attr->id, process); + list_for_each_entry(evsel, &session->evlist->entries, node) { + err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, + evsel->id, process); if (err) { pr_debug("failed to create perf header attribute\n"); return err; @@ -2288,7 +2746,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist) { - unsigned int i, ids, n_ids; + u32 i, ids, n_ids; struct perf_evsel *evsel; struct perf_evlist *evlist = *pevlist; @@ -2339,7 +2797,7 @@ int perf_event__synthesize_event_type(struct perf_tool *tool, ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE; size = strlen(ev.event_type.event_type.name); - size = ALIGN(size, sizeof(u64)); + size = PERF_ALIGN(size, sizeof(u64)); ev.event_type.header.size = sizeof(ev.event_type) - (sizeof(ev.event_type.event_type.name) - size); @@ -2355,8 +2813,8 @@ int perf_event__synthesize_event_types(struct perf_tool *tool, struct perf_trace_event_type *type; int i, err = 0; - for (i = 0; i < event_count; i++) { - type = &events[i]; + for (i = 0; i < trace_event_count; i++) { + type = &trace_events[i]; err = perf_event__synthesize_event_type(tool, type->event_id, type->name, process, @@ -2370,7 +2828,7 @@ int perf_event__synthesize_event_types(struct perf_tool *tool, return err; } -int perf_event__process_event_type(struct perf_tool *tool __unused, +int perf_event__process_event_type(struct perf_tool *tool __maybe_unused, union perf_event *event) { if (perf_header__push_event(event->event_type.event_type.event_id, @@ -2387,7 +2845,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, union perf_event ev; struct tracing_data *tdata; ssize_t size = 0, aligned_size = 0, padding; - int err __used = 0; + int err __maybe_unused = 0; /* * We are going to store the size of the data followed @@ -2408,7 +2866,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; size = tdata->size; - aligned_size = ALIGN(size, sizeof(u64)); + aligned_size = PERF_ALIGN(size, sizeof(u64)); padding = aligned_size - size; ev.tracing_data.header.size = sizeof(ev.tracing_data); ev.tracing_data.size = aligned_size; @@ -2439,7 +2897,7 @@ int perf_event__process_tracing_data(union perf_event *event, size_read = trace_report(session->fd, &session->pevent, session->repipe); - padding = ALIGN(size_read, sizeof(u64)) - size_read; + padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; if (read(session->fd, buf, padding) < 0) die("reading input file"); @@ -2452,6 +2910,9 @@ int perf_event__process_tracing_data(union perf_event *event, if (size_read + padding != size) die("tracing data size mismatch"); + perf_evlist__prepare_tracepoint_events(session->evlist, + session->pevent); + return size_read + padding; } @@ -2470,7 +2931,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, memset(&ev, 0, sizeof(ev)); len = pos->long_name_len + 1; - len = ALIGN(len, NAME_ALIGN); + len = PERF_ALIGN(len, NAME_ALIGN); memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc; @@ -2483,7 +2944,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, return err; } -int perf_event__process_build_id(struct perf_tool *tool __used, +int perf_event__process_build_id(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2d42b3e1826..99bdd3abce5 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -28,6 +28,7 @@ enum { HEADER_CPU_TOPOLOGY, HEADER_NUMA_TOPOLOGY, HEADER_BRANCH_STACK, + HEADER_PMU_MAPPINGS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -57,6 +58,29 @@ struct perf_header; int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); +struct perf_session_env { + char *hostname; + char *os_release; + char *version; + char *arch; + int nr_cpus_online; + int nr_cpus_avail; + char *cpu_desc; + char *cpuid; + unsigned long long total_mem; + + int nr_cmdline; + char *cmdline; + int nr_sibling_cores; + char *sibling_cores; + int nr_sibling_threads; + char *sibling_threads; + int nr_numa_nodes; + char *numa_nodes; + int nr_pmu_mappings; + char *pmu_mappings; +}; + struct perf_header { int frozen; bool needs_swap; @@ -66,6 +90,7 @@ struct perf_header { u64 event_offset; u64 event_size; DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); + struct perf_session_env env; }; struct perf_evlist; @@ -95,11 +120,11 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms); + const char *name, bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6f2975a0035..8b1f6e891b8 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -3,6 +3,7 @@ #include "exec_cmd.h" #include "levenshtein.h" #include "help.h" +#include <termios.h> void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { @@ -331,7 +332,8 @@ const char *help_unknown_cmd(const char *cmd) exit(1); } -int cmd_version(int argc __used, const char **argv __used, const char *prefix __used) +int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, + const char *prefix __maybe_unused) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f247ef2789a..236bc9d98ff 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -45,7 +45,7 @@ bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) return false; } -static void hists__reset_col_len(struct hists *hists) +void hists__reset_col_len(struct hists *hists) { enum hist_column col; @@ -63,7 +63,7 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso) hists__set_col_len(hists, dso, unresolved_col_width); } -static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) +void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; @@ -114,6 +114,22 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } } +void hists__output_recalc_col_len(struct hists *hists, int max_rows) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + int row = 0; + + hists__reset_col_len(hists); + + while (next && row++ < max_rows) { + n = rb_entry(next, struct hist_entry, rb_node); + if (!n->filtered) + hists__calc_col_len(hists, n); + next = rb_next(&n->rb_node); + } +} + static void hist_entry__add_cpumode_period(struct hist_entry *he, unsigned int cpumode, u64 period) { @@ -378,7 +394,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists __used, +static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, struct rb_root *root, struct hist_entry *he) { @@ -394,8 +410,13 @@ static bool hists__collapse_insert_entry(struct hists *hists __used, cmp = hist_entry__collapse(iter, he); if (!cmp) { - iter->period += he->period; - iter->nr_events += he->nr_events; + iter->period += he->period; + iter->period_sys += he->period_sys; + iter->period_us += he->period_us; + iter->period_guest_sys += he->period_guest_sys; + iter->period_guest_us += he->period_guest_us; + iter->nr_events += he->nr_events; + if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); callchain_merge(&callchain_cursor, @@ -547,674 +568,6 @@ void hists__output_resort_threaded(struct hists *hists) return __hists__output_resort(hists, true); } -static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) -{ - int i; - int ret = fprintf(fp, " "); - - for (i = 0; i < left_margin; i++) - ret += fprintf(fp, " "); - - return ret; -} - -static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, - int left_margin) -{ - int i; - size_t ret = callchain__fprintf_left_margin(fp, left_margin); - - for (i = 0; i < depth; i++) - if (depth_mask & (1 << i)) - ret += fprintf(fp, "| "); - else - ret += fprintf(fp, " "); - - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, - int depth, int depth_mask, int period, - u64 total_samples, u64 hits, - int left_margin) -{ - int i; - size_t ret = 0; - - ret += callchain__fprintf_left_margin(fp, left_margin); - for (i = 0; i < depth; i++) { - if (depth_mask & (1 << i)) - ret += fprintf(fp, "|"); - else - ret += fprintf(fp, " "); - if (!period && i == depth - 1) { - double percent; - - percent = hits * 100.0 / total_samples; - ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); - } else - ret += fprintf(fp, "%s", " "); - } - if (chain->ms.sym) - ret += fprintf(fp, "%s\n", chain->ms.sym->name); - else - ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); - - return ret; -} - -static struct symbol *rem_sq_bracket; -static struct callchain_list rem_hits; - -static void init_rem_hits(void) -{ - rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); - if (!rem_sq_bracket) { - fprintf(stderr, "Not enough memory to display remaining hits\n"); - return; - } - - strcpy(rem_sq_bracket->name, "[...]"); - rem_hits.ms.sym = rem_sq_bracket; -} - -static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int depth, - int depth_mask, int left_margin) -{ - struct rb_node *node, *next; - struct callchain_node *child; - struct callchain_list *chain; - int new_depth_mask = depth_mask; - u64 remaining; - size_t ret = 0; - int i; - uint entries_printed = 0; - - remaining = total_samples; - - node = rb_first(root); - while (node) { - u64 new_total; - u64 cumul; - - child = rb_entry(node, struct callchain_node, rb_node); - cumul = callchain_cumul_hits(child); - remaining -= cumul; - - /* - * The depth mask manages the output of pipes that show - * the depth. We don't want to keep the pipes of the current - * level for the last child of this depth. - * Except if we have remaining filtered hits. They will - * supersede the last child - */ - next = rb_next(node); - if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) - new_depth_mask &= ~(1 << (depth - 1)); - - /* - * But we keep the older depth mask for the line separator - * to keep the level link until we reach the last child - */ - ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, - left_margin); - i = 0; - list_for_each_entry(chain, &child->val, list) { - ret += ipchain__fprintf_graph(fp, chain, depth, - new_depth_mask, i++, - total_samples, - cumul, - left_margin); - } - - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total_samples; - - ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, - depth + 1, - new_depth_mask | (1 << depth), - left_margin); - node = next; - if (++entries_printed == callchain_param.print_limit) - break; - } - - if (callchain_param.mode == CHAIN_GRAPH_REL && - remaining && remaining != total_samples) { - - if (!rem_sq_bracket) - return ret; - - new_depth_mask &= ~(1 << (depth - 1)); - ret += ipchain__fprintf_graph(fp, &rem_hits, depth, - new_depth_mask, 0, total_samples, - remaining, left_margin); - } - - return ret; -} - -static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) -{ - struct callchain_node *cnode; - struct callchain_list *chain; - u32 entries_printed = 0; - bool printed = false; - struct rb_node *node; - int i = 0; - int ret = 0; - - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. - */ - node = rb_first(root); - if (node && !rb_next(node)) { - cnode = rb_entry(node, struct callchain_node, rb_node); - list_for_each_entry(chain, &cnode->val, list) { - /* - * If we sort by symbol, the first entry is the same than - * the symbol. No need to print it otherwise it appears as - * displayed twice. - */ - if (!i++ && sort__first_dimension == SORT_SYM) - continue; - if (!printed) { - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "|\n"); - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "---"); - left_margin += 3; - printed = true; - } else - ret += callchain__fprintf_left_margin(fp, left_margin); - - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); - - if (++entries_printed == callchain_param.print_limit) - break; - } - root = &cnode->rb_root; - } - - ret += __callchain__fprintf_graph(fp, root, total_samples, - 1, 1, left_margin); - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t __callchain__fprintf_flat(FILE *fp, - struct callchain_node *self, - u64 total_samples) -{ - struct callchain_list *chain; - size_t ret = 0; - - if (!self) - return 0; - - ret += __callchain__fprintf_flat(fp, self->parent, total_samples); - - - list_for_each_entry(chain, &self->val, list) { - if (chain->ip >= PERF_CONTEXT_MAX) - continue; - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); - } - - return ret; -} - -static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, - u64 total_samples) -{ - size_t ret = 0; - u32 entries_printed = 0; - struct rb_node *rb_node; - struct callchain_node *chain; - - rb_node = rb_first(self); - while (rb_node) { - double percent; - - chain = rb_entry(rb_node, struct callchain_node, rb_node); - percent = chain->hit * 100.0 / total_samples; - - ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); - ret += __callchain__fprintf_flat(fp, chain, total_samples); - ret += fprintf(fp, "\n"); - if (++entries_printed == callchain_param.print_limit) - break; - - rb_node = rb_next(rb_node); - } - - return ret; -} - -static size_t hist_entry_callchain__fprintf(struct hist_entry *he, - u64 total_samples, int left_margin, - FILE *fp) -{ - switch (callchain_param.mode) { - case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, - left_margin); - break; - case CHAIN_GRAPH_ABS: - return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); - break; - case CHAIN_FLAT: - return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); - break; - case CHAIN_NONE: - break; - default: - pr_err("Bad callchain mode\n"); - } - - return 0; -} - -void hists__output_recalc_col_len(struct hists *hists, int max_rows) -{ - struct rb_node *next = rb_first(&hists->entries); - struct hist_entry *n; - int row = 0; - - hists__reset_col_len(hists); - - while (next && row++ < max_rows) { - n = rb_entry(next, struct hist_entry, rb_node); - if (!n->filtered) - hists__calc_col_len(hists, n); - next = rb_next(&n->rb_node); - } -} - -static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, - size_t size, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 total_period) -{ - u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; - u64 nr_events; - const char *sep = symbol_conf.field_sep; - int ret; - - if (symbol_conf.exclude_other && !he->parent) - return 0; - - if (pair_hists) { - period = he->pair ? he->pair->period : 0; - nr_events = he->pair ? he->pair->nr_events : 0; - total = pair_hists->stats.total_period; - period_sys = he->pair ? he->pair->period_sys : 0; - period_us = he->pair ? he->pair->period_us : 0; - period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; - period_guest_us = he->pair ? he->pair->period_guest_us : 0; - } else { - period = he->period; - nr_events = he->nr_events; - total = total_period; - period_sys = he->period_sys; - period_us = he->period_us; - period_guest_sys = he->period_guest_sys; - period_guest_us = he->period_guest_us; - } - - if (total) { - if (color) - ret = percent_color_snprintf(s, size, - sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - else - ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - if (symbol_conf.show_cpu_utilization) { - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_sys * 100.0) / total); - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_us * 100.0) / total); - if (perf_guest) { - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_sys * 100.0) / - total); - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_us * 100.0) / - total); - } - } - } else - ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); - - if (symbol_conf.show_nr_samples) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); - else - ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); - else - ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); - } - - if (pair_hists) { - char bf[32]; - double old_percent = 0, new_percent = 0, diff; - - if (total > 0) - old_percent = (period * 100.0) / total; - if (total_period > 0) - new_percent = (he->period * 100.0) / total_period; - - diff = new_percent - old_percent; - - if (fabs(diff) >= 0.01) - scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%11.11s", bf); - - if (show_displacement) { - if (displacement) - scnprintf(bf, sizeof(bf), "%+4ld", displacement); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%6.6s", bf); - } - } - - return ret; -} - -int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, - struct hists *hists) -{ - const char *sep = symbol_conf.field_sep; - struct sort_entry *se; - int ret = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); - ret += se->se_snprintf(he, s + ret, size - ret, - hists__col_len(hists, se->se_width_idx)); - } - - return ret; -} - -static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - u64 total_period, FILE *fp) -{ - char bf[512]; - int ret; - - if (size == 0 || size > sizeof(bf)) - size = sizeof(bf); - - ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, - show_displacement, displacement, - true, total_period); - hist_entry__snprintf(he, bf + ret, size - ret, hists); - return fprintf(fp, "%s\n", bf); -} - -static size_t hist_entry__fprintf_callchain(struct hist_entry *he, - struct hists *hists, - u64 total_period, FILE *fp) -{ - int left_margin = 0; - - if (sort__first_dimension == SORT_COMM) { - struct sort_entry *se = list_first_entry(&hist_entry__sort_list, - typeof(*se), list); - left_margin = hists__col_len(hists, se->se_width_idx); - left_margin -= thread__comm_len(he->thread); - } - - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); -} - -size_t hists__fprintf(struct hists *hists, struct hists *pair, - bool show_displacement, bool show_header, int max_rows, - int max_cols, FILE *fp) -{ - struct sort_entry *se; - struct rb_node *nd; - size_t ret = 0; - u64 total_period; - unsigned long position = 1; - long displacement = 0; - unsigned int width; - const char *sep = symbol_conf.field_sep; - const char *col_width = symbol_conf.col_width_list_str; - int nr_rows = 0; - - init_rem_hits(); - - if (!show_header) - goto print_entries; - - fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); - - if (symbol_conf.show_cpu_utilization) { - if (sep) { - ret += fprintf(fp, "%csys", *sep); - ret += fprintf(fp, "%cus", *sep); - if (perf_guest) { - ret += fprintf(fp, "%cguest sys", *sep); - ret += fprintf(fp, "%cguest us", *sep); - } - } else { - ret += fprintf(fp, " sys "); - ret += fprintf(fp, " us "); - if (perf_guest) { - ret += fprintf(fp, " guest sys "); - ret += fprintf(fp, " guest us "); - } - } - } - - if (symbol_conf.show_nr_samples) { - if (sep) - fprintf(fp, "%cSamples", *sep); - else - fputs(" Samples ", fp); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += fprintf(fp, "%cPeriod", *sep); - else - ret += fprintf(fp, " Period "); - } - - if (pair) { - if (sep) - ret += fprintf(fp, "%cDelta", *sep); - else - ret += fprintf(fp, " Delta "); - - if (show_displacement) { - if (sep) - ret += fprintf(fp, "%cDisplacement", *sep); - else - ret += fprintf(fp, " Displ"); - } - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - if (sep) { - fprintf(fp, "%c%s", *sep, se->se_header); - continue; - } - width = strlen(se->se_header); - if (symbol_conf.col_width_list_str) { - if (col_width) { - hists__set_col_len(hists, se->se_width_idx, - atoi(col_width)); - col_width = strchr(col_width, ','); - if (col_width) - ++col_width; - } - } - if (!hists__new_col_len(hists, se->se_width_idx, width)) - width = hists__col_len(hists, se->se_width_idx); - fprintf(fp, " %*s", width, se->se_header); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (sep) - goto print_entries; - - fprintf(fp, "# ........"); - if (symbol_conf.show_cpu_utilization) - fprintf(fp, " ....... ......."); - if (symbol_conf.show_nr_samples) - fprintf(fp, " .........."); - if (symbol_conf.show_total_period) - fprintf(fp, " ............"); - if (pair) { - fprintf(fp, " .........."); - if (show_displacement) - fprintf(fp, " ....."); - } - list_for_each_entry(se, &hist_entry__sort_list, list) { - unsigned int i; - - if (se->elide) - continue; - - fprintf(fp, " "); - width = hists__col_len(hists, se->se_width_idx); - if (width == 0) - width = strlen(se->se_header); - for (i = 0; i < width; i++) - fprintf(fp, "."); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - fprintf(fp, "#\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - -print_entries: - total_period = hists->stats.total_period; - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (h->filtered) - continue; - - if (show_displacement) { - if (h->pair != NULL) - displacement = ((long)h->pair->position - - (long)position); - else - displacement = 0; - ++position; - } - ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, - displacement, total_period, fp); - - if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(&h->thread->mg, - MAP__FUNCTION, verbose, fp); - fprintf(fp, "%.10s end\n", graph_dotted_line); - } - } -out: - free(rem_sq_bracket); - - return ret; -} - -/* - * See hists__fprintf to match the column widths - */ -unsigned int hists__sort_list_width(struct hists *hists) -{ - struct sort_entry *se; - int ret = 9; /* total % */ - - if (symbol_conf.show_cpu_utilization) { - ret += 7; /* count_sys % */ - ret += 6; /* count_us % */ - if (perf_guest) { - ret += 13; /* count_guest_sys % */ - ret += 12; /* count_guest_us % */ - } - } - - if (symbol_conf.show_nr_samples) - ret += 11; - - if (symbol_conf.show_total_period) - ret += 13; - - list_for_each_entry(se, &hist_entry__sort_list, list) - if (!se->elide) - ret += 2 + hists__col_len(hists, se->se_width_idx); - - if (verbose) /* Addr + origin */ - ret += 3 + BITS_PER_LONG / 4; - - return ret; -} - static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { @@ -1342,25 +695,3 @@ void hists__inc_nr_events(struct hists *hists, u32 type) ++hists->stats.nr_events[0]; ++hists->stats.nr_events[type]; } - -size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) -{ - int i; - size_t ret = 0; - - for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - const char *name; - - if (hists->stats.nr_events[i] == 0) - continue; - - name = perf_event__name(i); - if (!strcmp(name, "UNKNOWN")) - continue; - - ret += fprintf(fp, "%16s events: %10d\n", name, - hists->stats.nr_events[i]); - } - - return ret; -} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0b096c27a41..f011ad4756e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,8 +75,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, struct symbol *parent, u64 period); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); -int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, - struct hists *hists); +int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, + struct hists *hists); void hist_entry__free(struct hist_entry *); struct hist_entry *__hists__add_branch_entry(struct hists *self, @@ -112,25 +112,66 @@ void hists__filter_by_symbol(struct hists *hists); u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); +void hists__reset_col_len(struct hists *hists); +void hists__calc_col_len(struct hists *hists, struct hist_entry *he); + +struct perf_hpp { + char *buf; + size_t size; + u64 total_period; + const char *sep; + long displacement; + void *ptr; +}; + +struct perf_hpp_fmt { + bool cond; + int (*header)(struct perf_hpp *hpp); + int (*width)(struct perf_hpp *hpp); + int (*color)(struct perf_hpp *hpp, struct hist_entry *he); + int (*entry)(struct perf_hpp *hpp, struct hist_entry *he); +}; + +extern struct perf_hpp_fmt perf_hpp__format[]; + +enum { + PERF_HPP__OVERHEAD, + PERF_HPP__OVERHEAD_SYS, + PERF_HPP__OVERHEAD_US, + PERF_HPP__OVERHEAD_GUEST_SYS, + PERF_HPP__OVERHEAD_GUEST_US, + PERF_HPP__SAMPLES, + PERF_HPP__PERIOD, + PERF_HPP__DELTA, + PERF_HPP__DISPL, + + PERF_HPP__MAX_INDEX +}; + +void perf_hpp__init(bool need_pair, bool show_displacement); +int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, + bool color); struct perf_evlist; #ifdef NO_NEWT_SUPPORT static inline -int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, - const char *help __used, - void(*timer)(void *arg) __used, - void *arg __used, - int refresh __used) +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, + const char *help __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int refresh __maybe_unused) { return 0; } -static inline int hist_entry__tui_annotate(struct hist_entry *self __used, - int evidx __used, - void(*timer)(void *arg) __used, - void *arg __used, - int delay_secs __used) +static inline int hist_entry__tui_annotate(struct hist_entry *self + __maybe_unused, + int evidx __maybe_unused, + void(*timer)(void *arg) + __maybe_unused, + void *arg __maybe_unused, + int delay_secs __maybe_unused) { return 0; } @@ -148,11 +189,11 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, #ifdef NO_GTK2_SUPPORT static inline -int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __used, - const char *help __used, - void(*timer)(void *arg) __used, - void *arg __used, - int refresh __used) +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused, + const char *help __maybe_unused, + void(*timer)(void *arg) __maybe_unused, + void *arg __maybe_unused, + int refresh __maybe_unused) { return 0; } diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 587a230d207..a55d8cf083c 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -5,6 +5,10 @@ #include <linux/compiler.h> #include <asm/hweight.h> +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + #define BITS_PER_LONG __WORDSIZE #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 547628e97f3..96b919dae11 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -9,6 +9,13 @@ #define __attribute_const__ #endif -#define __used __attribute__((__unused__)) +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif +#define __packed __attribute__((__packed__)) + +#ifndef __force +#define __force +#endif #endif diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index b6842c1d02a..d8c927c868e 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -8,8 +8,8 @@ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) -#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) +#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) +#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) @@ -46,9 +46,22 @@ _min1 < _min2 ? _min1 : _min2; }) #endif +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + #ifndef BUG_ON +#ifdef NDEBUG +#define BUG_ON(cond) do { if (cond) {} } while (0) +#else #define BUG_ON(cond) assert(!(cond)) #endif +#endif /* * Both need more care to handle endianness diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h new file mode 100644 index 00000000000..58b64ed4da1 --- /dev/null +++ b/tools/perf/util/include/linux/magic.h @@ -0,0 +1,12 @@ +#ifndef _PERF_LINUX_MAGIC_H_ +#define _PERF_LINUX_MAGIC_H_ + +#ifndef DEBUGFS_MAGIC +#define DEBUGFS_MAGIC 0x64626720 +#endif + +#ifndef SYSFS_MAGIC +#define SYSFS_MAGIC 0x62656572 +#endif + +#endif diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index 7a243a14303..2a030c5af3a 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1 +1,2 @@ +#include <stdbool.h> #include "../../../../include/linux/rbtree.h" diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h index 3b2f5900276..6f19c548ecc 100644 --- a/tools/perf/util/include/linux/string.h +++ b/tools/perf/util/include/linux/string.h @@ -1 +1,3 @@ #include <string.h> + +void *memdup(const void *src, size_t len); diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h index 12de3b8112f..eb464786c08 100644 --- a/tools/perf/util/include/linux/types.h +++ b/tools/perf/util/include/linux/types.h @@ -3,6 +3,14 @@ #include <asm/types.h> +#ifndef __bitwise +#define __bitwise +#endif + +#ifndef __le32 +typedef __u32 __bitwise __le32; +#endif + #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index fd530dced9c..9d0740024ba 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -11,7 +11,7 @@ #include "intlist.h" -static struct rb_node *intlist__node_new(struct rblist *rblist __used, +static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused, const void *entry) { int i = (int)((long)entry); @@ -31,7 +31,7 @@ static void int_node__delete(struct int_node *ilist) free(ilist); } -static void intlist__node_delete(struct rblist *rblist __used, +static void intlist__node_delete(struct rblist *rblist __maybe_unused, struct rb_node *rb_node) { struct int_node *node = container_of(rb_node, struct int_node, rb_node); @@ -52,9 +52,9 @@ int intlist__add(struct intlist *ilist, int i) return rblist__add_node(&ilist->rblist, (void *)((long)i)); } -void intlist__remove(struct intlist *ilist __used, struct int_node *node) +void intlist__remove(struct intlist *ilist, struct int_node *node) { - int_node__delete(node); + rblist__remove_node(&ilist->rblist, &node->rb_node); } struct int_node *intlist__find(struct intlist *ilist, int i) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cc33486ad9e..ead5316b3f8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -9,6 +9,7 @@ #include "map.h" #include "thread.h" #include "strlist.h" +#include "vdso.h" const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", @@ -23,7 +24,6 @@ static inline int is_anon_memory(const char *filename) static inline int is_no_dso_memory(const char *filename) { return !strcmp(filename, "[stack]") || - !strcmp(filename, "[vdso]") || !strcmp(filename, "[heap]"); } @@ -52,9 +52,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, if (self != NULL) { char newfilename[PATH_MAX]; struct dso *dso; - int anon, no_dso; + int anon, no_dso, vdso; anon = is_anon_memory(filename); + vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); if (anon) { @@ -62,7 +63,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, filename = newfilename; } - dso = __dsos__findnew(dsos__list, filename); + if (vdso) { + pgoff = 0; + dso = vdso__dso_findnew(dsos__list); + } else + dso = __dsos__findnew(dsos__list, filename); + if (dso == NULL) goto out_delete; @@ -86,6 +92,25 @@ out_delete: return NULL; } +/* + * Constructor variant for modules (where we know from /proc/modules where + * they are loaded) and for vmlinux, where only after we load all the + * symbols we'll know where it starts and ends. + */ +struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +{ + struct map *map = calloc(1, (sizeof(*map) + + (dso->kernel ? sizeof(struct kmap) : 0))); + if (map != NULL) { + /* + * ->end will be filled after we load all the symbols + */ + map__init(map, type, start, 0, 0, dso); + } + + return map; +} + void map__delete(struct map *self) { free(self); @@ -137,6 +162,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning(", continuing without symbols\n"); return -1; } else if (nr == 0) { +#ifndef NO_LIBELF_SUPPORT const size_t len = strlen(name); const size_t real_len = len - sizeof(DSO__DELETED); @@ -149,7 +175,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning("no symbols found in %s, maybe install " "a debug package?\n", name); } - +#endif return -1; } /* @@ -217,15 +243,14 @@ size_t map__fprintf(struct map *self, FILE *fp) size_t map__fprintf_dsoname(struct map *map, FILE *fp) { - const char *dsoname; + const char *dsoname = "[unknown]"; if (map && map->dso && (map->dso->name || map->dso->long_name)) { if (symbol_conf.show_kernel_path && map->dso->long_name) dsoname = map->dso->long_name; else if (map->dso->name) dsoname = map->dso->name; - } else - dsoname = "[unknown]"; + } return fprintf(fp, "%s", dsoname); } @@ -242,14 +267,6 @@ u64 map__rip_2objdump(struct map *map, u64 rip) return addr; } -u64 map__objdump_2ip(struct map *map, u64 addr) -{ - u64 ip = map->dso->adjust_symbols ? - addr : - map->unmap_ip(map, addr); /* RIP -> IP */ - return ip; -} - void map_groups__init(struct map_groups *mg) { int i; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 03a1e9b08b2..d2250fc97e2 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -96,7 +96,7 @@ static inline u64 map__unmap_ip(struct map *map, u64 ip) return ip + map->start - map->pgoff; } -static inline u64 identity__map_ip(struct map *map __used, u64 ip) +static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip) { return ip; } @@ -104,7 +104,6 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); -u64 map__objdump_2ip(struct map *map, u64 addr); struct symbol; @@ -115,6 +114,7 @@ void map__init(struct map *self, enum map_type type, struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, char *filename, enum map_type type); +struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); @@ -157,9 +157,12 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid); void machine__exit(struct machine *self); void machine__delete(struct machine *self); +struct perf_evsel; +struct perf_sample; int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, struct thread *thread, - struct ip_callchain *chain, + struct perf_sample *sample, struct symbol **parent); int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, u64 addr); diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 127d648cc54..28c18d1d52c 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -18,8 +18,7 @@ do { \ static int test__checkevent_tracepoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); @@ -48,8 +47,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) static int test__checkevent_raw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -59,8 +57,7 @@ static int test__checkevent_raw(struct perf_evlist *evlist) static int test__checkevent_numeric(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); @@ -70,8 +67,7 @@ static int test__checkevent_numeric(struct perf_evlist *evlist) static int test__checkevent_symbolic_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -82,8 +78,7 @@ static int test__checkevent_symbolic_name(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -100,8 +95,7 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); @@ -112,8 +106,7 @@ static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) static int test__checkevent_genhw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); @@ -123,8 +116,7 @@ static int test__checkevent_genhw(struct perf_evlist *evlist) static int test__checkevent_breakpoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -138,8 +130,7 @@ static int test__checkevent_breakpoint(struct perf_evlist *evlist) static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -152,8 +143,7 @@ static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -168,8 +158,7 @@ static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -184,8 +173,7 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -200,8 +188,7 @@ static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -232,8 +219,7 @@ test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) static int test__checkevent_raw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -245,8 +231,7 @@ static int test__checkevent_raw_modifier(struct perf_evlist *evlist) static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -258,8 +243,7 @@ static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -271,8 +255,7 @@ static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); @@ -282,8 +265,7 @@ static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); @@ -293,8 +275,7 @@ static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -306,8 +287,7 @@ static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -319,75 +299,71 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u")); + !strcmp(perf_evsel__name(evsel), "mem:0:u")); return test__checkevent_breakpoint(evlist); } static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:x:k")); + !strcmp(perf_evsel__name(evsel), "mem:0:x:k")); return test__checkevent_breakpoint_x(evlist); } static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp")); + !strcmp(perf_evsel__name(evsel), "mem:0:r:hp")); return test__checkevent_breakpoint_r(evlist); } static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:w:up")); + !strcmp(perf_evsel__name(evsel), "mem:0:w:up")); return test__checkevent_breakpoint_w(evlist); } static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); + !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } @@ -395,8 +371,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) static int test__checkevent_pmu(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -410,12 +385,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) static int test__checkevent_list(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); /* r1 */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); @@ -426,7 +400,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* syscalls:sys_enter_open:k */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); @@ -437,7 +411,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* 1:1:hp */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); @@ -450,22 +424,21 @@ static int test__checkevent_list(struct perf_evlist *evlist) static int test__checkevent_pmu_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); /* cpu/config=1,name=krava/u */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); /* cpu/config=2/u" */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "raw 0x2:u")); + !strcmp(perf_evsel__name(evsel), "cpu/config=2/u")); return 0; } @@ -513,6 +486,280 @@ static int test__checkterms_simple(struct list_head *terms) return 0; } +static int test__group1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* instructions:k */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cycles:upp */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group2(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* faults + :ku modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cache-references + :u modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_REFERENCES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles:k */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group3(struct perf_evlist *evlist __maybe_unused) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* group1 syscalls:sys_enter_open:H */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group1")); + + /* group1 cycles:kppp */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + + /* group2 cycles + G modifier */ + evsel = leader = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group2")); + + /* group2 1:3 + G modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* instructions:u */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group4(struct perf_evlist *evlist __maybe_unused) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* cycles:u + p */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:kp + p */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group5(struct perf_evlist *evlist __maybe_unused) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* cycles + G */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions + G */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles:G */ + evsel = leader = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:G */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + struct test__event_st { const char *name; __u32 type; @@ -632,6 +879,26 @@ static struct test__event_st test__events[] = { .name = "mem:0:rw:kp", .check = test__checkevent_breakpoint_rw_modifier, }, + [28] = { + .name = "{instructions:k,cycles:upp}", + .check = test__group1, + }, + [29] = { + .name = "{faults:k,cache-references}:u,cycles:k", + .check = test__group2, + }, + [30] = { + .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", + .check = test__group3, + }, + [31] = { + .name = "{cycles:u,instructions:kp}:p", + .check = test__group4, + }, + [32] = { + .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles", + .check = test__group5, + }, }; static struct test__event_st test__events_pmu[] = { @@ -658,9 +925,6 @@ static struct test__term test__terms[] = { }, }; -#define TEST__TERMS_CNT (sizeof(test__terms) / \ - sizeof(struct test__term)) - static int test_event(struct test__event_st *e) { struct perf_evlist *evlist; @@ -685,19 +949,19 @@ static int test_event(struct test__event_st *e) static int test_events(struct test__event_st *events, unsigned cnt) { - int ret = 0; + int ret1, ret2 = 0; unsigned i; for (i = 0; i < cnt; i++) { struct test__event_st *e = &events[i]; pr_debug("running test %d '%s'\n", i, e->name); - ret = test_event(e); - if (ret) - break; + ret1 = test_event(e); + if (ret1) + ret2 = ret1; } - return ret; + return ret2; } static int test_term(struct test__term *t) @@ -752,19 +1016,19 @@ static int test_pmu(void) ret = stat(path, &st); if (ret) - pr_debug("ommiting PMU cpu tests\n"); + pr_debug("omitting PMU cpu tests\n"); return !ret; } int parse_events__test(void) { - int ret; + int ret1, ret2 = 0; #define TEST_EVENTS(tests) \ do { \ - ret = test_events(tests, ARRAY_SIZE(tests)); \ - if (ret) \ - return ret; \ + ret1 = test_events(tests, ARRAY_SIZE(tests)); \ + if (!ret2) \ + ret2 = ret1; \ } while (0) TEST_EVENTS(test__events); @@ -772,5 +1036,9 @@ do { \ if (test_pmu()) TEST_EVENTS(test__events_pmu); - return test_terms(test__terms, ARRAY_SIZE(test__terms)); + ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); + if (!ret2) + ret2 = ret1; + + return ret2; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 74a5af4d33e..aed38e4b9df 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -239,8 +239,11 @@ const char *event_type(int type) return "unknown"; } -static int add_event(struct list_head **_list, int *idx, - struct perf_event_attr *attr, char *name) + + +static int __add_event(struct list_head **_list, int *idx, + struct perf_event_attr *attr, + char *name, struct cpu_map *cpus) { struct perf_evsel *evsel; struct list_head *list = *_list; @@ -260,6 +263,7 @@ static int add_event(struct list_head **_list, int *idx, return -ENOMEM; } + evsel->cpus = cpus; if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); @@ -267,6 +271,12 @@ static int add_event(struct list_head **_list, int *idx, return 0; } +static int add_event(struct list_head **_list, int *idx, + struct perf_event_attr *attr, char *name) +{ + return __add_event(_list, idx, attr, name, NULL); +} + static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) { int i, j; @@ -308,7 +318,7 @@ int parse_events_add_cache(struct list_head **list, int *idx, for (i = 0; (i < 2) && (op_result[i]); i++) { char *str = op_result[i]; - snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str); + n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str); if (cache_op == -1) { cache_op = parse_aliases(str, perf_evsel__hw_cache_op, @@ -346,42 +356,28 @@ int parse_events_add_cache(struct list_head **list, int *idx, return add_event(list, idx, &attr, name); } -static int add_tracepoint(struct list_head **list, int *idx, +static int add_tracepoint(struct list_head **listp, int *idx, char *sys_name, char *evt_name) { - struct perf_event_attr attr; - char name[MAX_NAME_LEN]; - char evt_path[MAXPATHLEN]; - char id_buf[4]; - u64 id; - int fd; - - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, - sys_name, evt_name); - - fd = open(evt_path, O_RDONLY); - if (fd < 0) - return -1; + struct perf_evsel *evsel; + struct list_head *list = *listp; - if (read(fd, id_buf, sizeof(id_buf)) < 0) { - close(fd); - return -1; + if (!list) { + list = malloc(sizeof(*list)); + if (!list) + return -ENOMEM; + INIT_LIST_HEAD(list); } - close(fd); - id = atoll(id_buf); - - memset(&attr, 0, sizeof(attr)); - attr.config = id; - attr.type = PERF_TYPE_TRACEPOINT; - attr.sample_type |= PERF_SAMPLE_RAW; - attr.sample_type |= PERF_SAMPLE_TIME; - attr.sample_type |= PERF_SAMPLE_CPU; - attr.sample_type |= PERF_SAMPLE_PERIOD; - attr.sample_period = 1; + evsel = perf_evsel__newtp(sys_name, evt_name, (*idx)++); + if (!evsel) { + free(list); + return -ENOMEM; + } - snprintf(name, MAX_NAME_LEN, "%s:%s", sys_name, evt_name); - return add_event(list, idx, &attr, name); + list_add_tail(&evsel->node, list); + *listp = list; + return 0; } static int add_tracepoint_multi(struct list_head **list, int *idx, @@ -551,7 +547,7 @@ static int config_attr(struct perf_event_attr *attr, } int parse_events_add_numeric(struct list_head **list, int *idx, - unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config) { struct perf_event_attr attr; @@ -607,8 +603,23 @@ int parse_events_add_pmu(struct list_head **list, int *idx, if (perf_pmu__config(pmu, &attr, head_config)) return -EINVAL; - return add_event(list, idx, &attr, - pmu_event_name(head_config)); + return __add_event(list, idx, &attr, pmu_event_name(head_config), + pmu->cpus); +} + +int parse_events__modifier_group(struct list_head *list, + char *event_mod) +{ + return parse_events__modifier_event(list, event_mod, true); +} + +void parse_events__set_leader(char *name, struct list_head *list) +{ + struct perf_evsel *leader; + + __perf_evlist__set_leader(list); + leader = list_entry(list->next, struct perf_evsel, node); + leader->group_name = name ? strdup(name) : NULL; } void parse_events_update_lists(struct list_head *list_event, @@ -616,21 +627,45 @@ void parse_events_update_lists(struct list_head *list_event, { /* * Called for single event definition. Update the - * 'all event' list, and reinit the 'signle event' + * 'all event' list, and reinit the 'single event' * list, for next event definition. */ list_splice_tail(list_event, list_all); free(list_event); } -int parse_events_modifier(struct list_head *list, char *str) +struct event_modifier { + int eu; + int ek; + int eh; + int eH; + int eG; + int precise; + int exclude_GH; +}; + +static int get_event_modifier(struct event_modifier *mod, char *str, + struct perf_evsel *evsel) { - struct perf_evsel *evsel; - int exclude = 0, exclude_GH = 0; - int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0; + int eu = evsel ? evsel->attr.exclude_user : 0; + int ek = evsel ? evsel->attr.exclude_kernel : 0; + int eh = evsel ? evsel->attr.exclude_hv : 0; + int eH = evsel ? evsel->attr.exclude_host : 0; + int eG = evsel ? evsel->attr.exclude_guest : 0; + int precise = evsel ? evsel->attr.precise_ip : 0; - if (str == NULL) - return 0; + int exclude = eu | ek | eh; + int exclude_GH = evsel ? evsel->exclude_GH : 0; + + /* + * We are here for group and 'GH' was not set as event + * modifier and whatever event/group modifier override + * default 'GH' setup. + */ + if (evsel && !exclude_GH) + eH = eG = 0; + + memset(mod, 0, sizeof(*mod)); while (*str) { if (*str == 'u') { @@ -674,13 +709,51 @@ int parse_events_modifier(struct list_head *list, char *str) if (precise > 3) return -EINVAL; + mod->eu = eu; + mod->ek = ek; + mod->eh = eh; + mod->eH = eH; + mod->eG = eG; + mod->precise = precise; + mod->exclude_GH = exclude_GH; + return 0; +} + +int parse_events__modifier_event(struct list_head *list, char *str, bool add) +{ + struct perf_evsel *evsel; + struct event_modifier mod; + + if (str == NULL) + return 0; + + if (!add && get_event_modifier(&mod, str, NULL)) + return -EINVAL; + list_for_each_entry(evsel, list, node) { - evsel->attr.exclude_user = eu; - evsel->attr.exclude_kernel = ek; - evsel->attr.exclude_hv = eh; - evsel->attr.precise_ip = precise; - evsel->attr.exclude_host = eH; - evsel->attr.exclude_guest = eG; + + if (add && get_event_modifier(&mod, str, evsel)) + return -EINVAL; + + evsel->attr.exclude_user = mod.eu; + evsel->attr.exclude_kernel = mod.ek; + evsel->attr.exclude_hv = mod.eh; + evsel->attr.precise_ip = mod.precise; + evsel->attr.exclude_host = mod.eH; + evsel->attr.exclude_guest = mod.eG; + evsel->exclude_GH = mod.exclude_GH; + } + + return 0; +} + +int parse_events_name(struct list_head *list, char *name) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, list, node) { + if (!evsel->name) + evsel->name = strdup(name); } return 0; @@ -730,7 +803,8 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) +int parse_events(struct perf_evlist *evlist, const char *str, + int unset __maybe_unused) { struct parse_events_data__events data = { .list = LIST_HEAD_INIT(data.list), @@ -756,20 +830,20 @@ int parse_events(struct perf_evlist *evlist, const char *str, int unset __used) } int parse_events_option(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; return parse_events(evlist, str, unset); } int parse_filter(const struct option *opt, const char *str, - int unset __used) + int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; if (evlist->nr_entries > 0) - last = list_entry(evlist->entries.prev, struct perf_evsel, node); + last = perf_evlist__last(evlist); if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, @@ -799,7 +873,8 @@ static const char * const event_type_descriptors[] = { * Print the events from <debugfs_mount_point>/tracing/events */ -void print_tracepoint_events(const char *subsys_glob, const char *event_glob) +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only) { DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; @@ -829,6 +904,11 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) !strglobmatch(evt_dirent.d_name, event_glob)) continue; + if (name_only) { + printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + continue; + } + snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); printf(" %-50s [%s]\n", evt_path, @@ -906,7 +986,7 @@ void print_events_type(u8 type) __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); } -int print_hwcache_events(const char *event_glob) +int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, printed = 0; char name[64]; @@ -923,8 +1003,11 @@ int print_hwcache_events(const char *event_glob) if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (name_only) + printf("%s ", name); + else + printf(" %-50s [%s]\n", name, + event_type_descriptors[PERF_TYPE_HW_CACHE]); ++printed; } } @@ -934,7 +1017,8 @@ int print_hwcache_events(const char *event_glob) } static void print_symbol_events(const char *event_glob, unsigned type, - struct event_symbol *syms, unsigned max) + struct event_symbol *syms, unsigned max, + bool name_only) { unsigned i, printed = 0; char name[MAX_NAME_LEN]; @@ -946,6 +1030,11 @@ static void print_symbol_events(const char *event_glob, unsigned type, (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; + if (name_only) { + printf("%s ", syms->symbol); + continue; + } + if (strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else @@ -963,39 +1052,42 @@ static void print_symbol_events(const char *event_glob, unsigned type, /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob) +void print_events(const char *event_glob, bool name_only) { - - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); + if (!name_only) { + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); + } print_symbol_events(event_glob, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); + event_symbols_hw, PERF_COUNT_HW_MAX, name_only); print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); + event_symbols_sw, PERF_COUNT_SW_MAX, name_only); - print_hwcache_events(event_glob); + print_hwcache_events(event_glob, name_only); if (event_glob != NULL) return; - printf("\n"); - printf(" %-50s [%s]\n", - "rNNN", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" %-50s [%s]\n", - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" (see 'perf list --help' on how to encode it)\n"); - printf("\n"); - - printf(" %-50s [%s]\n", - "mem:<addr>[:access]", + if (!name_only) { + printf("\n"); + printf(" %-50s [%s]\n", + "rNNN", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" %-50s [%s]\n", + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" (see 'perf list --help' on how to encode it)\n"); + printf("\n"); + + printf(" %-50s [%s]\n", + "mem:<addr>[:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); - printf("\n"); + printf("\n"); + } - print_tracepoint_events(NULL, NULL); + print_tracepoint_events(NULL, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events__term *term) @@ -1005,7 +1097,7 @@ int parse_events__is_hardcoded_term(struct parse_events__term *term) static int new_term(struct parse_events__term **_term, int type_val, int type_term, char *config, - char *str, long num) + char *str, u64 num) { struct parse_events__term *term; @@ -1034,7 +1126,7 @@ static int new_term(struct parse_events__term **_term, int type_val, } int parse_events__term_num(struct parse_events__term **term, - int type_term, char *config, long num) + int type_term, char *config, u64 num) { return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, config, NULL, num); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ee9c218a193..c356e443448 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -55,7 +55,7 @@ struct parse_events__term { char *config; union { char *str; - long num; + u64 num; } val; int type_val; int type_term; @@ -73,17 +73,19 @@ struct parse_events_data__terms { int parse_events__is_hardcoded_term(struct parse_events__term *term); int parse_events__term_num(struct parse_events__term **_term, - int type_term, char *config, long num); + int type_term, char *config, u64 num); int parse_events__term_str(struct parse_events__term **_term, int type_term, char *config, char *str); int parse_events__term_clone(struct parse_events__term **new, struct parse_events__term *term); void parse_events__free_terms(struct list_head *terms); -int parse_events_modifier(struct list_head *list, char *str); +int parse_events__modifier_event(struct list_head *list, char *str, bool add); +int parse_events__modifier_group(struct list_head *list, char *event_mod); +int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); int parse_events_add_numeric(struct list_head **list, int *idx, - unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head **list, int *idx, char *type, char *op_result1, char *op_result2); @@ -91,15 +93,17 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type); int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); +void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_error(void *data, void *scanner, char const *msg); int parse_events__test(void); -void print_events(const char *event_glob); +void print_events(const char *event_glob, bool name_only); void print_events_type(u8 type); -void print_tracepoint_events(const char *subsys_glob, const char *event_glob); -int print_hwcache_events(const char *event_glob); +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only); +int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); extern int valid_debugfs_mount(const char *debugfs); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 384ca74c6b2..c87efc12579 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -15,10 +15,10 @@ YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); static int __value(YYSTYPE *yylval, char *str, int base, int token) { - long num; + u64 num; errno = 0; - num = strtoul(str, NULL, base); + num = strtoull(str, NULL, base); if (errno) return PE_ERROR; @@ -70,6 +70,12 @@ static int term(yyscan_t scanner, int type) %} %x mem +%s config +%x event + +group [^,{}/]*[{][^}]*[}][^,{}/]* +event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* +event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -84,7 +90,13 @@ modifier_bp [rwx]{1,3} { int start_token; - start_token = (int) parse_events_get_extra(yyscanner); + start_token = parse_events_get_extra(yyscanner); + + if (start_token == PE_START_TERMS) + BEGIN(config); + else if (start_token == PE_START_EVENTS) + BEGIN(event); + if (start_token) { parse_events_set_extra(NULL, yyscanner); return start_token; @@ -92,6 +104,26 @@ modifier_bp [rwx]{1,3} } %} +<event>{ + +{group} { + BEGIN(INITIAL); yyless(0); + } + +{event_pmu} | +{event} { + str(yyscanner, PE_EVENT_NAME); + BEGIN(INITIAL); yyless(0); + return PE_EVENT_NAME; + } + +. | +<<EOF>> { + BEGIN(INITIAL); yyless(0); + } + +} + cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } @@ -127,18 +159,16 @@ speculative-read|speculative-load | refs|Reference|ops|access | misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } - /* - * These are event config hardcoded term names to be specified - * within xxx/.../ syntax. So far we dont clash with other names, - * so we can put them here directly. In case the we have a conflict - * in future, this needs to go into '//' condition block. - */ +<config>{ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +, { return ','; } +"/" { BEGIN(INITIAL); return '/'; } +} mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return raw(yyscanner); } @@ -147,10 +177,12 @@ r{num_raw_hex} { return raw(yyscanner); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {name} { return str(yyscanner, PE_NAME); } -"/" { return '/'; } +"/" { BEGIN(config); return '/'; } - { return '-'; } -, { return ','; } +, { BEGIN(event); return ','; } : { return ':'; } +"{" { BEGIN(event); return '{'; } +"}" { return '}'; } = { return '='; } \n { } @@ -175,7 +207,7 @@ r{num_raw_hex} { return raw(yyscanner); } %% -int parse_events_wrap(void *scanner __used) +int parse_events_wrap(void *scanner __maybe_unused) { return 1; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 2bc5fbff2b5..cd88209e3c5 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -27,10 +27,11 @@ do { \ %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_EVENT_NAME %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT -%token PE_PREFIX_MEM PE_PREFIX_RAW +%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW @@ -42,6 +43,7 @@ do { \ %type <str> PE_NAME_CACHE_OP_RESULT %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP +%type <str> PE_EVENT_NAME %type <num> value_sym %type <head> event_config %type <term> event_term @@ -53,44 +55,125 @@ do { \ %type <head> event_legacy_numeric %type <head> event_legacy_raw %type <head> event_def +%type <head> event_mod +%type <head> event_name +%type <head> event +%type <head> events +%type <head> group_def +%type <head> group +%type <head> groups %union { char *str; - unsigned long num; + u64 num; struct list_head *head; struct parse_events__term *term; } %% start: -PE_START_EVENTS events +PE_START_EVENTS start_events | -PE_START_TERMS terms +PE_START_TERMS start_terms + +start_events: groups +{ + struct parse_events_data__events *data = _data; + + parse_events_update_lists($1, &data->list); +} + +groups: +groups ',' group +{ + struct list_head *list = $1; + struct list_head *group = $3; + + parse_events_update_lists(group, list); + $$ = list; +} +| +groups ',' event +{ + struct list_head *list = $1; + struct list_head *event = $3; + + parse_events_update_lists(event, list); + $$ = list; +} +| +group +| +event + +group: +group_def ':' PE_MODIFIER_EVENT +{ + struct list_head *list = $1; + + ABORT_ON(parse_events__modifier_group(list, $3)); + $$ = list; +} +| +group_def + +group_def: +PE_NAME '{' events '}' +{ + struct list_head *list = $3; + + parse_events__set_leader($1, list); + $$ = list; +} +| +'{' events '}' +{ + struct list_head *list = $2; + + parse_events__set_leader(NULL, list); + $$ = list; +} events: -events ',' event | event +events ',' event +{ + struct list_head *event = $3; + struct list_head *list = $1; -event: -event_def PE_MODIFIER_EVENT + parse_events_update_lists(event, list); + $$ = list; +} +| +event + +event: event_mod + +event_mod: +event_name PE_MODIFIER_EVENT { - struct parse_events_data__events *data = _data; + struct list_head *list = $1; /* * Apply modifier on all events added by single event definition * (there could be more events added for multiple tracepoint * definitions via '*?'. */ - ABORT_ON(parse_events_modifier($1, $2)); - parse_events_update_lists($1, &data->list); + ABORT_ON(parse_events__modifier_event(list, $2, false)); + $$ = list; } | -event_def -{ - struct parse_events_data__events *data = _data; +event_name - parse_events_update_lists($1, &data->list); +event_name: +PE_EVENT_NAME event_def +{ + ABORT_ON(parse_events_name($2, $1)); + free($1); + $$ = $2; } +| +event_def event_def: event_pmu | event_legacy_symbol | @@ -207,7 +290,7 @@ PE_VALUE ':' PE_VALUE struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL)); $$ = list; } @@ -222,7 +305,7 @@ PE_RAW $$ = list; } -terms: event_config +start_terms: event_config { struct parse_events_data__terms *data = _data; data->terms = $1; @@ -282,7 +365,7 @@ PE_TERM '=' PE_NAME { struct parse_events__term *term; - ABORT_ON(parse_events__term_str(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -290,7 +373,7 @@ PE_TERM '=' PE_VALUE { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -298,7 +381,7 @@ PE_TERM { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, 1)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1)); $$ = term; } @@ -308,7 +391,7 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(void *data __used, void *scanner __used, - char const *msg __used) +void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused, + char const *msg __maybe_unused) { } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 594f8fad5ec..443fc116512 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -557,7 +557,8 @@ int parse_options_usage(const char * const *usagestr, } -int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used, +int parse_opt_verbosity_cb(const struct option *opt, + const char *arg __maybe_unused, int unset) { int *target = opt->value; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h new file mode 100644 index 00000000000..316dbe7f86e --- /dev/null +++ b/tools/perf/util/perf_regs.h @@ -0,0 +1,14 @@ +#ifndef __PERF_REGS_H +#define __PERF_REGS_H + +#ifndef NO_PERF_REGS +#include <perf_regs.h> +#else +#define PERF_REGS_MASK 0 + +static inline const char *perf_reg_name(int id __maybe_unused) +{ + return NULL; +} +#endif /* NO_PERF_REGS */ +#endif /* __PERF_REGS_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 67715a42cd6..8a2229da594 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -9,6 +9,9 @@ #include "util.h" #include "pmu.h" #include "parse-events.h" +#include "cpumap.h" + +#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; @@ -69,7 +72,7 @@ static int pmu_format(char *name, struct list_head *format) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/format", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); if (stat(path, &st) < 0) return 0; /* no error if format does not exist */ @@ -206,7 +209,7 @@ static int pmu_type(char *name, __u32 *type) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/type", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); if (stat(path, &st) < 0) return -1; @@ -222,6 +225,62 @@ static int pmu_type(char *name, __u32 *type) return ret; } +/* Add all pmus in sysfs to pmu list: */ +static void pmu_read_sysfs(void) +{ + char path[PATH_MAX]; + const char *sysfs; + DIR *dir; + struct dirent *dent; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return; + + snprintf(path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + /* add to static LIST_HEAD(pmus): */ + perf_pmu__find(dent->d_name); + } + + closedir(dir); +} + +static struct cpu_map *pmu_cpumask(char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs; + FILE *file; + struct cpu_map *cpus; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return NULL; + + snprintf(path, PATH_MAX, + "%s/bus/event_source/devices/%s/cpumask", sysfs, name); + + if (stat(path, &st) < 0) + return NULL; + + file = fopen(path, "r"); + if (!file) + return NULL; + + cpus = cpu_map__read(file); + fclose(file); + return cpus; +} + static struct perf_pmu *pmu_lookup(char *name) { struct perf_pmu *pmu; @@ -244,6 +303,8 @@ static struct perf_pmu *pmu_lookup(char *name) if (!pmu) return NULL; + pmu->cpus = pmu_cpumask(name); + pmu_aliases(name, &aliases); INIT_LIST_HEAD(&pmu->format); @@ -267,6 +328,21 @@ static struct perf_pmu *pmu_find(char *name) return NULL; } +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) +{ + /* + * pmu iterator: If pmu is NULL, we start at the begin, + * otherwise return the next pmu. Returns NULL on end. + */ + if (!pmu) { + pmu_read_sysfs(); + pmu = list_prepare_entry(pmu, &pmus, list); + } + list_for_each_entry_continue(pmu, &pmus, list) + return pmu; + return NULL; +} + struct perf_pmu *perf_pmu__find(char *name) { struct perf_pmu *pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 535f2c5258a..53c7794fc4b 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -28,6 +28,7 @@ struct perf_pmu__alias { struct perf_pmu { char *name; __u32 type; + struct cpu_map *cpus; struct list_head format; struct list_head aliases; struct list_head list; @@ -46,5 +47,7 @@ int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits); void perf_pmu__set_format(unsigned long *bits, long from, long to); +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); + int perf_pmu__test(void); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index 20ea77e9316..ec898047ebb 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -86,8 +86,8 @@ PP_VALUE %% -void perf_pmu_error(struct list_head *list __used, - char *name __used, - char const *msg __used) +void perf_pmu_error(struct list_head *list __maybe_unused, + char *name __maybe_unused, + char const *msg __maybe_unused) { } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0dda25d82d0..49a256e6e0a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,7 +41,7 @@ #include "symbol.h" #include "thread.h" #include "debugfs.h" -#include "trace-event.h" /* For __unused */ +#include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" #include "session.h" @@ -647,8 +647,8 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs __unused, - int max_tevs __unused, const char *target) + struct probe_trace_event **tevs __maybe_unused, + int max_tevs __maybe_unused, const char *target) { if (perf_probe_event_need_dwarf(pev)) { pr_warning("Debuginfo-analysis is not supported.\n"); @@ -661,17 +661,18 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, return 0; } -int show_line_range(struct line_range *lr __unused, const char *module __unused) +int show_line_range(struct line_range *lr __maybe_unused, + const char *module __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; } -int show_available_vars(struct perf_probe_event *pevs __unused, - int npevs __unused, int max_vls __unused, - const char *module __unused, - struct strfilter *filter __unused, - bool externs __unused) +int show_available_vars(struct perf_probe_event *pevs __maybe_unused, + int npevs __maybe_unused, int max_vls __maybe_unused, + const char *module __maybe_unused, + struct strfilter *filter __maybe_unused, + bool externs __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; @@ -1099,6 +1100,7 @@ static int parse_probe_trace_command(const char *cmd, struct probe_trace_point *tp = &tev->point; char pr; char *p; + char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str; int ret, i, argc; char **argv; @@ -1115,14 +1117,27 @@ static int parse_probe_trace_command(const char *cmd, } /* Scan event and group name. */ - ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", - &pr, (float *)(void *)&tev->group, - (float *)(void *)&tev->event); - if (ret != 3) { + argv0_str = strdup(argv[0]); + if (argv0_str == NULL) { + ret = -ENOMEM; + goto out; + } + fmt1_str = strtok_r(argv0_str, ":", &fmt); + fmt2_str = strtok_r(NULL, "/", &fmt); + fmt3_str = strtok_r(NULL, " \t", &fmt); + if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL + || fmt3_str == NULL) { semantic_error("Failed to parse event name: %s\n", argv[0]); ret = -EINVAL; goto out; } + pr = fmt1_str[0]; + tev->group = strdup(fmt2_str); + tev->event = strdup(fmt3_str); + if (tev->group == NULL || tev->event == NULL) { + ret = -ENOMEM; + goto out; + } pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr); tp->retprobe = (pr == 'r'); @@ -1134,10 +1149,17 @@ static int parse_probe_trace_command(const char *cmd, p++; } else p = argv[1]; - ret = sscanf(p, "%a[^+]+%lu", (float *)(void *)&tp->symbol, - &tp->offset); - if (ret == 1) + fmt1_str = strtok_r(p, "+", &fmt); + tp->symbol = strdup(fmt1_str); + if (tp->symbol == NULL) { + ret = -ENOMEM; + goto out; + } + fmt2_str = strtok_r(NULL, "", &fmt); + if (fmt2_str == NULL) tp->offset = 0; + else + tp->offset = strtoul(fmt2_str, NULL, 10); tev->nargs = argc - 2; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); @@ -1161,6 +1183,7 @@ static int parse_probe_trace_command(const char *cmd, } ret = 0; out: + free(argv0_str); argv_free(argv); return ret; } @@ -2183,7 +2206,7 @@ static struct strfilter *available_func_filter; * If a symbol corresponds to a function with global binding and * matches filter return 0. For all others return 1. */ -static int filter_available_functions(struct map *map __unused, +static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { if (sym->binding == STB_GLOBAL && @@ -2307,10 +2330,17 @@ static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) function = NULL; } if (!pev->group) { - char *ptr1, *ptr2; + char *ptr1, *ptr2, *exec_copy; pev->group = zalloc(sizeof(char *) * 64); - ptr1 = strdup(basename(exec)); + exec_copy = strdup(exec); + if (!exec_copy) { + ret = -ENOMEM; + pr_warning("Failed to copy exec string.\n"); + goto out; + } + + ptr1 = strdup(basename(exec_copy)); if (ptr1) { ptr2 = strpbrk(ptr1, "-._"); if (ptr2) @@ -2319,6 +2349,7 @@ static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) ptr1); free(ptr1); } + free(exec_copy); } free(pp->function); pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index d448984ed78..1daf5c14e75 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -207,7 +207,7 @@ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, #else /* With older elfutils, this just support kernel module... */ static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self, - Dwarf_Addr addr __used) + Dwarf_Addr addr __maybe_unused) { const char *path = kernel_get_module_path("kernel"); @@ -525,8 +525,10 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -ENOENT; } /* Verify it is a data structure */ - if (dwarf_tag(&type) != DW_TAG_structure_type) { - pr_warning("%s is not a data structure.\n", varname); + tag = dwarf_tag(&type); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + pr_warning("%s is not a data structure nor an union.\n", + varname); return -EINVAL; } @@ -539,8 +541,9 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } else { /* Verify it is a data structure */ - if (tag != DW_TAG_structure_type) { - pr_warning("%s is not a data structure.\n", varname); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + pr_warning("%s is not a data structure nor an union.\n", + varname); return -EINVAL; } if (field->name[0] == '[') { @@ -567,10 +570,15 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } /* Get the offset of the field */ - ret = die_get_data_member_location(die_mem, &offs); - if (ret < 0) { - pr_warning("Failed to get the offset of %s.\n", field->name); - return ret; + if (tag == DW_TAG_union_type) { + offs = 0; + } else { + ret = die_get_data_member_location(die_mem, &offs); + if (ret < 0) { + pr_warning("Failed to get the offset of %s.\n", + field->name); + return ret; + } } ref->offset += (long)offs; @@ -1419,7 +1427,7 @@ static int line_range_add_line(const char *src, unsigned int lineno, } static int line_range_walk_cb(const char *fname, int lineno, - Dwarf_Addr addr __used, + Dwarf_Addr addr __maybe_unused, void *data) { struct line_finder *lf = data; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 213362850ab..c40c2d33199 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -1,5 +1,5 @@ # -# List of files needed by perf python extention +# List of files needed by perf python extension # # Each source file must be placed on its own line so that it can be # processed by Makefile and util/setup.py accordingly. diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0688bfb6d28..9181bf212fb 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -627,7 +627,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, * This will group just the fds for this single evsel, to group * multiple events, use evlist.open(). */ - if (perf_evsel__open(evsel, cpus, threads, group, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -672,7 +672,7 @@ struct pyrf_evlist { }; static int pyrf_evlist__init(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs __used) + PyObject *args, PyObject *kwargs __maybe_unused) { PyObject *pcpus = NULL, *pthreads = NULL; struct cpu_map *cpus; @@ -733,7 +733,8 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, } static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, - PyObject *args __used, PyObject *kwargs __used) + PyObject *args __maybe_unused, + PyObject *kwargs __maybe_unused) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *list = PyList_New(0); @@ -765,7 +766,8 @@ free_list: static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs __used) + PyObject *args, + PyObject *kwargs __maybe_unused) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *pevsel; @@ -803,7 +805,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (pyevent == NULL) return PyErr_NoMemory(); - err = perf_evlist__parse_sample(evlist, event, &pevent->sample, false); + err = perf_evlist__parse_sample(evlist, event, &pevent->sample); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); @@ -824,7 +826,10 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group)) return NULL; - if (perf_evlist__open(evlist, group) < 0) { + if (group) + perf_evlist__set_leader(evlist); + + if (perf_evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 02dfa19a467..f80605eb185 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -25,16 +25,16 @@ #include <ctype.h> #include <errno.h> -#include "../../perf.h" #include "../util.h" +#include <EXTERN.h> +#include <perl.h> + +#include "../../perf.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" #include "../evsel.h" -#include <EXTERN.h> -#include <perl.h> - void boot_Perf__Trace__Context(pTHX_ CV *cv); void boot_DynaLoader(pTHX_ CV *cv); typedef PerlInterpreter * INTERP; @@ -237,16 +237,16 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -257,23 +257,22 @@ struct event_format *find_cache_event(struct pevent *pevent, int type) return event; } -static void perl_process_tracepoint(union perf_event *perf_event __unused, - struct pevent *pevent, +static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __unused, - struct thread *thread) + struct machine *machine __maybe_unused, + struct addr_location *al) { struct format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; struct event_format *event; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = thread->comm; dSP; @@ -281,13 +280,11 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! no event found for type %" PRIu64, evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler, "%s::%s", event->system, event->name); @@ -320,7 +317,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); @@ -349,11 +346,11 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, LEAVE; } -static void perl_process_event_generic(union perf_event *pevent __unused, +static void perl_process_event_generic(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __unused, - struct machine *machine __unused, - struct thread *thread __unused) + struct perf_evsel *evsel, + struct machine *machine __maybe_unused, + struct addr_location *al __maybe_unused) { dSP; @@ -363,7 +360,7 @@ static void perl_process_event_generic(union perf_event *pevent __unused, ENTER; SAVETMPS; PUSHMARK(SP); - XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size))); + XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size))); XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size))); @@ -376,14 +373,13 @@ static void perl_process_event_generic(union perf_event *pevent __unused, } static void perl_process_event(union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct addr_location *al) { - perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); - perl_process_event_generic(event, sample, evsel, machine, thread); + perl_process_tracepoint(event, sample, evsel, machine, al); + perl_process_event_generic(event, sample, evsel, machine, al); } static void run_start_sub(void) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ce4d1b0c386..730c6630cba 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -27,10 +27,12 @@ #include <errno.h> #include "../../perf.h" +#include "../evsel.h" #include "../util.h" #include "../event.h" #include "../thread.h" #include "../trace-event.h" +#include "../evsel.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -194,16 +196,21 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; + /* + * XXX: Do we really need to cache this since now we have evsel->tp_format + * cached already? Need to re-read this "cache" routine that as well calls + * define_event_symbols() :-\ + */ if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -214,12 +221,12 @@ struct event_format *find_cache_event(struct pevent *pevent, int type) return event; } -static void python_process_event(union perf_event *perf_event __unused, - struct pevent *pevent, +static void python_process_tracepoint(union perf_event *perf_event + __maybe_unused, struct perf_sample *sample, - struct perf_evsel *evsel __unused, - struct machine *machine __unused, - struct thread *thread) + struct perf_evsel *evsel, + struct machine *machine __maybe_unused, + struct addr_location *al) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; static char handler_name[256]; @@ -228,24 +235,22 @@ static void python_process_event(union perf_event *perf_event __unused, unsigned long s, ns; struct event_format *event; unsigned n = 0; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = thread->comm; t = PyTuple_New(MAX_FIELDS); if (!t) Py_FatalError("couldn't create Python tuple"); - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! no event found for type %d", (int)evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler_name, "%s__%s", event->system, event->name); @@ -290,7 +295,7 @@ static void python_process_event(union perf_event *perf_event __unused, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && @@ -335,6 +340,84 @@ static void python_process_event(union perf_event *perf_event __unused, Py_DECREF(t); } +static void python_process_general_event(union perf_event *perf_event + __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused, + struct addr_location *al) +{ + PyObject *handler, *retval, *t, *dict; + static char handler_name[64]; + unsigned n = 0; + struct thread *thread = al->thread; + + /* + * Use the MAX_FIELDS to make the function expandable, though + * currently there is only one item for the tuple. + */ + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + dict = PyDict_New(); + if (!dict) + Py_FatalError("couldn't create Python dictionary"); + + snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (!handler || !PyCallable_Check(handler)) + goto exit; + + PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( + (const char *)&evsel->attr, sizeof(evsel->attr))); + PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize( + (const char *)sample, sizeof(*sample))); + PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize( + (const char *)sample->raw_data, sample->raw_size)); + PyDict_SetItemString(dict, "comm", + PyString_FromString(thread->comm)); + if (al->map) { + PyDict_SetItemString(dict, "dso", + PyString_FromString(al->map->dso->name)); + } + if (al->sym) { + PyDict_SetItemString(dict, "symbol", + PyString_FromString(al->sym->name)); + } + + PyTuple_SetItem(t, n++, dict); + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); +exit: + Py_DECREF(dict); + Py_DECREF(t); +} + +static void python_process_event(union perf_event *perf_event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine, + struct addr_location *al) +{ + switch (evsel->attr.type) { + case PERF_TYPE_TRACEPOINT: + python_process_tracepoint(perf_event, sample, evsel, + machine, al); + break; + /* Reserve for future process_hw/sw/raw APIs */ + default: + python_process_general_event(perf_event, sample, evsel, + machine, al); + } +} + static int run_start_sub(void) { PyObject *handler, *retval; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2437fb0b463..8cdd23239c9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,9 @@ #include "util.h" #include "cpumap.h" #include "event-parse.h" +#include "perf_regs.h" +#include "unwind.h" +#include "vdso.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -209,6 +212,7 @@ void perf_session__delete(struct perf_session *self) machine__exit(&self->host_machine); close(self->fd); free(self); + vdso__exit(); } void machine__remove_thread(struct machine *self, struct thread *th) @@ -288,10 +292,11 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) +static int machine__resolve_callchain_sample(struct machine *machine, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) + { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; @@ -316,11 +321,14 @@ int machine__resolve_callchain(struct machine *self, if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; break; + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; break; + cpumode = PERF_RECORD_MISC_KERNEL; + break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; break; + cpumode = PERF_RECORD_MISC_USER; + break; default: pr_debug("invalid callchain context: " "%"PRId64"\n", (s64) ip); @@ -335,7 +343,7 @@ int machine__resolve_callchain(struct machine *self, } al.filtered = false; - thread__find_addr_location(thread, self, cpumode, + thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION, ip, &al, NULL); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -354,49 +362,92 @@ int machine__resolve_callchain(struct machine *self, return 0; } -static int process_event_synth_tracing_data_stub(union perf_event *event __used, - struct perf_session *session __used) +static int unwind_entry(struct unwind_entry *entry, void *arg) +{ + struct callchain_cursor *cursor = arg; + return callchain_cursor_append(cursor, entry->ip, + entry->map, entry->sym); +} + +int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, + struct thread *thread, + struct perf_sample *sample, + struct symbol **parent) + +{ + int ret; + + callchain_cursor_reset(&callchain_cursor); + + ret = machine__resolve_callchain_sample(machine, thread, + sample->callchain, parent); + if (ret) + return ret; + + /* Can we do dwarf post unwind? */ + if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && + (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) + return 0; + + /* Bail out if nothing was captured. */ + if ((!sample->user_regs.regs) || + (!sample->user_stack.size)) + return 0; + + return unwind__get_entries(unwind_entry, &callchain_cursor, machine, + thread, evsel->attr.sample_regs_user, + sample); + +} + +static int process_event_synth_tracing_data_stub(union perf_event *event + __maybe_unused, + struct perf_session *session + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_synth_attr_stub(union perf_event *event __used, - struct perf_evlist **pevlist __used) +static int process_event_synth_attr_stub(union perf_event *event __maybe_unused, + struct perf_evlist **pevlist + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_sample_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_sample *sample __used, - struct perf_evsel *evsel __used, - struct machine *machine __used) +static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_sample *sample __used, - struct machine *machine __used) +static int process_event_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_finished_round_stub(struct perf_tool *tool __used, - union perf_event *event __used, - struct perf_session *perf_session __used) +static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_type_stub(struct perf_tool *tool __used, - union perf_event *event __used) +static int process_event_type_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -473,7 +524,7 @@ static void swap_sample_id_all(union perf_event *event, void *data) } static void perf_event__all64_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); @@ -487,7 +538,7 @@ static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) if (sample_id_all) { void *data = &event->comm.comm; - data += ALIGN(strlen(data) + 1, sizeof(u64)); + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); swap_sample_id_all(event, data); } } @@ -504,7 +555,7 @@ static void perf_event__mmap_swap(union perf_event *event, if (sample_id_all) { void *data = &event->mmap.filename; - data += ALIGN(strlen(data) + 1, sizeof(u64)); + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); swap_sample_id_all(event, data); } } @@ -584,7 +635,7 @@ void perf_event__attr_swap(struct perf_event_attr *attr) } static void perf_event__hdr_attr_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { size_t size; @@ -596,14 +647,14 @@ static void perf_event__hdr_attr_swap(union perf_event *event, } static void perf_event__event_type_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } static void perf_event__tracing_data_swap(union perf_event *event, - bool sample_id_all __used) + bool sample_id_all __maybe_unused) { event->tracing_data.size = bswap_32(event->tracing_data.size); } @@ -652,7 +703,7 @@ static int perf_session_deliver_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset); -static void flush_sample_queue(struct perf_session *s, +static int flush_sample_queue(struct perf_session *s, struct perf_tool *tool) { struct ordered_samples *os = &s->ordered_samples; @@ -665,19 +716,21 @@ static void flush_sample_queue(struct perf_session *s, int ret; if (!tool->ordered_samples || !limit) - return; + return 0; list_for_each_entry_safe(iter, tmp, head, list) { if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample, - s->header.needs_swap); + ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); - else - perf_session_deliver_event(s, iter->event, &sample, tool, - iter->file_offset); + else { + ret = perf_session_deliver_event(s, iter->event, &sample, tool, + iter->file_offset); + if (ret) + return ret; + } os->last_flush = iter->timestamp; list_del(&iter->list); @@ -697,6 +750,8 @@ static void flush_sample_queue(struct perf_session *s, } os->nr_samples = 0; + + return 0; } /* @@ -739,13 +794,14 @@ static void flush_sample_queue(struct perf_session *s, * etc... */ static int process_finished_round(struct perf_tool *tool, - union perf_event *event __used, + union perf_event *event __maybe_unused, struct perf_session *session) { - flush_sample_queue(session, tool); - session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; + int ret = flush_sample_queue(session, tool); + if (!ret) + session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; - return 0; + return ret; } /* The queue is ordered by time */ @@ -860,6 +916,34 @@ static void branch_stack__printf(struct perf_sample *sample) sample->branch_stack->entries[i].to); } +static void regs_dump__printf(u64 mask, u64 *regs) +{ + unsigned rid, i = 0; + + for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs[i++]; + + printf(".... %-5s 0x%" PRIx64 "\n", + perf_reg_name(rid), val); + } +} + +static void regs_user__printf(struct perf_sample *sample, u64 mask) +{ + struct regs_dump *user_regs = &sample->user_regs; + + if (user_regs->regs) { + printf("... user regs: mask 0x%" PRIx64 "\n", mask); + regs_dump__printf(mask, user_regs->regs); + } +} + +static void stack_user__printf(struct stack_dump *dump) +{ + printf("... ustack: size %" PRIu64 ", offset 0x%x\n", + dump->size, dump->offset); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -897,7 +981,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } -static void dump_sample(struct perf_session *session, union perf_event *event, +static void dump_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; @@ -909,13 +993,19 @@ static void dump_sample(struct perf_session *session, union perf_event *event, event->header.misc, sample->pid, sample->tid, sample->ip, sample->period, sample->addr); - sample_type = perf_evlist__sample_type(session->evlist); + sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); if (sample_type & PERF_SAMPLE_BRANCH_STACK) branch_stack__printf(sample); + + if (sample_type & PERF_SAMPLE_REGS_USER) + regs_user__printf(sample, evsel->attr.sample_regs_user); + + if (sample_type & PERF_SAMPLE_STACK_USER) + stack_user__printf(&sample->user_stack); } static struct machine * @@ -973,7 +1063,7 @@ static int perf_session_deliver_event(struct perf_session *session, switch (event->header.type) { case PERF_RECORD_SAMPLE: - dump_sample(session, event, sample); + dump_sample(evsel, event, sample); if (evsel == NULL) { ++session->hists.stats.nr_unknown_id; return 0; @@ -1083,8 +1173,7 @@ static int perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample, - session->header.needs_swap); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); if (ret) return ret; @@ -1369,7 +1458,7 @@ more: err = 0; /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; - flush_sample_queue(session, tool); + err = flush_sample_queue(session, tool); out_err: perf_session__warn_about_errors(session, tool); perf_session_free_sample_buffers(session); @@ -1498,9 +1587,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset) +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset) { struct addr_location al; struct callchain_cursor_node *node; @@ -1514,8 +1603,9 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, if (symbol_conf.use_callchain && sample->callchain) { - if (machine__resolve_callchain(machine, al.thread, - sample->callchain, NULL) != 0) { + + if (machine__resolve_callchain(machine, evsel, al.thread, + sample, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 1f7ec87db7d..aab414fbb64 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -36,9 +36,7 @@ struct perf_session { struct pevent *pevent; /* * FIXME: Need to split this up further, we need global - * stats + per event stats. 'perf diff' also needs - * to properly support multiple events in a single - * perf.data file. + * stats + per event stats. */ struct hists hists; int fd; @@ -129,9 +127,9 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset); +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset); int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0f5a0a496bc..b5b1b921196 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,12 +8,11 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +int sort__has_sym = 0; int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; -char * field_sep; - LIST_HEAD(hist_entry__sort_list); static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) @@ -23,11 +22,11 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) va_start(ap, fmt); n = vsnprintf(bf, size, fmt, ap); - if (field_sep && n > 0) { + if (symbol_conf.field_sep && n > 0) { char *sep = bf; while (1) { - sep = strchr(sep, *field_sep); + sep = strchr(sep, *symbol_conf.field_sep); if (sep == NULL) break; *sep = '.'; @@ -172,7 +171,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, u64 ip, char level, char *bf, size_t size, - unsigned int width __used) + unsigned int width __maybe_unused) { size_t ret = 0; @@ -207,7 +206,8 @@ struct sort_entry sort_dso = { }; static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, self->level, bf, size, width); @@ -250,7 +250,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) } static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { FILE *fp; char cmd[PATH_MAX + 2], *path = self->srcline, *nl; @@ -399,7 +400,8 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) } static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { struct addr_map_symbol *from = &self->branch_info->from; return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, @@ -408,7 +410,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, } static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) + size_t size, + unsigned int width __maybe_unused) { struct addr_map_symbol *to = &self->branch_info->to; return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, @@ -509,6 +512,10 @@ int sort_dimension__add(const char *tok) return -EINVAL; } sort__has_parent = 1; + } else if (sd->entry == &sort_sym || + sd->entry == &sort_sym_from || + sd->entry == &sort_sym_to) { + sort__has_sym = 1; } if (sd->taken) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e724b26acd5..12d634792de 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,8 +31,8 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern int sort__has_sym; extern int sort__branch_mode; -extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c new file mode 100644 index 00000000000..23742126f47 --- /dev/null +++ b/tools/perf/util/stat.c @@ -0,0 +1,57 @@ +#include <math.h> + +#include "stat.h" + +void update_stats(struct stats *stats, u64 val) +{ + double delta; + + stats->n++; + delta = val - stats->mean; + stats->mean += delta / stats->n; + stats->M2 += delta*(val - stats->mean); +} + +double avg_stats(struct stats *stats) +{ + return stats->mean; +} + +/* + * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + * + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 = ------------------------------- + * n - 1 + * + * http://en.wikipedia.org/wiki/Stddev + * + * The std dev of the mean is related to the std dev by: + * + * s + * s_mean = ------- + * sqrt(n) + * + */ +double stddev_stats(struct stats *stats) +{ + double variance, variance_mean; + + if (!stats->n) + return 0.0; + + variance = stats->M2 / (stats->n - 1); + variance_mean = variance / stats->n; + + return sqrt(variance_mean); +} + +double rel_stddev_stats(double stddev, double avg) +{ + double pct = 0.0; + + if (avg) + pct = 100.0 * stddev/avg; + + return pct; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h new file mode 100644 index 00000000000..588367c3c76 --- /dev/null +++ b/tools/perf/util/stat.h @@ -0,0 +1,16 @@ +#ifndef __PERF_STATS_H +#define __PERF_STATS_H + +#include "types.h" + +struct stats +{ + double n, mean, M2; +}; + +void update_stats(struct stats *stats, u64 val); +double avg_stats(struct stats *stats); +double stddev_stats(struct stats *stats); +double rel_stddev_stats(double stddev, double avg); + +#endif diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 199bc4d8905..32170590892 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,5 @@ #include "util.h" -#include "string.h" +#include "linux/string.h" #define K 1024LL /* @@ -335,3 +335,19 @@ char *rtrim(char *s) return s; } + +/** + * memdup - duplicate region of memory + * @src: memory region to duplicate + * @len: memory region length + */ +void *memdup(const void *src, size_t len) +{ + void *p; + + p = malloc(len); + if (p) + memcpy(p, src, len); + + return p; +} diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 95856ff3dda..155d8b7078a 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -93,7 +93,7 @@ out: void strlist__remove(struct strlist *slist, struct str_node *snode) { - str_node__delete(snode, slist->dupstr); + rblist__remove_node(&slist->rblist, &snode->rb_node); } struct str_node *strlist__find(struct strlist *slist, const char *entry) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c new file mode 100644 index 00000000000..db0cc92cf2e --- /dev/null +++ b/tools/perf/util/symbol-elf.c @@ -0,0 +1,841 @@ +#include <libelf.h> +#include <gelf.h> +#include <elf.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> + +#include "symbol.h" +#include "debug.h" + +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + +/** + * elf_symtab__for_each_symbol - iterate thru all the symbols + * + * @syms: struct elf_symtab instance to iterate + * @idx: uint32_t idx + * @sym: GElf_Sym iterator + */ +#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ + for (idx = 0, gelf_getsym(syms, idx, &sym);\ + idx < nr_syms; \ + idx++, gelf_getsym(syms, idx, &sym)) + +static inline uint8_t elf_sym__type(const GElf_Sym *sym) +{ + return GELF_ST_TYPE(sym->st_info); +} + +static inline int elf_sym__is_function(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_FUNC && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF; +} + +static inline bool elf_sym__is_object(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_OBJECT && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF; +} + +static inline int elf_sym__is_label(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_NOTYPE && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF && + sym->st_shndx != SHN_ABS; +} + +static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sym__is_function(sym); + case MAP__VARIABLE: + return elf_sym__is_object(sym); + default: + return false; + } +} + +static inline const char *elf_sym__name(const GElf_Sym *sym, + const Elf_Data *symstrs) +{ + return symstrs->d_buf + sym->st_name; +} + +static inline const char *elf_sec__name(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return secstrs->d_buf + shdr->sh_name; +} + +static inline int elf_sec__is_text(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; +} + +static inline bool elf_sec__is_data(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; +} + +static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, + enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sec__is_text(shdr, secstrs); + case MAP__VARIABLE: + return elf_sec__is_data(shdr, secstrs); + default: + return false; + } +} + +static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) +{ + Elf_Scn *sec = NULL; + GElf_Shdr shdr; + size_t cnt = 1; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + gelf_getshdr(sec, &shdr); + + if ((addr >= shdr.sh_addr) && + (addr < (shdr.sh_addr + shdr.sh_size))) + return cnt; + + ++cnt; + } + + return -1; +} + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, + size_t *idx) +{ + Elf_Scn *sec = NULL; + size_t cnt = 1; + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) + return NULL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) { + if (idx) + *idx = cnt; + break; + } + ++cnt; + } + + return sec; +} + +#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) + +#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) + +/* + * We need to check if we have a .dynsym, so that we can handle the + * .plt, synthesizing its symbols, that aren't on the symtabs (be it + * .dynsym or .symtab). + * And always look at the original dso, not at debuginfo packages, that + * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). + */ +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, + symbol_filter_t filter) +{ + uint32_t nr_rel_entries, idx; + GElf_Sym sym; + u64 plt_offset; + GElf_Shdr shdr_plt; + struct symbol *f; + GElf_Shdr shdr_rel_plt, shdr_dynsym; + Elf_Data *reldata, *syms, *symstrs; + Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; + size_t dynsym_idx; + GElf_Ehdr ehdr; + char sympltname[1024]; + Elf *elf; + int nr = 0, symidx, err = 0; + + if (!ss->dynsym) + return 0; + + elf = ss->elf; + ehdr = ss->ehdr; + + scn_dynsym = ss->dynsym; + shdr_dynsym = ss->dynshdr; + dynsym_idx = ss->dynsym_idx; + + if (scn_dynsym == NULL) + goto out_elf_end; + + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rela.plt", NULL); + if (scn_plt_rel == NULL) { + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rel.plt", NULL); + if (scn_plt_rel == NULL) + goto out_elf_end; + } + + err = -1; + + if (shdr_rel_plt.sh_link != dynsym_idx) + goto out_elf_end; + + if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) + goto out_elf_end; + + /* + * Fetch the relocation section to find the idxes to the GOT + * and the symbols in the .dynsym they refer to. + */ + reldata = elf_getdata(scn_plt_rel, NULL); + if (reldata == NULL) + goto out_elf_end; + + syms = elf_getdata(scn_dynsym, NULL); + if (syms == NULL) + goto out_elf_end; + + scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); + if (scn_symstrs == NULL) + goto out_elf_end; + + symstrs = elf_getdata(scn_symstrs, NULL); + if (symstrs == NULL) + goto out_elf_end; + + if (symstrs->d_size == 0) + goto out_elf_end; + + nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; + plt_offset = shdr_plt.sh_offset; + + if (shdr_rel_plt.sh_type == SHT_RELA) { + GElf_Rela pos_mem, *pos; + + elf_section__for_each_rela(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } else if (shdr_rel_plt.sh_type == SHT_REL) { + GElf_Rel pos_mem, *pos; + elf_section__for_each_rel(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } + + err = 0; +out_elf_end: + if (err == 0) + return nr; + pr_debug("%s: problems reading %s PLT info.\n", + __func__, dso->long_name); + return 0; +} + +/* + * Align offset to 4 bytes as needed for note name and descriptor data. + */ +#define NOTE_ALIGN(n) (((n) + 3) & -4U) + +static int elf_read_build_id(Elf *elf, void *bf, size_t size) +{ + int err = -1; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + void *ptr; + + if (size < BUILD_ID_SIZE) + goto out; + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out; + } + + /* + * Check following sections for notes: + * '.note.gnu.build-id' + * '.notes' + * '.note' (VDSO specific) + */ + do { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".notes", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note", NULL); + if (sec) + break; + + return err; + + } while (0); + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out; + + ptr = data->d_buf; + while (ptr < (data->d_buf + data->d_size)) { + GElf_Nhdr *nhdr = ptr; + size_t namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); + const char *name; + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + err = descsz; + break; + } + } + ptr += descsz; + } + +out: + return err; +} + +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + int fd, err = -1; + Elf *elf; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + err = elf_read_build_id(elf, bf, size); + + elf_end(elf); +out_close: + close(fd); +out: + return err; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd, err = -1; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + while (1) { + char bf[BUFSIZ]; + GElf_Nhdr nhdr; + size_t namesz, descsz; + + if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) + break; + + namesz = NOTE_ALIGN(nhdr.n_namesz); + descsz = NOTE_ALIGN(nhdr.n_descsz); + if (nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_namesz == sizeof("GNU")) { + if (read(fd, bf, namesz) != (ssize_t)namesz) + break; + if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(descsz, size); + if (read(fd, build_id, sz) == (ssize_t)sz) { + memset(build_id + sz, 0, size - sz); + err = 0; + break; + } + } else if (read(fd, bf, descsz) != (ssize_t)descsz) + break; + } else { + int n = namesz + descsz; + if (read(fd, bf, n) != n) + break; + } + } + close(fd); +out: + return err; +} + +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int fd, err = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out_close; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu_debuglink", NULL); + if (sec == NULL) + goto out_close; + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out_close; + + /* the start of this section is a zero-terminated string */ + strncpy(debuglink, data->d_buf, size); + + elf_end(elf); + +out_close: + close(fd); +out: + return err; +} + +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + +bool symsrc__possibly_runtime(struct symsrc *ss) +{ + return ss->dynsym || ss->opdsec; +} + +bool symsrc__has_symtab(struct symsrc *ss) +{ + return ss->symtab != NULL; +} + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + elf_end(ss->elf); + close(ss->fd); +} + +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type) +{ + int err = -1; + GElf_Ehdr ehdr; + Elf *elf; + int fd; + + fd = open(name, O_RDONLY); + if (fd < 0) + return -1; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + goto out_close; + } + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot get elf header.\n", __func__); + goto out_elf_end; + } + + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + + /* Always reject images with a mismatched build-id: */ + if (dso->has_build_id) { + u8 build_id[BUILD_ID_SIZE]; + + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + goto out_elf_end; + + if (!dso__build_id_equal(dso, build_id)) + goto out_elf_end; + } + + ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab", + NULL); + if (ss->symshdr.sh_type != SHT_SYMTAB) + ss->symtab = NULL; + + ss->dynsym_idx = 0; + ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym", + &ss->dynsym_idx); + if (ss->dynshdr.sh_type != SHT_DYNSYM) + ss->dynsym = NULL; + + ss->opdidx = 0; + ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd", + &ss->opdidx); + if (ss->opdshdr.sh_type != SHT_PROGBITS) + ss->opdsec = NULL; + + if (dso->kernel == DSO_TYPE_USER) { + GElf_Shdr shdr; + ss->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); + } else { + ss->adjust_symbols = 0; + } + + ss->name = strdup(name); + if (!ss->name) + goto out_elf_end; + + ss->elf = elf; + ss->fd = fd; + ss->ehdr = ehdr; + ss->type = type; + + return 0; + +out_elf_end: + elf_end(elf); +out_close: + close(fd); + return err; +} + +int dso__load_sym(struct dso *dso, struct map *map, + struct symsrc *syms_ss, struct symsrc *runtime_ss, + symbol_filter_t filter, int kmodule) +{ + struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map *curr_map = map; + struct dso *curr_dso = dso; + Elf_Data *symstrs, *secstrs; + uint32_t nr_syms; + int err = -1; + uint32_t idx; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *syms, *opddata = NULL; + GElf_Sym sym; + Elf_Scn *sec, *sec_strndx; + Elf *elf; + int nr = 0; + + dso->symtab_type = syms_ss->type; + + if (!syms_ss->symtab) { + syms_ss->symtab = syms_ss->dynsym; + syms_ss->symshdr = syms_ss->dynshdr; + } + + elf = syms_ss->elf; + ehdr = syms_ss->ehdr; + sec = syms_ss->symtab; + shdr = syms_ss->symshdr; + + if (runtime_ss->opdsec) + opddata = elf_rawdata(runtime_ss->opdsec, NULL); + + syms = elf_getdata(sec, NULL); + if (syms == NULL) + goto out_elf_end; + + sec = elf_getscn(elf, shdr.sh_link); + if (sec == NULL) + goto out_elf_end; + + symstrs = elf_getdata(sec, NULL); + if (symstrs == NULL) + goto out_elf_end; + + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs = elf_getdata(sec_strndx, NULL); + if (secstrs == NULL) + goto out_elf_end; + + nr_syms = shdr.sh_size / shdr.sh_entsize; + + memset(&sym, 0, sizeof(sym)); + dso->adjust_symbols = runtime_ss->adjust_symbols; + elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { + struct symbol *f; + const char *elf_name = elf_sym__name(&sym, symstrs); + char *demangled = NULL; + int is_label = elf_sym__is_label(&sym); + const char *section_name; + bool used_opd = false; + + if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && + strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) + kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + + if (!is_label && !elf_sym__is_a(&sym, map->type)) + continue; + + /* Reject ARM ELF "mapping symbols": these aren't unique and + * don't identify functions, so will confuse the profile + * output: */ + if (ehdr.e_machine == EM_ARM) { + if (!strcmp(elf_name, "$a") || + !strcmp(elf_name, "$d") || + !strcmp(elf_name, "$t")) + continue; + } + + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { + u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; + u64 *opd = opddata->d_buf + offset; + sym.st_value = DSO__SWAP(dso, u64, *opd); + sym.st_shndx = elf_addr_to_index(runtime_ss->elf, + sym.st_value); + used_opd = true; + } + + sec = elf_getscn(runtime_ss->elf, sym.st_shndx); + if (!sec) + goto out_elf_end; + + gelf_getshdr(sec, &shdr); + + if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) + continue; + + section_name = elf_sec__name(&shdr, secstrs); + + /* On ARM, symbols for thumb functions have 1 added to + * the symbol address as a flag - remove it */ + if ((ehdr.e_machine == EM_ARM) && + (map->type == MAP__FUNCTION) && + (sym.st_value & 1)) + --sym.st_value; + + if (dso->kernel != DSO_TYPE_USER || kmodule) { + char dso_name[PATH_MAX]; + + if (strcmp(section_name, + (curr_dso->short_name + + dso->short_name_len)) == 0) + goto new_symbol; + + if (strcmp(section_name, ".text") == 0) { + curr_map = map; + curr_dso = dso; + goto new_symbol; + } + + snprintf(dso_name, sizeof(dso_name), + "%s%s", dso->short_name, section_name); + + curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + if (curr_map == NULL) { + u64 start = sym.st_value; + + if (kmodule) + start += map->start + shdr.sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + goto out_elf_end; + curr_dso->kernel = dso->kernel; + curr_dso->long_name = dso->long_name; + curr_dso->long_name_len = dso->long_name_len; + curr_map = map__new2(start, curr_dso, + map->type); + if (curr_map == NULL) { + dso__delete(curr_dso); + goto out_elf_end; + } + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; + curr_dso->symtab_type = dso->symtab_type; + map_groups__insert(kmap->kmaps, curr_map); + dsos__add(&dso->node, curr_dso); + dso__set_loaded(curr_dso, map->type); + } else + curr_dso = curr_map->dso; + + goto new_symbol; + } + + if ((used_opd && runtime_ss->adjust_symbols) + || (!used_opd && syms_ss->adjust_symbols)) { + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, + (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); + if (demangled != NULL) + elf_name = demangled; +new_symbol: + f = symbol__new(sym.st_value, sym.st_size, + GELF_ST_BIND(sym.st_info), elf_name); + free(demangled); + if (!f) + goto out_elf_end; + + if (filter && filter(curr_map, f)) + symbol__delete(f); + else { + symbols__insert(&curr_dso->symbols[curr_map->type], f); + nr++; + } + } + + /* + * For misannotated, zeroed, ASM function sizes. + */ + if (nr > 0) { + symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols[map->type]); + if (kmap) { + /* + * We need to fixup this here too because we create new + * maps here, for things like vsyscall sections. + */ + __map_groups__fixup_end(kmap->kmaps, map->type); + } + } + err = nr; +out_elf_end: + return err; +} + +void symbol__elf_init(void) +{ + elf_version(EV_CURRENT); +} diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c new file mode 100644 index 00000000000..259f8f2ea9c --- /dev/null +++ b/tools/perf/util/symbol-minimal.c @@ -0,0 +1,307 @@ +#include "symbol.h" + +#include <elf.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <byteswap.h> +#include <sys/stat.h> + + +static bool check_need_swap(int file_endian) +{ + const int data = 1; + u8 *check = (u8 *)&data; + int host_endian; + + if (check[0] == 1) + host_endian = ELFDATA2LSB; + else + host_endian = ELFDATA2MSB; + + return host_endian != file_endian; +} + +#define NOTE_ALIGN(sz) (((sz) + 3) & ~3) + +#define NT_GNU_BUILD_ID 3 + +static int read_build_id(void *note_data, size_t note_len, void *bf, + size_t size, bool need_swap) +{ + struct { + u32 n_namesz; + u32 n_descsz; + u32 n_type; + } *nhdr; + void *ptr; + + ptr = note_data; + while (ptr < (note_data + note_len)) { + const char *name; + size_t namesz, descsz; + + nhdr = ptr; + if (need_swap) { + nhdr->n_namesz = bswap_32(nhdr->n_namesz); + nhdr->n_descsz = bswap_32(nhdr->n_descsz); + nhdr->n_type = bswap_32(nhdr->n_type); + } + + namesz = NOTE_ALIGN(nhdr->n_namesz); + descsz = NOTE_ALIGN(nhdr->n_descsz); + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + return 0; + } + } + ptr += descsz; + } + + return -1; +} + +int filename__read_debuglink(const char *filename __maybe_unused, + char *debuglink __maybe_unused, + size_t size __maybe_unused) +{ + return -1; +} + +/* + * Just try PT_NOTE header otherwise fails + */ +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + FILE *fp; + int ret = -1; + bool need_swap = false; + u8 e_ident[EI_NIDENT]; + size_t buf_size; + void *buf; + int i; + + fp = fopen(filename, "r"); + if (fp == NULL) + return -1; + + if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + goto out; + + if (memcmp(e_ident, ELFMAG, SELFMAG) || + e_ident[EI_VERSION] != EV_CURRENT) + goto out; + + need_swap = check_need_swap(e_ident[EI_DATA]); + + /* for simplicity */ + fseek(fp, 0, SEEK_SET); + + if (e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Ehdr ehdr; + Elf32_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_32(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_32(phdr->p_offset); + phdr->p_filesz = bswap_32(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } else { + Elf64_Ehdr ehdr; + Elf64_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_64(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_64(phdr->p_offset); + phdr->p_filesz = bswap_64(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } +out_free: + free(buf); +out: + fclose(fp); + return ret; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd; + int ret = -1; + struct stat stbuf; + size_t buf_size; + void *buf; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + + if (fstat(fd, &stbuf) < 0) + goto out; + + buf_size = stbuf.st_size; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + if (read(fd, buf, buf_size) != (ssize_t) buf_size) + goto out_free; + + ret = read_build_id(buf, buf_size, build_id, size, false); +out_free: + free(buf); +out: + close(fd); + return ret; +} + +int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, + const char *name, + enum dso_binary_type type) +{ + int fd = open(name, O_RDONLY); + if (fd < 0) + return -1; + + ss->name = strdup(name); + if (!ss->name) + goto out_close; + + ss->type = type; + + return 0; +out_close: + close(fd); + return -1; +} + +bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused) +{ + /* Assume all sym sources could be a runtime image. */ + return true; +} + +bool symsrc__has_symtab(struct symsrc *ss __maybe_unused) +{ + return false; +} + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + close(ss->fd); +} + +int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, + struct symsrc *ss __maybe_unused, + struct map *map __maybe_unused, + symbol_filter_t filter __maybe_unused) +{ + return 0; +} + +int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, + struct symsrc *ss, + struct symsrc *runtime_ss __maybe_unused, + symbol_filter_t filter __maybe_unused, + int kmodule __maybe_unused) +{ + unsigned char *build_id[BUILD_ID_SIZE]; + + if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { + dso__set_build_id(dso, build_id); + return 1; + } + return 0; +} + +void symbol__elf_init(void) +{ +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8b63b678e12..e2e8c697cff 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,8 +15,6 @@ #include "symbol.h" #include "strlist.h" -#include <libelf.h> -#include <gelf.h> #include <elf.h> #include <limits.h> #include <sys/utsname.h> @@ -25,15 +23,7 @@ #define KSYM_NAME_LEN 256 #endif -#ifndef NT_GNU_BUILD_ID -#define NT_GNU_BUILD_ID 3 -#endif - static void dso_cache__free(struct rb_root *root); -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id); -static int elf_read_build_id(Elf *elf, void *bf, size_t size); -static void dsos__add(struct list_head *head, struct dso *dso); -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, @@ -170,7 +160,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) return SYMBOL_B; } -static void symbols__fixup_duplicate(struct rb_root *symbols) +void symbols__fixup_duplicate(struct rb_root *symbols) { struct rb_node *nd; struct symbol *curr, *next; @@ -199,7 +189,7 @@ again: } } -static void symbols__fixup_end(struct rb_root *symbols) +void symbols__fixup_end(struct rb_root *symbols) { struct rb_node *nd, *prevnd = rb_first(symbols); struct symbol *curr, *prev; @@ -222,7 +212,7 @@ static void symbols__fixup_end(struct rb_root *symbols) curr->end = roundup(curr->start, 4096); } -static void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) { struct map *prev, *curr; struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]); @@ -252,8 +242,7 @@ static void map_groups__fixup_end(struct map_groups *mg) __map_groups__fixup_end(mg, i); } -static struct symbol *symbol__new(u64 start, u64 len, u8 binding, - const char *name) +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) { size_t namelen = strlen(name) + 1; struct symbol *sym = calloc(1, (symbol_conf.priv_size + @@ -390,7 +379,7 @@ void dso__set_build_id(struct dso *dso, void *build_id) dso->has_build_id = 1; } -static void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void symbols__insert(struct rb_root *symbols, struct symbol *sym) { struct rb_node **p = &symbols->rb_node; struct rb_node *parent = NULL; @@ -574,7 +563,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)) + char type, u64 start)) { char *line = NULL; size_t n; @@ -614,13 +603,8 @@ int kallsyms__parse(const char *filename, void *arg, break; } - /* - * module symbols are not sorted so we add all - * symbols with zero length and rely on - * symbols__fixup_end() to fix it up. - */ err = process_symbol(arg, symbol_name, - symbol_type, start, start); + symbol_type, start); if (err) break; } @@ -647,7 +631,7 @@ static u8 kallsyms2elf_type(char type) } static int map__process_kallsym_symbol(void *arg, const char *name, - char type, u64 start, u64 end) + char type, u64 start) { struct symbol *sym; struct process_kallsyms_args *a = arg; @@ -656,8 +640,12 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__is_a(type, a->map->type)) return 0; - sym = symbol__new(start, end - start + 1, - kallsyms2elf_type(type), name); + /* + * module symbols are not sorted so we add all + * symbols, setting length to 0, and rely on + * symbols__fixup_end() to fix it up. + */ + sym = symbol__new(start, 0, kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -904,556 +892,7 @@ out_failure: return -1; } -/** - * elf_symtab__for_each_symbol - iterate thru all the symbols - * - * @syms: struct elf_symtab instance to iterate - * @idx: uint32_t idx - * @sym: GElf_Sym iterator - */ -#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ - for (idx = 0, gelf_getsym(syms, idx, &sym);\ - idx < nr_syms; \ - idx++, gelf_getsym(syms, idx, &sym)) - -static inline uint8_t elf_sym__type(const GElf_Sym *sym) -{ - return GELF_ST_TYPE(sym->st_info); -} - -static inline int elf_sym__is_function(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_FUNC && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline bool elf_sym__is_object(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_OBJECT && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline int elf_sym__is_label(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_NOTYPE && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; -} - -static inline const char *elf_sec__name(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return secstrs->d_buf + shdr->sh_name; -} - -static inline int elf_sec__is_text(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; -} - -static inline bool elf_sec__is_data(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; -} - -static inline const char *elf_sym__name(const GElf_Sym *sym, - const Elf_Data *symstrs) -{ - return symstrs->d_buf + sym->st_name; -} - -static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, - size_t *idx) -{ - Elf_Scn *sec = NULL; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - char *str; - - gelf_getshdr(sec, shp); - str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); - if (!strcmp(name, str)) { - if (idx) - *idx = cnt; - break; - } - ++cnt; - } - - return sec; -} - -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) - -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) - -/* - * We need to check if we have a .dynsym, so that we can handle the - * .plt, synthesizing its symbols, that aren't on the symtabs (be it - * .dynsym or .symtab). - * And always look at the original dso, not at debuginfo packages, that - * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). - */ -static int -dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, - symbol_filter_t filter) -{ - uint32_t nr_rel_entries, idx; - GElf_Sym sym; - u64 plt_offset; - GElf_Shdr shdr_plt; - struct symbol *f; - GElf_Shdr shdr_rel_plt, shdr_dynsym; - Elf_Data *reldata, *syms, *symstrs; - Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; - size_t dynsym_idx; - GElf_Ehdr ehdr; - char sympltname[1024]; - Elf *elf; - int nr = 0, symidx, fd, err = 0; - - fd = open(name, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out_elf_end; - - scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, - ".dynsym", &dynsym_idx); - if (scn_dynsym == NULL) - goto out_elf_end; - - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rela.plt", NULL); - if (scn_plt_rel == NULL) { - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rel.plt", NULL); - if (scn_plt_rel == NULL) - goto out_elf_end; - } - - err = -1; - - if (shdr_rel_plt.sh_link != dynsym_idx) - goto out_elf_end; - - if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) - goto out_elf_end; - - /* - * Fetch the relocation section to find the idxes to the GOT - * and the symbols in the .dynsym they refer to. - */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) - goto out_elf_end; - - syms = elf_getdata(scn_dynsym, NULL); - if (syms == NULL) - goto out_elf_end; - - scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); - if (scn_symstrs == NULL) - goto out_elf_end; - - symstrs = elf_getdata(scn_symstrs, NULL); - if (symstrs == NULL) - goto out_elf_end; - - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; - - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } - - err = 0; -out_elf_end: - elf_end(elf); -out_close: - close(fd); - - if (err == 0) - return nr; -out: - pr_debug("%s: problems reading %s PLT info.\n", - __func__, dso->long_name); - return 0; -} - -static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sym__is_function(sym); - case MAP__VARIABLE: - return elf_sym__is_object(sym); - default: - return false; - } -} - -static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, - enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sec__is_text(shdr, secstrs); - case MAP__VARIABLE: - return elf_sec__is_data(shdr, secstrs); - default: - return false; - } -} - -static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) -{ - Elf_Scn *sec = NULL; - GElf_Shdr shdr; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - gelf_getshdr(sec, &shdr); - - if ((addr >= shdr.sh_addr) && - (addr < (shdr.sh_addr + shdr.sh_size))) - return cnt; - - ++cnt; - } - - return -1; -} - -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso->needs_swap = DSO_SWAP__NO; - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso->needs_swap = DSO_SWAP__YES; - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. */ - if (*(unsigned char const *)&endian != 0) - dso->needs_swap = DSO_SWAP__YES; - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - -static int dso__load_sym(struct dso *dso, struct map *map, const char *name, - int fd, symbol_filter_t filter, int kmodule, - int want_symtab) -{ - struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; - struct map *curr_map = map; - struct dso *curr_dso = dso; - Elf_Data *symstrs, *secstrs; - uint32_t nr_syms; - int err = -1; - uint32_t idx; - GElf_Ehdr ehdr; - GElf_Shdr shdr, opdshdr; - Elf_Data *syms, *opddata = NULL; - GElf_Sym sym; - Elf_Scn *sec, *sec_strndx, *opdsec; - Elf *elf; - int nr = 0; - size_t opdidx = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug("%s: cannot read %s ELF file.\n", __func__, name); - goto out_close; - } - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_debug("%s: cannot get elf header.\n", __func__); - goto out_elf_end; - } - - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) - goto out_elf_end; - - /* Always reject images with a mismatched build-id: */ - if (dso->has_build_id) { - u8 build_id[BUILD_ID_SIZE]; - - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) - goto out_elf_end; - - if (!dso__build_id_equal(dso, build_id)) - goto out_elf_end; - } - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); - if (sec == NULL) { - if (want_symtab) - goto out_elf_end; - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); - if (sec == NULL) - goto out_elf_end; - } - - opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); - if (opdshdr.sh_type != SHT_PROGBITS) - opdsec = NULL; - if (opdsec) - opddata = elf_rawdata(opdsec, NULL); - - syms = elf_getdata(sec, NULL); - if (syms == NULL) - goto out_elf_end; - - sec = elf_getscn(elf, shdr.sh_link); - if (sec == NULL) - goto out_elf_end; - - symstrs = elf_getdata(sec, NULL); - if (symstrs == NULL) - goto out_elf_end; - - sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); - if (sec_strndx == NULL) - goto out_elf_end; - - secstrs = elf_getdata(sec_strndx, NULL); - if (secstrs == NULL) - goto out_elf_end; - - nr_syms = shdr.sh_size / shdr.sh_entsize; - - memset(&sym, 0, sizeof(sym)); - if (dso->kernel == DSO_TYPE_USER) { - dso->adjust_symbols = (ehdr.e_type == ET_EXEC || - elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { - dso->adjust_symbols = 0; - } - elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { - struct symbol *f; - const char *elf_name = elf_sym__name(&sym, symstrs); - char *demangled = NULL; - int is_label = elf_sym__is_label(&sym); - const char *section_name; - - if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && - strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) - kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; - - if (!is_label && !elf_sym__is_a(&sym, map->type)) - continue; - - /* Reject ARM ELF "mapping symbols": these aren't unique and - * don't identify functions, so will confuse the profile - * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) - continue; - } - - if (opdsec && sym.st_shndx == opdidx) { - u32 offset = sym.st_value - opdshdr.sh_addr; - u64 *opd = opddata->d_buf + offset; - sym.st_value = DSO__SWAP(dso, u64, *opd); - sym.st_shndx = elf_addr_to_index(elf, sym.st_value); - } - - sec = elf_getscn(elf, sym.st_shndx); - if (!sec) - goto out_elf_end; - - gelf_getshdr(sec, &shdr); - - if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) - continue; - - section_name = elf_sec__name(&shdr, secstrs); - - /* On ARM, symbols for thumb functions have 1 added to - * the symbol address as a flag - remove it */ - if ((ehdr.e_machine == EM_ARM) && - (map->type == MAP__FUNCTION) && - (sym.st_value & 1)) - --sym.st_value; - - if (dso->kernel != DSO_TYPE_USER || kmodule) { - char dso_name[PATH_MAX]; - - if (strcmp(section_name, - (curr_dso->short_name + - dso->short_name_len)) == 0) - goto new_symbol; - - if (strcmp(section_name, ".text") == 0) { - curr_map = map; - curr_dso = dso; - goto new_symbol; - } - - snprintf(dso_name, sizeof(dso_name), - "%s%s", dso->short_name, section_name); - - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); - if (curr_map == NULL) { - u64 start = sym.st_value; - - if (kmodule) - start += map->start + shdr.sh_offset; - - curr_dso = dso__new(dso_name); - if (curr_dso == NULL) - goto out_elf_end; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; - curr_map = map__new2(start, curr_dso, - map->type); - if (curr_map == NULL) { - dso__delete(curr_dso); - goto out_elf_end; - } - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; - curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); - dsos__add(&dso->node, curr_dso); - dso__set_loaded(curr_dso, map->type); - } else - curr_dso = curr_map->dso; - - goto new_symbol; - } - - if (curr_dso->adjust_symbols) { - pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); - if (demangled != NULL) - elf_name = demangled; -new_symbol: - f = symbol__new(sym.st_value, sym.st_size, - GELF_ST_BIND(sym.st_info), elf_name); - free(demangled); - if (!f) - goto out_elf_end; - - if (filter && filter(curr_map, f)) - symbol__delete(f); - else { - symbols__insert(&curr_dso->symbols[curr_map->type], f); - nr++; - } - } - - /* - * For misannotated, zeroed, ASM function sizes. - */ - if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); - symbols__fixup_end(&dso->symbols[map->type]); - if (kmap) { - /* - * We need to fixup this here too because we create new - * maps here, for things like vsyscall sections. - */ - __map_groups__fixup_end(kmap->kmaps, map->type); - } - } - err = nr; -out_elf_end: - elf_end(elf); -out_close: - return err; -} - -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id) +bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; } @@ -1480,216 +919,11 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -/* - * Align offset to 4 bytes as needed for note name and descriptor data. - */ -#define NOTE_ALIGN(n) (((n) + 3) & -4U) - -static int elf_read_build_id(Elf *elf, void *bf, size_t size) -{ - int err = -1; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - void *ptr; - - if (size < BUILD_ID_SIZE) - goto out; - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_err("%s: cannot get elf header.\n", __func__); - goto out; - } - - /* - * Check following sections for notes: - * '.note.gnu.build-id' - * '.notes' - * '.note' (VDSO specific) - */ - do { - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note.gnu.build-id", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".notes", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note", NULL); - if (sec) - break; - - return err; - - } while (0); - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out; - - ptr = data->d_buf; - while (ptr < (data->d_buf + data->d_size)) { - GElf_Nhdr *nhdr = ptr; - size_t namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); - const char *name; - - ptr += sizeof(*nhdr); - name = ptr; - ptr += namesz; - if (nhdr->n_type == NT_GNU_BUILD_ID && - nhdr->n_namesz == sizeof("GNU")) { - if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); - err = descsz; - break; - } - } - ptr += descsz; - } - -out: - return err; -} - -int filename__read_build_id(const char *filename, void *bf, size_t size) -{ - int fd, err = -1; - Elf *elf; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - err = elf_read_build_id(elf, bf, size); - - elf_end(elf); -out_close: - close(fd); -out: - return err; -} - -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) -{ - int fd, err = -1; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - while (1) { - char bf[BUFSIZ]; - GElf_Nhdr nhdr; - size_t namesz, descsz; - - if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) - break; - - namesz = NOTE_ALIGN(nhdr.n_namesz); - descsz = NOTE_ALIGN(nhdr.n_descsz); - if (nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != (ssize_t)namesz) - break; - if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(descsz, size); - if (read(fd, build_id, sz) == (ssize_t)sz) { - memset(build_id + sz, 0, size - sz); - err = 0; - break; - } - } else if (read(fd, bf, descsz) != (ssize_t)descsz) - break; - } else { - int n = namesz + descsz; - if (read(fd, bf, n) != n) - break; - } - } - close(fd); -out: - return err; -} - -static int filename__read_debuglink(const char *filename, - char *debuglink, size_t size) -{ - int fd, err = -1; - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_err("%s: cannot get elf header.\n", __func__); - goto out_close; - } - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".gnu_debuglink", NULL); - if (sec == NULL) - goto out_close; - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out_close; - - /* the start of this section is a zero-terminated string */ - strncpy(debuglink, data->d_buf, size); - - elf_end(elf); - -out_close: - close(fd); -out: - return err; -} - char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__VMLINUX] = 'v', [DSO_BINARY_TYPE__JAVA_JIT] = 'j', [DSO_BINARY_TYPE__DEBUGLINK] = 'l', [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', @@ -1700,6 +934,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', }; if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) @@ -1775,7 +1010,9 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, default: case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; @@ -1789,11 +1026,12 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { char *name; int ret = -1; - int fd; u_int i; struct machine *machine; char *root_dir = (char *) ""; - int want_symtab; + int ss_pos = 0; + struct symsrc ss_[2]; + struct symsrc *syms_ss = NULL, *runtime_ss = NULL; dso__set_loaded(dso, map->type); @@ -1835,54 +1073,69 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir = machine->root_dir; /* Iterate over candidate debug images. - * On the first pass, only load images if they have a full symtab. - * Failing that, do a second pass where we accept .dynsym also + * Keep track of "interesting" ones (those which have a symtab, dynsym, + * and/or opd section) for processing. */ - want_symtab = 1; -restart: for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { + struct symsrc *ss = &ss_[ss_pos]; + bool next_slot = false; - dso->symtab_type = binary_type_symtab[i]; + enum dso_binary_type symtab_type = binary_type_symtab[i]; - if (dso__binary_type_file(dso, dso->symtab_type, + if (dso__binary_type_file(dso, symtab_type, root_dir, name, PATH_MAX)) continue; /* Name is now the name of the next image to try */ - fd = open(name, O_RDONLY); - if (fd < 0) + if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; - ret = dso__load_sym(dso, map, name, fd, filter, 0, - want_symtab); - close(fd); + if (!syms_ss && symsrc__has_symtab(ss)) { + syms_ss = ss; + next_slot = true; + } - /* - * Some people seem to have debuginfo files _WITHOUT_ debug - * info!?!? - */ - if (!ret) - continue; + if (!runtime_ss && symsrc__possibly_runtime(ss)) { + runtime_ss = ss; + next_slot = true; + } - if (ret > 0) { - int nr_plt; + if (next_slot) { + ss_pos++; - nr_plt = dso__synthesize_plt_symbols(dso, name, map, filter); - if (nr_plt > 0) - ret += nr_plt; - break; + if (syms_ss && runtime_ss) + break; } + } - /* - * If we wanted a full symtab but no image had one, - * relax our requirements and repeat the search. - */ - if (ret <= 0 && want_symtab) { - want_symtab = 0; - goto restart; + if (!runtime_ss && !syms_ss) + goto out_free; + + if (runtime_ss && !syms_ss) { + syms_ss = runtime_ss; + } + + /* We'll have to hope for the best */ + if (!runtime_ss && syms_ss) + runtime_ss = syms_ss; + + if (syms_ss) + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0); + else + ret = -1; + + if (ret > 0) { + int nr_plt; + + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter); + if (nr_plt > 0) + ret += nr_plt; } + for (; ss_pos > 0; ss_pos--) + symsrc__destroy(&ss_[ss_pos - 1]); +out_free: free(name); if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) return 0; @@ -2030,25 +1283,6 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); } -/* - * Constructor variant for modules (where we know from /proc/modules where - * they are loaded) and for vmlinux, where only after we load all the - * symbols we'll know where it starts and ends. - */ -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) -{ - struct map *map = calloc(1, (sizeof(*map) + - (dso->kernel ? sizeof(struct kmap) : 0))); - if (map != NULL) { - /* - * ->end will be filled after we load all the symbols - */ - map__init(map, type, start, 0, 0, dso); - } - - return map; -} - struct map *machine__new_module(struct machine *machine, u64 start, const char *filename) { @@ -2141,22 +1375,30 @@ out_failure: int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, symbol_filter_t filter) { - int err = -1, fd; + int err = -1; + struct symsrc ss; char symfs_vmlinux[PATH_MAX]; + enum dso_binary_type symtab_type; snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s", symbol_conf.symfs, vmlinux); - fd = open(symfs_vmlinux, O_RDONLY); - if (fd < 0) + + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + else + symtab_type = DSO_BINARY_TYPE__VMLINUX; + + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; - dso__set_long_name(dso, (char *)vmlinux); - dso__set_loaded(dso, map->type); - err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0); - close(fd); + err = dso__load_sym(dso, map, &ss, &ss, filter, 0); + symsrc__destroy(&ss); - if (err > 0) + if (err > 0) { + dso__set_long_name(dso, (char *)vmlinux); + dso__set_loaded(dso, map->type); pr_debug("Using %s for symbols\n", symfs_vmlinux); + } return err; } @@ -2173,10 +1415,8 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, filter); - if (err > 0) { - dso__set_long_name(dso, filename); + if (err > 0) goto out; - } free(filename); } @@ -2291,9 +1531,8 @@ do_kallsyms: free(kallsyms_allocated_filename); if (err > 0) { + dso__set_long_name(dso, strdup("[kernel.kallsyms]")); out_fixup: - if (kallsyms_filename != NULL) - dso__set_long_name(dso, strdup("[kernel.kallsyms]")); map__fixup_start(map); map__fixup_end(map); } @@ -2352,12 +1591,12 @@ out_try_fixup: return err; } -static void dsos__add(struct list_head *head, struct dso *dso) +void dsos__add(struct list_head *head, struct dso *dso) { list_add_tail(&dso->node, head); } -static struct dso *dsos__find(struct list_head *head, const char *name) +struct dso *dsos__find(struct list_head *head, const char *name) { struct dso *pos; @@ -2516,7 +1755,7 @@ struct process_args { }; static int symbol__in_kernel(void *arg, const char *name, - char type __used, u64 start, u64 end __used) + char type __maybe_unused, u64 start) { struct process_args *args = arg; @@ -2752,9 +1991,10 @@ int symbol__init(void) if (symbol_conf.initialized) return 0; - symbol_conf.priv_size = ALIGN(symbol_conf.priv_size, sizeof(u64)); + symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64)); + + symbol__elf_init(); - elf_version(EV_CURRENT); if (symbol_conf.sort_by_name) symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) - sizeof(struct symbol)); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1fe733a1e21..b441b07172b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -10,22 +10,31 @@ #include <linux/rbtree.h> #include <stdio.h> #include <byteswap.h> +#include <libgen.h> + +#ifndef NO_LIBELF_SUPPORT +#include <libelf.h> +#include <gelf.h> +#include <elf.h> +#endif #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); -static inline char *bfd_demangle(void __used *v, const char *c, int i) +static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) { return cplus_demangle(c, i); } #else #ifdef NO_DEMANGLE -static inline char *bfd_demangle(void __used *v, const char __used *c, - int __used i) +static inline char *bfd_demangle(void __maybe_unused *v, + const char __maybe_unused *c, + int __maybe_unused i) { return NULL; } #else +#define PACKAGE 'perf' #include <bfd.h> #endif #endif @@ -158,6 +167,8 @@ struct addr_location { enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__VMLINUX, + DSO_BINARY_TYPE__GUEST_VMLINUX, DSO_BINARY_TYPE__JAVA_JIT, DSO_BINARY_TYPE__DEBUGLINK, DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -217,6 +228,36 @@ struct dso { char name[0]; }; +struct symsrc { + char *name; + int fd; + enum dso_binary_type type; + +#ifndef NO_LIBELF_SUPPORT + Elf *elf; + GElf_Ehdr ehdr; + + Elf_Scn *opdsec; + size_t opdidx; + GElf_Shdr opdshdr; + + Elf_Scn *symtab; + GElf_Shdr symshdr; + + Elf_Scn *dynsym; + size_t dynsym_idx; + GElf_Shdr dynshdr; + + bool adjust_symbols; +#endif +}; + +void symsrc__destroy(struct symsrc *ss); +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type); +bool symsrc__has_symtab(struct symsrc *ss); +bool symsrc__possibly_runtime(struct symsrc *ss); + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -254,6 +295,8 @@ static inline void dso__set_loaded(struct dso *dso, enum map_type type) void dso__sort_by_name(struct dso *dso, enum map_type type); +void dsos__add(struct list_head *head, struct dso *dso); +struct dso *dsos__find(struct list_head *head, const char *name); struct dso *__dsos__findnew(struct list_head *head, const char *name); int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); @@ -283,6 +326,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *dso); void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); +bool dso__build_id_equal(const struct dso *dso, u8 *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); struct map *dso__new_map(const char *name); @@ -297,7 +341,9 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits); int build_id__sprintf(const u8 *build_id, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)); + char type, u64 start)); +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size); void machine__destroy_kernel_maps(struct machine *machine); int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); @@ -309,6 +355,8 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); int symbol__init(void); void symbol__exit(void); +void symbol__elf_init(void); +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); @@ -326,4 +374,15 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); int dso__test_data(void); +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, symbol_filter_t filter, + int kmodule); +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, + struct map *map, symbol_filter_t filter); + +void symbols__insert(struct rb_root *symbols, struct symbol *sym); +void symbols__fixup_duplicate(struct rb_root *symbols); +void symbols__fixup_end(struct rb_root *symbols); +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 051eaa68095..065528b7563 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -117,8 +117,8 @@ int perf_target__strerror(struct perf_target *target, int errnum, if (err != buf) { size_t len = strlen(err); - char *c = mempcpy(buf, err, min(buflen - 1, len)); - *c = '\0'; + memcpy(buf, err, min(buflen - 1, len)); + *(buf + min(buflen - 1, len)) = '\0'; } return 0; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 70c2c13ff67..f66610b7bac 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -16,6 +16,8 @@ struct thread { bool comm_set; char *comm; int comm_len; + + void *priv; }; struct machine; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 7eeebcee291..884dde9b9bc 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -58,8 +58,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) } if (top->evlist->nr_entries == 1) { - struct perf_evsel *first; - first = list_entry(top->evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->attr.sample_period, top->freq ? "Hz" : ""); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 33347ca89ee..86ff1b15059 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -5,6 +5,7 @@ #include "types.h" #include <stddef.h> #include <stdbool.h> +#include <termios.h> struct perf_evlist; struct perf_evsel; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0715c843c2e..3aabcd687cd 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -162,25 +162,16 @@ int trace_parse_common_pid(struct pevent *pevent, void *data) return pevent_data_pid(pevent, &record); } -unsigned long long read_size(struct pevent *pevent, void *ptr, int size) +unsigned long long read_size(struct event_format *event, void *ptr, int size) { - return pevent_read_number(pevent, ptr, size); + return pevent_read_number(event->pevent, ptr, size); } -void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +void event_format__print(struct event_format *event, + int cpu, void *data, int size) { - struct event_format *event; struct pevent_record record; struct trace_seq s; - int type; - - type = trace_parse_common_type(pevent, data); - - event = pevent_find_event(pevent, type); - if (!event) { - warning("ug! no event found for type %d", type); - return; - } memset(&record, 0, sizeof(record)); record.cpu = cpu; @@ -192,6 +183,19 @@ void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) trace_seq_do_printf(&s); } +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +{ + int type = trace_parse_common_type(pevent, data); + struct event_format *event = pevent_find_event(pevent, type); + + if (!event) { + warning("ug! no event found for type %d", type); + return; + } + + event_format__print(event, cpu, data, size); +} + void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm) { @@ -217,7 +221,7 @@ void print_event(struct pevent *pevent, int cpu, void *data, int size, } void parse_proc_kallsyms(struct pevent *pevent, - char *file, unsigned int size __unused) + char *file, unsigned int size __maybe_unused) { unsigned long long addr; char *func; @@ -225,31 +229,29 @@ void parse_proc_kallsyms(struct pevent *pevent, char *next = NULL; char *addr_str; char *mod; - char ch; + char *fmt; line = strtok_r(file, "\n", &next); while (line) { mod = NULL; - sscanf(line, "%as %c %as\t[%as", - (float *)(void *)&addr_str, /* workaround gcc warning */ - &ch, (float *)(void *)&func, (float *)(void *)&mod); + addr_str = strtok_r(line, " ", &fmt); addr = strtoull(addr_str, NULL, 16); - free(addr_str); - - /* truncate the extra ']' */ + /* skip character */ + strtok_r(NULL, " ", &fmt); + func = strtok_r(NULL, "\t", &fmt); + mod = strtok_r(NULL, "]", &fmt); + /* truncate the extra '[' */ if (mod) - mod[strlen(mod) - 1] = 0; + mod = mod + 1; pevent_register_function(pevent, func, addr, mod); - free(func); - free(mod); line = strtok_r(NULL, "\n", &next); } } void parse_ftrace_printk(struct pevent *pevent, - char *file, unsigned int size __unused) + char *file, unsigned int size __maybe_unused) { unsigned long long addr; char *printk; @@ -289,7 +291,7 @@ struct event_format *trace_find_next_event(struct pevent *pevent, { static int idx; - if (!pevent->events) + if (!pevent || !pevent->events) return NULL; if (!event) { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 474aa7a7df4..8715a1006d0 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -35,12 +35,11 @@ static int stop_script_unsupported(void) return 0; } -static void process_event_unsupported(union perf_event *event __unused, - struct pevent *pevent __unused, - struct perf_sample *sample __unused, - struct perf_evsel *evsel __unused, - struct machine *machine __unused, - struct thread *thread __unused) +static void process_event_unsupported(union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused, + struct addr_location *al __maybe_unused) { } @@ -53,17 +52,19 @@ static void print_python_unsupported_msg(void) "\n etc.\n"); } -static int python_start_script_unsupported(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int python_start_script_unsupported(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { print_python_unsupported_msg(); return -1; } -static int python_generate_script_unsupported(struct pevent *pevent __unused, - const char *outfile __unused) +static int python_generate_script_unsupported(struct pevent *pevent + __maybe_unused, + const char *outfile + __maybe_unused) { print_python_unsupported_msg(); @@ -115,17 +116,18 @@ static void print_perl_unsupported_msg(void) "\n etc.\n"); } -static int perl_start_script_unsupported(const char *script __unused, - int argc __unused, - const char **argv __unused) +static int perl_start_script_unsupported(const char *script __maybe_unused, + int argc __maybe_unused, + const char **argv __maybe_unused) { print_perl_unsupported_msg(); return -1; } -static int perl_generate_script_unsupported(struct pevent *pevent __unused, - const char *outfile __unused) +static int perl_generate_script_unsupported(struct pevent *pevent + __maybe_unused, + const char *outfile __maybe_unused) { print_perl_unsupported_msg(); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 8fef1d6687b..a55fd37ffea 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -9,7 +9,6 @@ struct machine; struct perf_sample; union perf_event; struct perf_tool; -struct thread; extern int header_page_size_size; extern int header_page_ts_size; @@ -32,6 +31,8 @@ int bigendian(void); struct pevent *read_trace_init(int file_bigendian, int host_bigendian); void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); +void event_format__print(struct event_format *event, + int cpu, void *data, int size); void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm); @@ -56,7 +57,7 @@ int trace_parse_common_pid(struct pevent *pevent, void *data); struct event_format *trace_find_next_event(struct pevent *pevent, struct event_format *event); -unsigned long long read_size(struct pevent *pevent, void *ptr, int size); +unsigned long long read_size(struct event_format *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); @@ -74,16 +75,19 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, void tracing_data_put(struct tracing_data *tdata); +struct addr_location; + +struct perf_session; + struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); void (*process_event) (union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c new file mode 100644 index 00000000000..958723ba3d2 --- /dev/null +++ b/tools/perf/util/unwind.c @@ -0,0 +1,571 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. + * + * Lots of this code have been borrowed or heavily inspired from parts of + * the libunwind 0.99 code which are (amongst other contributors I may have + * forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> + * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> + * + */ + +#include <elf.h> +#include <gelf.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/list.h> +#include <libunwind.h> +#include <libunwind-ptrace.h> +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "util.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionaly not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; + u64 sample_uregs; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + __v; \ + }) + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name) +{ + Elf_Scn *sec = NULL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) + break; + } + + return sec; +} + +static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We dont need eh_frame_ptr, just skip it. */ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + offset = elf_section_offset(fd, ".eh_frame_hdr"); + close(fd); + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + /* TODO .debug_frame check if eh_frame_hdr fails */ + return ret; +} + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + if (read_unwind_spec(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) + return -EINVAL; + + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); +} + +static int access_fpreg(unw_addr_space_t __maybe_unused as, + unw_regnum_t __maybe_unused num, + unw_fpreg_t __maybe_unused *val, + int __maybe_unused __write, + void __maybe_unused *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + +static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused *dil_addr, + void __maybe_unused *arg) +{ + return -UNW_ENOINFO; +} + +static int resume(unw_addr_space_t __maybe_unused as, + unw_cursor_t __maybe_unused *cu, + void __maybe_unused *arg) +{ + pr_err("unwind: resume unsupported\n"); + return -UNW_EINVAL; +} + +static int +get_proc_name(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused addr, + char __maybe_unused *bufp, size_t __maybe_unused buf_len, + unw_word_t __maybe_unused *offp, void __maybe_unused *arg) +{ + pr_err("unwind: get_proc_name unsupported\n"); + return -UNW_EINVAL; +} + +static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, + unw_word_t *data) +{ + struct addr_location al; + ssize_t size; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, addr, &al); + if (!al.map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!al.map->dso) + return -1; + + size = dso__data_read_addr(al.map->dso, al.map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id, + u64 sample_regs) +{ + int i, idx = 0; + + if (!(sample_regs & (1 << id))) + return -EINVAL; + + for (i = 0; i < id; i++) { + if (sample_regs & (1 << i)) + idx++; + } + + *valp = regs->regs[idx]; + return 0; +} + +static int access_mem(unw_addr_space_t __maybe_unused as, + unw_word_t addr, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + unw_word_t start, end; + int offset; + int ret; + + /* Don't support write, probably not needed. */ + if (__write || !stack || !ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP, + ui->sample_uregs); + if (ret) + return ret; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(unw_word_t) < addr) + return -EINVAL; + + if (addr < start || addr + sizeof(unw_word_t) >= end) { + ret = access_dso_mem(ui, addr, valp); + if (ret) { + pr_debug("unwind: access_mem %p not inside range %p-%p\n", + (void *)addr, (void *)start, (void *)end); + *valp = 0; + return ret; + } + return 0; + } + + offset = addr - start; + *valp = *(unw_word_t *)&stack->data[offset]; + pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n", + (void *)addr, (unsigned long)*valp, offset); + return 0; +} + +static int access_reg(unw_addr_space_t __maybe_unused as, + unw_regnum_t regnum, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + int id, ret; + + /* Don't support write, I suspect we don't need it. */ + if (__write) { + pr_err("unwind: access_reg w %d\n", regnum); + return 0; + } + + if (!ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + id = unwind__arch_reg_id(regnum); + if (id < 0) + return -EINVAL; + + ret = reg_value(valp, &ui->sample->user_regs, id, ui->sample_uregs); + if (ret) { + pr_err("unwind: can't read reg %d\n", regnum); + return ret; + } + + pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); + return 0; +} + +static void put_unwind_info(unw_addr_space_t __maybe_unused as, + unw_proc_info_t *pi __maybe_unused, + void *arg __maybe_unused) +{ + pr_debug("unwind: put_unwind_info called\n"); +} + +static int entry(u64 ip, struct thread *thread, struct machine *machine, + unwind_entry_cb_t cb, void *arg) +{ + struct unwind_entry e; + struct addr_location al; + + thread__find_addr_location(thread, machine, + PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al, NULL); + + e.ip = ip; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? al.map->map_ip(al.map, ip) : (u64) 0); + + return cb(&e, arg); +} + +static void display_error(int err) +{ + switch (err) { + case UNW_EINVAL: + pr_err("unwind: Only supports local.\n"); + break; + case UNW_EUNSPEC: + pr_err("unwind: Unspecified error.\n"); + break; + case UNW_EBADREG: + pr_err("unwind: Register unavailable.\n"); + break; + default: + break; + } +} + +static unw_accessors_t accessors = { + .find_proc_info = find_proc_info, + .put_unwind_info = put_unwind_info, + .get_dyn_info_list_addr = get_dyn_info_list_addr, + .access_mem = access_mem, + .access_reg = access_reg, + .access_fpreg = access_fpreg, + .resume = resume, + .get_proc_name = get_proc_name, +}; + +static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, + void *arg) +{ + unw_addr_space_t addr_space; + unw_cursor_t c; + int ret; + + addr_space = unw_create_addr_space(&accessors, 0); + if (!addr_space) { + pr_err("unwind: Can't create unwind address space.\n"); + return -ENOMEM; + } + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0)) { + unw_word_t ip; + + unw_get_reg(&c, UNW_REG_IP, &ip); + ret = entry(ip, ui->thread, ui->machine, cb, arg); + } + + unw_destroy_addr_space(addr_space); + return ret; +} + +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, struct thread *thread, + u64 sample_uregs, struct perf_sample *data) +{ + unw_word_t ip; + struct unwind_info ui = { + .sample = data, + .sample_uregs = sample_uregs, + .thread = thread, + .machine = machine, + }; + int ret; + + if (!data->user_regs.regs) + return -EINVAL; + + ret = reg_value(&ip, &data->user_regs, PERF_REG_IP, sample_uregs); + if (ret) + return ret; + + ret = entry(ip, thread, machine, cb, arg); + if (ret) + return -ENOMEM; + + return get_entries(&ui, cb, arg); +} diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h new file mode 100644 index 00000000000..a78c8b303bb --- /dev/null +++ b/tools/perf/util/unwind.h @@ -0,0 +1,35 @@ +#ifndef __UNWIND_H +#define __UNWIND_H + +#include "types.h" +#include "event.h" +#include "symbol.h" + +struct unwind_entry { + struct map *map; + struct symbol *sym; + u64 ip; +}; + +typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); + +#ifndef NO_LIBUNWIND_SUPPORT +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, + struct thread *thread, + u64 sample_uregs, + struct perf_sample *data); +int unwind__arch_reg_id(int regnum); +#else +static inline int +unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, + void *arg __maybe_unused, + struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused, + u64 sample_uregs __maybe_unused, + struct perf_sample *data __maybe_unused) +{ + return 0; +} +#endif /* NO_LIBUNWIND_SUPPORT */ +#endif /* __UNWIND_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d03599fbe78..2055cf38041 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,6 +1,11 @@ #include "../perf.h" #include "util.h" #include <sys/mman.h> +#ifndef NO_BACKTRACE +#include <execinfo.h> +#endif +#include <stdio.h> +#include <stdlib.h> /* * XXX We need to find a better place for these things... @@ -158,3 +163,23 @@ size_t hex_width(u64 v) return n; } + +/* Obtain a backtrace and print it to stdout. */ +#ifndef NO_BACKTRACE +void dump_stack(void) +{ + void *array[16]; + size_t size = backtrace(array, ARRAY_SIZE(array)); + char **strings = backtrace_symbols(array, size); + size_t i; + + printf("Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf("%s\n", strings[i]); + + free(strings); +} +#else +void dump_stack(void) {} +#endif diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b13c7331eaf..70fa70b535b 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -69,13 +69,8 @@ #include <sys/poll.h> #include <sys/socket.h> #include <sys/ioctl.h> -#include <sys/select.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <arpa/inet.h> -#include <netdb.h> #include <inttypes.h> -#include "../../../include/linux/magic.h" +#include <linux/magic.h> #include "types.h" #include <sys/ttydefaults.h> @@ -266,4 +261,6 @@ size_t hex_width(u64 v); char *rtrim(char *s); +void dump_stack(void); + #endif diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c new file mode 100644 index 00000000000..e60951fcdb1 --- /dev/null +++ b/tools/perf/util/vdso.c @@ -0,0 +1,111 @@ + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <linux/kernel.h> + +#include "vdso.h" +#include "util.h" +#include "symbol.h" +#include "linux/string.h" + +static bool vdso_found; +static char vdso_file[] = "/tmp/perf-vdso.so-XXXXXX"; + +static int find_vdso_map(void **start, void **end) +{ + FILE *maps; + char line[128]; + int found = 0; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + pr_err("vdso: cannot open maps\n"); + return -1; + } + + while (!found && fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. */ + if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", + start, end, &m)) + continue; + if (m < 0) + continue; + + if (!strncmp(&line[m], VDSO__MAP_NAME, + sizeof(VDSO__MAP_NAME) - 1)) + found = 1; + } + + fclose(maps); + return !found; +} + +static char *get_file(void) +{ + char *vdso = NULL; + char *buf = NULL; + void *start, *end; + size_t size; + int fd; + + if (vdso_found) + return vdso_file; + + if (find_vdso_map(&start, &end)) + return NULL; + + size = end - start; + + buf = memdup(start, size); + if (!buf) + return NULL; + + fd = mkstemp(vdso_file); + if (fd < 0) + goto out; + + if (size == (size_t) write(fd, buf, size)) + vdso = vdso_file; + + close(fd); + + out: + free(buf); + + vdso_found = (vdso != NULL); + return vdso; +} + +void vdso__exit(void) +{ + if (vdso_found) + unlink(vdso_file); +} + +struct dso *vdso__dso_findnew(struct list_head *head) +{ + struct dso *dso = dsos__find(head, VDSO__MAP_NAME); + + if (!dso) { + char *file; + + file = get_file(); + if (!file) + return NULL; + + dso = dso__new(VDSO__MAP_NAME); + if (dso != NULL) { + dsos__add(head, dso); + dso__set_long_name(dso, file); + } + } + + return dso; +} diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h new file mode 100644 index 00000000000..0f76e7caf6f --- /dev/null +++ b/tools/perf/util/vdso.h @@ -0,0 +1,18 @@ +#ifndef __PERF_VDSO__ +#define __PERF_VDSO__ + +#include <linux/types.h> +#include <string.h> +#include <stdbool.h> + +#define VDSO__MAP_NAME "[vdso]" + +static inline bool is_vdso_map(const char *filename) +{ + return !strcmp(filename, VDSO__MAP_NAME); +} + +struct dso *vdso__dso_findnew(struct list_head *head); +void vdso__exit(void); + +#endif /* __PERF_VDSO__ */ diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 73e900edb5a..19f15b65070 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -7,7 +7,8 @@ * There's no pack memory to release - but stay close to the Git * version so wrap this away: */ -static inline void release_pack_memory(size_t size __used, int flag __used) +static inline void release_pack_memory(size_t size __maybe_unused, + int flag __maybe_unused) { } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index bde8521d56b..96ce80a3743 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,6 +1,8 @@ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ - COMMAND_O := O=$(O) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) + OUTPUT := $(ABSOLUTE_O)/ + COMMAND_O := O=$(ABSOLUTE_O) endif ifneq ($(OUTPUT),) diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf index 323a552ce64..63a1a83f4f0 100644 --- a/tools/testing/ktest/examples/include/defaults.conf +++ b/tools/testing/ktest/examples/include/defaults.conf @@ -33,7 +33,7 @@ DEFAULTS THIS_DIR := ${PWD} -# to orginize your configs, having each machine save their configs +# to organize your configs, having each machine save their configs # into a separate directly is useful. CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE} diff --git a/tools/testing/ktest/examples/include/tests.conf b/tools/testing/ktest/examples/include/tests.conf index 4fdb811bd81..60cedb1a115 100644 --- a/tools/testing/ktest/examples/include/tests.conf +++ b/tools/testing/ktest/examples/include/tests.conf @@ -47,7 +47,7 @@ BUILD_NOCLEAN = 1 # Build, install, boot and test with a randconfg 10 times. # It is important that you have set MIN_CONFIG in the config # that includes this file otherwise it is likely that the -# randconfig will not have the neccessary configs needed to +# randconfig will not have the necessary configs needed to # boot your box. This version of the test requires a min # config that has enough to make sure the target has network # working. diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 52b7959cd51..c05bcd293d8 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -840,7 +840,9 @@ sub __read_config { if ($rest =~ /\sIF\s+(.*)/) { # May be a ELSE IF section. - if (!process_if($name, $1)) { + if (process_if($name, $1)) { + $if_set = 1; + } else { $skip = 1; } $rest = ""; diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 8b40bd5e5cc..4c53cae6c27 100644 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -36,7 +36,7 @@ mkdir $mnt mount -t hugetlbfs none $mnt echo "--------------------" -echo "runing hugepage-mmap" +echo "running hugepage-mmap" echo "--------------------" ./hugepage-mmap if [ $? -ne 0 ]; then @@ -50,7 +50,7 @@ shmall=`cat /proc/sys/kernel/shmall` echo 268435456 > /proc/sys/kernel/shmmax echo 4194304 > /proc/sys/kernel/shmall echo "--------------------" -echo "runing hugepage-shm" +echo "running hugepage-shm" echo "--------------------" ./hugepage-shm if [ $? -ne 0 ]; then @@ -62,7 +62,7 @@ echo $shmmax > /proc/sys/kernel/shmmax echo $shmall > /proc/sys/kernel/shmall echo "--------------------" -echo "runing map_hugetlb" +echo "running map_hugetlb" echo "--------------------" ./map_hugetlb if [ $? -ne 0 ]; then |