433 files changed, 27310 insertions, 8318 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd index cd9213ccf3d..0a306476424 100644 --- a/Documentation/ABI/testing/sysfs-bus-rbd +++ b/Documentation/ABI/testing/sysfs-bus-rbd @@ -66,27 +66,7 @@ current_snap The current snapshot for which the device is mapped. -snap_* - - A directory per each snapshot - parent Information identifying the pool, image, and snapshot id for the parent image in a layered rbd image (format 2 only). - -Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name> -------------------------------------------------------------- - -snap_id - - The rados internal snapshot id assigned for this snapshot - -snap_size - - The size of the image when this snapshot was taken. - -snap_features - - A hexadecimal encoding of the feature bits for this snapshot. - diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index e38b8df3d72..8e9359de1d2 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -191,7 +191,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index 94a65613188..b0d541042ac 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -199,6 +199,8 @@ the device to the driver. For example: { Name (SBUF, ResourceTemplate() { + ... + // Used to power on/off the device GpioIo (Exclusive, PullDefault, 0x0000, 0x0000, IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,) @@ -206,10 +208,20 @@ the device to the driver. For example: // Pin List 0x0055 } + + // Interrupt for the device + GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone, + 0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,) + { + // Pin list + 0x0058 + } + ... - Return (SBUF) } + + Return (SBUF) } These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0" @@ -220,6 +232,24 @@ The driver can do this by including <linux/acpi_gpio.h> and then calling acpi_get_gpio(path, gpio). This will return the Linux GPIO number or negative errno if there was no translation found. +In a simple case of just getting the Linux GPIO number from device +resources one can use acpi_get_gpio_by_index() helper function. It takes +pointer to the device and index of the GpioIo/GpioInt descriptor in the +device resources list. For example: + + int gpio_irq, gpio_power; + int ret; + + gpio_irq = acpi_get_gpio_by_index(dev, 1, NULL); + if (gpio_irq < 0) + /* handle error */ + + gpio_power = acpi_get_gpio_by_index(dev, 0, NULL); + if (gpio_power < 0) + /* handle error */ + + /* Now we can use the GPIO numbers */ + Other GpioIo parameters must be converted first by the driver to be suitable to the gpiolib before passing them. 
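As a usage sketch tying these helpers together (my_driver_probe(), the "power" label and the devm_gpio_request_one() call are illustrative assumptions, not part of the patch text above):

	#include <linux/acpi_gpio.h>
	#include <linux/gpio.h>

	static int my_driver_probe(struct device *dev)
	{
		int gpio_power, ret;

		/* Index 0 is the GpioIo power descriptor from the ASL above */
		gpio_power = acpi_get_gpio_by_index(dev, 0, NULL);
		if (gpio_power < 0)
			return gpio_power;

		/* Reserve the pin and drive it high to power on the device */
		ret = devm_gpio_request_one(dev, gpio_power,
					    GPIOF_OUT_INIT_HIGH, "power");
		if (ret)
			return ret;

		return 0;
	}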
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index 66f9cc31068..219970ba54b 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. +If CONFIG_NO_HZ_COMMON is not set or the nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt new file mode 100644 index 00000000000..e466598105f --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt @@ -0,0 +1,26 @@ +Aeroflex Gaisler GRGPIO General Purpose I/O cores. + +The GRGPIO GPIO core is available in the GRLIB VHDL IP core library. + +Note: In the ordinary environment for the GRGPIO core, a Leon SPARC system, +these properties are built from information in the AMBA plug&play. + +Required properties: + +- name : Should be "GAISLER_GPIO" or "01_01a" + +- reg : Address and length of the register set for the device + +- interrupts : Interrupt numbers for this device + +Optional properties: + +- nbits : The number of gpio lines. If not present, the driver assumes 32 lines. + +- irqmap : An array with an index for each gpio line. An index is either a valid + index into the interrupts property array, or 0xffffffff to indicate + no irq for that line. The driver provides no interrupt support if this + property is not present. + +For further information look in the documentation for the GRLIB IP core library: +http://www.gaisler.com/products/grlib/grip.pdf diff --git a/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt new file mode 100644 index 00000000000..629d0ef1730 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt @@ -0,0 +1,47 @@ +Microchip MCP23008/MCP23S08/MCP23017/MCP23S17 driver for +8-/16-bit I/O expander with serial interface (I2C/SPI) + +Required properties: +- compatible : Should be + - "mcp,mcp23s08" for 8 GPIO SPI version + - "mcp,mcp23s17" for 16 GPIO SPI version + - "mcp,mcp23008" for 8 GPIO I2C version or + - "mcp,mcp23017" for 16 GPIO I2C version of the chip +- #gpio-cells : Should be two. + - first cell is the pin number + - second cell is used to specify flags. Flags are currently unused. +- gpio-controller : Marks the device node as a GPIO controller. +- reg : For an address on its bus. I2C uses this as the I2C address of the chip. + SPI uses this to specify the chipselect line which the chip is + connected to. The driver and the SPI variant of the chip support + multiple chips on the same chipselect. Have a look at + mcp,spi-present-mask below. + +Required device specific properties (only for SPI chips): +- mcp,spi-present-mask : This is a present flag that only makes sense for SPI + chips - as the name suggests. Multiple SPI chips can share the same + SPI chipselect. Set a bit in bit0-7 in this mask to 1 if there is a + chip connected with the corresponding spi address set. For example if + you have a chip with address 3 connected, you have to set bit3 to 1, + which is 0x08. The mcp23s08 chip variant only supports bits 0-3.
It is not + possible to mix mcp23s08 and mcp23s17 on the same chipselect. Set at + least one bit to 1 for SPI chips. +- spi-max-frequency = The maximum frequency this chip is able to handle + +Example I2C: +gpiom1: gpio@20 { + compatible = "mcp,mcp23017"; + gpio-controller; + #gpio-cells = <2>; + reg = <0x20>; +}; + +Example SPI: +gpiom1: gpio@0 { + compatible = "mcp,mcp23s17"; + gpio-controller; + #gpio-cells = <2>; + spi-present-mask = <0x01>; + reg = <0>; + spi-max-frequency = <1000000>; +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-omap.txt b/Documentation/devicetree/bindings/gpio/gpio-omap.txt index bff51a2fee1..1b524c0c79f 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-omap.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-omap.txt @@ -20,8 +20,11 @@ Required properties: 8 = active low level-sensitive. OMAP specific properties: -- ti,hwmods: Name of the hwmod associated to the GPIO: - "gpio<X>", <X> being the 1-based instance number from the HW spec +- ti,hwmods: Name of the hwmod associated to the GPIO: + "gpio<X>", <X> being the 1-based instance number + from the HW spec. +- ti,gpio-always-on: Indicates if a GPIO bank is always powered and + so will never lose its logic state. Example: diff --git a/Documentation/devicetree/bindings/input/cros-ec-keyb.txt b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt new file mode 100644 index 00000000000..0f6355ce39b --- /dev/null +++ b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt @@ -0,0 +1,72 @@ +ChromeOS EC Keyboard + +Google's ChromeOS EC Keyboard is a simple matrix keyboard implemented on +a separate EC (Embedded Controller) device. It provides a message for reading +key scans from the EC. These are then converted into keycodes for processing +by the kernel. + +This binding is based on matrix-keymap.txt and extends/modifies it as follows: + +Required properties: +- compatible: "google,cros-ec-keyb" + +Optional properties: +- google,needs-ghost-filter: True to enable a ghost filter for the matrix +keyboard. This is recommended if the EC does not have its own logic or +hardware for this. + + +Example: + +cros-ec-keyb { + compatible = "google,cros-ec-keyb"; + keypad,num-rows = <8>; + keypad,num-columns = <13>; + google,needs-ghost-filter; + /* + * Keymap entries take the form of 0xRRCCKKKK where + * RR=Row CC=Column KKKK=Key Code + * The values below are for a US keyboard layout and + * are taken from the Linux driver. Note that the + * 102ND key is not used for US keyboards. + */ + linux,keymap = < + /* CAPSLCK F1 B F10 */ + 0x0001003a 0x0002003b 0x00030030 0x00040044 + /* N = R_ALT ESC */ + 0x00060031 0x0008000d 0x000a0064 0x01010001 + /* F4 G F7 H */ + 0x0102003e 0x01030022 0x01040041 0x01060023 + /* ' F9 BKSPACE L_CTRL */ + 0x01080028 0x01090043 0x010b000e 0x0200001d + /* TAB F3 T F6 */ + 0x0201000f 0x0202003d 0x02030014 0x02040040 + /* ] Y 102ND [ */ + 0x0205001b 0x02060015 0x02070056 0x0208001a + /* F8 GRAVE F2 5 */ + 0x02090042 0x03010029 0x0302003c 0x03030006 + /* F5 6 - \ */ + 0x0304003f 0x03060007 0x0308000c 0x030b002b + /* R_CTRL A D F */ + 0x04000061 0x0401001e 0x04020020 0x04030021 + /* S K J ; */ + 0x0404001f 0x04050025 0x04060024 0x04080027 + /* L ENTER Z C */ + 0x04090026 0x040b001c 0x0501002c 0x0502002e + /* V X , M */ + 0x0503002f 0x0504002d 0x05050033 0x05060032 + /* L_SHIFT / . 
SPACE */ + 0x0507002a 0x05080035 0x05090034 0x050B0039 + /* 1 3 4 2 */ + 0x06010002 0x06020004 0x06030005 0x06040003 + /* 8 7 0 9 */ + 0x06050009 0x06060008 0x0608000b 0x0609000a + /* L_ALT DOWN RIGHT Q */ + 0x060a0038 0x060b006c 0x060c006a 0x07010010 + /* E R W I */ + 0x07020012 0x07030013 0x07040011 0x07050017 + /* U R_SHIFT P O */ + 0x07060016 0x07070036 0x07080019 0x07090018 + /* UP LEFT */ + 0x070b0067 0x070c0069>; +}; diff --git a/Documentation/devicetree/bindings/mfd/as3711.txt b/Documentation/devicetree/bindings/mfd/as3711.txt new file mode 100644 index 00000000000..d98cf18c721 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/as3711.txt @@ -0,0 +1,73 @@ +AS3711 is an I2C PMIC from Austria MicroSystems with multiple DCDC and LDO power +supplies, a battery charger and an RTC. So far only bindings for the two stepup +DCDC converters are defined. Other DCDC and LDO supplies are configured, using +standard regulator properties, they must belong to a sub-node, called +"regulators" and be called "sd1" to "sd4" and "ldo1" to "ldo8." Stepup converter +configuration should be placed in a subnode, called "backlight." + +Compulsory properties: +- compatible : must be "ams,as3711" +- reg : specifies the I2C address + +To use the SU1 converter as a backlight source the following two properties must +be provided: +- su1-dev : framebuffer phandle +- su1-max-uA : maximum current + +To use the SU2 converter as a backlight source the following two properties must +be provided: +- su2-dev : framebuffer phandle +- su1-max-uA : maximum current + +Additionally one of these properties must be provided to select the type of +feedback used: +- su2-feedback-voltage : voltage feedback is used +- su2-feedback-curr1 : CURR1 input used for current feedback +- su2-feedback-curr2 : CURR2 input used for current feedback +- su2-feedback-curr3 : CURR3 input used for current feedback +- su2-feedback-curr-auto: automatic current feedback selection + +and one of these to select the over-voltage protection pin +- su2-fbprot-lx-sd4 : LX_SD4 is used for over-voltage protection +- su2-fbprot-gpio2 : GPIO2 is used for over-voltage protection +- su2-fbprot-gpio3 : GPIO3 is used for over-voltage protection +- su2-fbprot-gpio4 : GPIO4 is used for over-voltage protection + +If "su2-feedback-curr-auto" is selected, one or more of the following properties +have to be specified: +- su2-auto-curr1 : use CURR1 input for current feedback +- su2-auto-curr2 : use CURR2 input for current feedback +- su2-auto-curr3 : use CURR3 input for current feedback + +Example: + +as3711@40 { + compatible = "ams,as3711"; + reg = <0x40>; + + regulators { + sd4 { + regulator-name = "1.215V"; + regulator-min-microvolt = <1215000>; + regulator-max-microvolt = <1235000>; + }; + ldo2 { + regulator-name = "2.8V CPU"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + regulator-boot-on; + }; + }; + + backlight { + compatible = "ams,as3711-bl"; + su2-dev = <&lcdc>; + su2-max-uA = <36000>; + su2-feedback-curr-auto; + su2-fbprot-gpio4; + su2-auto-curr1; + su2-auto-curr2; + su2-auto-curr3; + }; +}; diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt new file mode 100644 index 00000000000..e0e59c58a1f --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt @@ -0,0 +1,56 @@ +ChromeOS Embedded Controller + +Google's ChromeOS EC is a Cortex-M device which talks to the AP and +implements various function such as keyboard and battery 
charging. + +The EC can be connected through various means (I2C, SPI, LPC) and the +compatible string used depends on the interface. Each connection method has +its own driver which connects to the top level interface-agnostic EC driver. +Other Linux drivers (such as cros-ec-keyb for the matrix keyboard) connect to +the top-level driver. + +Required properties (I2C): +- compatible: "google,cros-ec-i2c" +- reg: I2C slave address + +Required properties (SPI): +- compatible: "google,cros-ec-spi" +- reg: SPI chip select + +Required properties (LPC): +- compatible: "google,cros-ec-lpc" +- reg: List of (IO address, size) pairs defining the IO regions the interface uses + + +Example for I2C: + +i2c@12CA0000 { + cros-ec@1e { + reg = <0x1e>; + compatible = "google,cros-ec-i2c"; + interrupts = <14 0>; + interrupt-parent = <&wakeup_eint>; + wakeup-source; + }; +}; + + +Example for SPI: + +spi@131b0000 { + ec@0 { + compatible = "google,cros-ec-spi"; + reg = <0x0>; + interrupts = <14 0>; + interrupt-parent = <&wakeup_eint>; + wakeup-source; + spi-max-frequency = <5000000>; + controller-data { + cs-gpio = <&gpf0 3 4 3 0>; + samsung,spi-cs; + samsung,spi-feedback-delay = <2>; + }; + }; +}; + + +Example for LPC is not supplied as it is not yet implemented. diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-host.txt b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt new file mode 100644 index 00000000000..b381fa696bf --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt @@ -0,0 +1,80 @@ +OMAP HS USB Host + +Required properties: + +- compatible: should be "ti,usbhs-host" +- reg: should contain one register range i.e. start and length +- ti,hwmods: must contain "usb_host_hs" + +Optional properties: + +- num-ports: number of USB ports. Usually this is automatically detected + from the IP's revision register but can be overridden by specifying + this property. A maximum of 3 ports are supported at the moment. + +- portN-mode: String specifying the port mode for port N, where N can be + from 1 to 3. If the port mode is not specified, that port is treated + as unused. When specified, it must be one of the following. + "ehci-phy", + "ehci-tll", + "ehci-hsic", + "ohci-phy-6pin-datse0", + "ohci-phy-6pin-dpdm", + "ohci-phy-3pin-datse0", + "ohci-phy-4pin-dpdm", + "ohci-tll-6pin-datse0", + "ohci-tll-6pin-dpdm", + "ohci-tll-3pin-datse0", + "ohci-tll-4pin-dpdm", + "ohci-tll-2pin-datse0", + "ohci-tll-2pin-dpdm", + +- single-ulpi-bypass: Must be present if the controller contains a single + ULPI bypass control bit. e.g. OMAP3 silicon <= ES2.1 + +Required properties if child node exists: + +- #address-cells: Must be 1 +- #size-cells: Must be 1 +- ranges: must be present + +Properties for children: + +The OMAP HS USB Host subsystem contains EHCI and OHCI controllers.
+See Documentation/devicetree/bindings/usb/omap-ehci.txt and +omap3-ohci.txt + +Example for OMAP4: + +usbhshost: usbhshost@4a064000 { + compatible = "ti,usbhs-host"; + reg = <0x4a064000 0x800>; + ti,hwmods = "usb_host_hs"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + usbhsohci: ohci@4a064800 { + compatible = "ti,ohci-omap3", "usb-ohci"; + reg = <0x4a064800 0x400>; + interrupt-parent = <&gic>; + interrupts = <0 76 0x4>; + }; + + usbhsehci: ehci@4a064c00 { + compatible = "ti,ehci-omap", "usb-ehci"; + reg = <0x4a064c00 0x400>; + interrupt-parent = <&gic>; + interrupts = <0 77 0x4>; + }; +}; + +&usbhshost { + port1-mode = "ehci-phy"; + port2-mode = "ehci-tll"; + port3-mode = "ehci-phy"; +}; + +&usbhsehci { + phys = <&hsusb1_phy 0 &hsusb3_phy>; +}; diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt new file mode 100644 index 00000000000..62fe69724e3 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt @@ -0,0 +1,17 @@ +OMAP HS USB Host TLL (Transceiver-Less Interface) + +Required properties: + +- compatible : should be "ti,usbhs-tll" +- reg : should contain one register range i.e. start and length +- interrupts : should contain the TLL module's interrupt +- ti,hwmod : must contain "usb_tll_hs" + +Example: + + usbhstll: usbhstll@4a062000 { + compatible = "ti,usbhs-tll"; + reg = <0x4a062000 0x1000>; + interrupts = <78>; + ti,hwmods = "usb_tll_hs"; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt index 131e8c11d26..681afad7377 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt @@ -1,7 +1,9 @@ TI SOC ECAP based APWM controller Required properties: -- compatible: Must be "ti,am33xx-ecap" +- compatible: Must be "ti,<soc>-ecap". + for am33xx - compatible = "ti,am33xx-ecap"; + for da850 - compatible = "ti,da850-ecap", "ti,am33xx-ecap"; - #pwm-cells: Should be 3. Number of cells being used to specify PWM property. First cell specifies the per-chip index of the PWM to use, the second cell is the period in nanoseconds and bit 0 in the third cell is used to @@ -15,9 +17,15 @@ Optional properties: Example: -ecap0: ecap@0 { +ecap0: ecap@0 { /* ECAP on am33xx */ compatible = "ti,am33xx-ecap"; #pwm-cells = <3>; reg = <0x48300100 0x80>; ti,hwmods = "ecap0"; }; + +ecap0: ecap@0 { /* ECAP on da850 */ + compatible = "ti,da850-ecap", "ti,am33xx-ecap"; + #pwm-cells = <3>; + reg = <0x306000 0x80>; +}; diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt index 4fc7079d822..337c6fc65d3 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt @@ -1,7 +1,9 @@ TI SOC EHRPWM based PWM controller Required properties: -- compatible : Must be "ti,am33xx-ehrpwm" +- compatible: Must be "ti,<soc>-ehrpwm". + for am33xx - compatible = "ti,am33xx-ehrpwm"; + for da850 - compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm"; - #pwm-cells: Should be 3. Number of cells being used to specify PWM property. 
First cell specifies the per-chip index of the PWM to use, the second cell is the period in nanoseconds and bit 0 in the third cell is used to @@ -15,9 +17,15 @@ Optional properties: Example: -ehrpwm0: ehrpwm@0 { +ehrpwm0: ehrpwm@0 { /* EHRPWM on am33xx */ compatible = "ti,am33xx-ehrpwm"; #pwm-cells = <3>; reg = <0x48300200 0x100>; ti,hwmods = "ehrpwm0"; }; + +ehrpwm0: ehrpwm@0 { /* EHRPWM on da850 */ + compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm"; + #pwm-cells = <3>; + reg = <0x300000 0x2000>; +}; diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt index 7a7eb1e7bda..f2f3e80934d 100644 --- a/Documentation/devicetree/bindings/sound/wm8994.txt +++ b/Documentation/devicetree/bindings/sound/wm8994.txt @@ -5,14 +5,70 @@ on the board). Required properties: - - compatible : "wlf,wm1811", "wlf,wm8994", "wlf,wm8958" + - compatible : One of "wlf,wm1811", "wlf,wm8994" or "wlf,wm8958". - reg : the I2C address of the device for I2C, the chip select number for SPI. + - gpio-controller : Indicates this device is a GPIO controller. + - #gpio-cells : Must be 2. The first cell is the pin number and the + second cell is used to specify optional parameters (currently unused). + + - AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply, + SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered + in Documentation/devicetree/bindings/regulator/regulator.txt + +Optional properties: + + - interrupts : The interrupt line the IRQ signal for the device is + connected to. This is optional, if it is not connected then none + of the interrupt related properties should be specified. + - interrupt-controller : These devices contain interrupt controllers + and may provide interrupt services to other devices if they have an + interrupt line connected. + - interrupt-parent : The parent interrupt controller. + - #interrupt-cells: the number of cells to describe an IRQ, this should be 2. + The first cell is the IRQ number. + The second cell is the flags, encoded as the trigger masks from + Documentation/devicetree/bindings/interrupts.txt + + - wlf,gpio-cfg : A list of GPIO configuration register values. If absent, + no configuration of these registers is performed. If any value is + over 0xffff then the register will be left as default. If present 11 + values must be supplied. + + - wlf,micbias-cfg : Two MICBIAS register values for WM1811 or + WM8958. If absent the register defaults will be used. + + - wlf,ldo1ena : GPIO specifier for control of LDO1ENA input to device. + - wlf,ldo2ena : GPIO specifier for control of LDO2ENA input to device. + + - wlf,lineout1-se : If present LINEOUT1 is in single ended mode. + - wlf,lineout2-se : If present LINEOUT2 is in single ended mode. + + - wlf,lineout1-feedback : If present LINEOUT1 has common mode feedback + connected. + - wlf,lineout2-feedback : If present LINEOUT2 has common mode feedback + connected. + + - wlf,ldoena-always-driven : If present LDOENA is always driven. 
+ Example: codec: wm8994@1a { compatible = "wlf,wm8994"; reg = <0x1a>; + + gpio-controller; + #gpio-cells = <2>; + + lineout1-se; + + AVDD2-supply = <®ulator>; + CPVDD-supply = <®ulator>; + DBVDD1-supply = <®ulator>; + DBVDD2-supply = <®ulator>; + DBVDD3-supply = <®ulator>; + SPKVDD1-supply = <®ulator>; + SPKVDD2-supply = <®ulator>; }; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9653cf2f972..c3bfacb9291 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1277,6 +1277,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted. iucv= [HW,NET] + ivrs_ioapic [HW,X86_64] + Provide an override to the IOAPIC-ID<->DEVICE-ID + mapping provided in the IVRS ACPI table. For + example, to map IOAPIC-ID decimal 10 to + PCI device 00:14.0 write the parameter as: + ivrs_ioapic[10]=00:14.0 + + ivrs_hpet [HW,X86_64] + Provide an override to the HPET-ID<->DEVICE-ID + mapping provided in the IVRS ACPI table. For + example, to map HPET-ID decimal 0 to + PCI device 00:14.0 write the parameter as: + ivrs_hpet[0]=00:14.0 + js= [HW,JOY] Analog joystick See Documentation/input/joystick.txt. @@ -1964,6 +1978,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Valid arguments: on, off Default: on + nohz_full= [KNL,BOOT] + In kernels built with CONFIG_NO_HZ_FULL=y, set + the specified list of CPUs whose tick will be stopped + whenever possible. The boot CPU will be forced outside + the range to maintain the timekeeping. + The CPUs in this range must also be included in the + rcu_nocbs= set. + noiotrap [SH] Disables trapped I/O port accesses. noirqdebug [X86-32] Disables the code which attempts to detect and diff --git a/Documentation/leds/00-INDEX b/Documentation/leds/00-INDEX index 5246090ef15..1ecd1596633 100644 --- a/Documentation/leds/00-INDEX +++ b/Documentation/leds/00-INDEX @@ -6,6 +6,8 @@ leds-lp5521.txt - notes on how to use the leds-lp5521 driver. leds-lp5523.txt - notes on how to use the leds-lp5523 driver. +leds-lp5562.txt + - notes on how to use the leds-lp5562 driver. leds-lp55xx.txt - description about lp55xx common driver. leds-lm3556.txt diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt index 270f5719633..79e4c2e6e5e 100644 --- a/Documentation/leds/leds-lp5521.txt +++ b/Documentation/leds/leds-lp5521.txt @@ -81,22 +81,3 @@ static struct lp55xx_platform_data lp5521_platform_data = { If the current is set to 0 in the platform data, that channel is disabled and it is not visible in the sysfs. - -The 'update_config' : CONFIG register (ADDR 08h) -This value is platform-specific data. -If update_config is not defined, the CONFIG register is set with -'LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT'. 
-(Enable auto-powersave, set charge pump to auto, red to battery) - -example of update_config : - -#define LP5521_CONFIGS (LP5521_PWM_HF | LP5521_PWRSAVE_EN | \ - LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT | \ - LP5521_CLK_INT) - -static struct lp55xx_platform_data lp5521_pdata = { - .led_config = lp5521_led_config, - .num_channels = ARRAY_SIZE(lp5521_led_config), - .clock_mode = LP55XX_CLOCK_INT, - .update_config = LP5521_CONFIGS, -}; diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt new file mode 100644 index 00000000000..5a823ff6b39 --- /dev/null +++ b/Documentation/leds/leds-lp5562.txt @@ -0,0 +1,120 @@ +Kernel driver for LP5562 +======================== + +* TI LP5562 LED Driver + +Author: Milo(Woogyom) Kim <milo.kim@ti.com> + +Description + + LP5562 can drive up to 4 channels: R/G/B and White. + LEDs can be controlled directly via the led class control interface. + + All four channels can also be controlled using the engine micro programs. + LP5562 has internal program memory for running various LED patterns. + For the details, please refer to the 'firmware' section in leds-lp55xx.txt + +Device attribute: engine_mux + + 3 engines are allocated in LP5562, but the number of channels is 4. + Therefore each channel should be mapped to an engine number. + Value : RGB or W + + This attribute is used for programming LED data with the firmware interface. + Unlike the LP5521/LP5523/55231, LP5562 has a unique feature for the engine mux, + so an additional sysfs attribute is required. + + LED Map + Red ... Engine 1 (fixed) + Green ... Engine 2 (fixed) + Blue ... Engine 3 (fixed) + White ... Engine 1 or 2 or 3 (selective) + +How to load the program data using engine_mux + + Before loading the LP5562 program data, engine_mux should be written between + the engine selection and loading the firmware. + The engine mux has two different modes, RGB and W. + RGB is used for loading RGB program data, W is used for W program data. + + For example, to run a blinking green channel pattern: + echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel + echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + + To run a blinking white pattern: + echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine + echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux + echo 1 > /sys/class/firmware/lp5562/loading + echo "4000600040FF6000" > /sys/class/firmware/lp5562/data + echo 0 > /sys/class/firmware/lp5562/loading + echo 1 > /sys/bus/i2c/devices/xxxx/run_engine + +How to load the predefined patterns + + Please refer to 'leds-lp55xx.txt' + +Setting Current of Each Channel + + Like LP5521 and LP5523/55231, LP5562 provides LED current settings. + The 'led_current' and 'max_current' attributes are used. + +(Example of Platform data) + +To configure the platform specific data, the lp55xx_platform_data structure is used.
+ +static struct lp55xx_led_config lp5562_led_config[] = { + { + .name = "R", + .chan_nr = 0, + .led_current = 20, + .max_current = 40, + }, + { + .name = "G", + .chan_nr = 1, + .led_current = 20, + .max_current = 40, + }, + { + .name = "B", + .chan_nr = 2, + .led_current = 20, + .max_current = 40, + }, + { + .name = "W", + .chan_nr = 3, + .led_current = 20, + .max_current = 40, + }, +}; + +static int lp5562_setup(void) +{ + /* setup HW resources */ +} + +static void lp5562_release(void) +{ + /* Release HW resources */ +} + +static void lp5562_enable(bool state) +{ + /* Control of chip enable signal */ +} + +static struct lp55xx_platform_data lp5562_platform_data = { + .led_config = lp5562_led_config, + .num_channels = ARRAY_SIZE(lp5562_led_config), + .setup_resources = lp5562_setup, + .release_resources = lp5562_release, + .enable = lp5562_enable, +}; + +If the current is set to 0 in the platform data, that channel is +disabled and it is not visible in the sysfs. diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index ced41868d2d..eec8fa2ffe4 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -5,7 +5,7 @@ Authors: Milo(Woogyom) Kim <milo.kim@ti.com> Description ----------- -LP5521, LP5523/55231 have common features as below. +LP5521, LP5523/55231 and LP5562 have common features as below. Register access via the I2C Device initialization/deinitialization @@ -116,3 +116,47 @@ To support this, 'run_engine' and 'firmware_cb' are configurable in each driver. run_engine : Control the selected engine firmware_cb : The callback function after loading the firmware is done. Chip specific commands for loading and updating program memory. + +( Predefined pattern data ) + +Without the firmware interface, the LP55xx driver provides another method for +loading an LED pattern: the 'predefined' pattern. +A predefined pattern is defined in the platform data and loaded +via the sysfs if needed. +To use the predefined pattern concept, 'patterns' and 'num_patterns' should be +configured. + + Example of predefined pattern data: + + /* mode_1: blinking data */ + static const u8 mode_1[] = { + 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00, + }; + + /* mode_2: always on */ + static const u8 mode_2[] = { 0x40, 0xFF, }; + + struct lp55xx_predef_pattern board_led_patterns[] = { + { + .r = mode_1, + .size_r = ARRAY_SIZE(mode_1), + }, + { + .b = mode_2, + .size_b = ARRAY_SIZE(mode_2), + }, + }; + + struct lp55xx_platform_data lp5562_pdata = { + ... + .patterns = board_led_patterns, + .num_patterns = ARRAY_SIZE(board_led_patterns), + }; + +Then, mode_1 and mode_2 can be run via the sysfs. + + echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern + echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on + +To stop a running pattern, + echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index d378cba6645..6e0f63f343b 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO @@ -8,9 +8,9 @@ Command line parameters Enable logging of debug information in case of ccw device timeouts. -* cio_ignore = {all} | - {<device> | <range of devices>} | - {!<device> | !<range of devices>} +* cio_ignore = device[,device[,..]] + + device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>} The given devices will be ignored by the common I/O-layer; no detection and device sensing will be done on any of those devices.
The subchannel to @@ -24,8 +24,10 @@ Command line parameters device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you give a device number 0xabcd, it will be interpreted as 0.0.abcd. - You can use the 'all' keyword to ignore all devices. - The '!' operator will cause the I/O-layer to _not_ ignore a device. + You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev' + keywords can be used to refer to the CCW based boot device and CCW console + device respectively (these are probably useful only when combined with the '!' + operator). The '!' operator will cause the I/O-layer to _not_ ignore a device. The command line is parsed from left to right. For example, diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt new file mode 100644 index 00000000000..5b532202406 --- /dev/null +++ b/Documentation/timers/NO_HZ.txt @@ -0,0 +1,273 @@ + NO_HZ: Reducing Scheduling-Clock Ticks + + +This document describes Kconfig options and boot parameters that can +reduce the number of scheduling-clock interrupts, thereby improving energy +efficiency and reducing OS jitter. Reducing OS jitter is important for +some types of computationally intensive high-performance computing (HPC) +applications and for real-time applications. + +There are two main contexts in which the number of scheduling-clock +interrupts can be reduced compared to the old-school approach of sending +a scheduling-clock interrupt to all CPUs every jiffy whether they need +it or not (CONFIG_HZ_PERIODIC=y or CONFIG_NO_HZ=n for older kernels): + +1. Idle CPUs (CONFIG_NO_HZ_IDLE=y or CONFIG_NO_HZ=y for older kernels). + +2. CPUs having only one runnable task (CONFIG_NO_HZ_FULL=y). + +These two cases are described in the following two sections, followed +by a third section on RCU-specific considerations and a fourth and final +section listing known issues. + + +IDLE CPUs + +If a CPU is idle, there is little point in sending it a scheduling-clock +interrupt. After all, the primary purpose of a scheduling-clock interrupt +is to force a busy CPU to shift its attention among multiple duties, +and an idle CPU has no duties to shift its attention among. + +The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending +scheduling-clock interrupts to idle CPUs, which is critically important +both to battery-powered devices and to highly virtualized mainframes. +A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would +drain its battery very quickly, easily 2-3 times as fast as would the +same device running a CONFIG_NO_HZ_IDLE=y kernel. A mainframe running +1,500 OS instances might find that half of its CPU time was consumed by +unnecessary scheduling-clock interrupts. In these situations, there +is strong motivation to avoid sending scheduling-clock interrupts to +idle CPUs. That said, dyntick-idle mode is not free: + +1. It increases the number of instructions executed on the path + to and from the idle loop. + +2. On many architectures, dyntick-idle mode also increases the + number of expensive clock-reprogramming operations. + +Therefore, systems with aggressive real-time response constraints often +run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels) +in order to avoid degrading from-idle transition latencies. + +An idle CPU that is not receiving scheduling-clock interrupts is said to +be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running +tickless". The remainder of this document will use "dyntick-idle mode". 
+ +There is also a boot parameter "nohz=" that can be used to disable +dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off". +By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling +dyntick-idle mode. + + +CPUs WITH ONLY ONE RUNNABLE TASK + +If a CPU has only one runnable task, there is little point in sending it +a scheduling-clock interrupt because there is no other task to switch to. + +The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid +sending scheduling-clock interrupts to CPUs with a single runnable task, +and such CPUs are said to be "adaptive-ticks CPUs". This is important +for applications with aggressive real-time response constraints because +it allows them to improve their worst-case response times by the maximum +duration of a scheduling-clock interrupt. It is also important for +computationally intensive short-iteration workloads: If any CPU is +delayed during a given iteration, all the other CPUs will be forced to +wait idle while the delayed CPU finishes. Thus, the delay is multiplied +by one less than the number of CPUs. In these situations, there is +again strong motivation to avoid sending scheduling-clock interrupts. + +By default, no CPU will be an adaptive-ticks CPU. The "nohz_full=" +boot parameter specifies the adaptive-ticks CPUs. For example, +"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks +CPUs. Note that you are prohibited from marking all of the CPUs as +adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain +online to handle timekeeping tasks in order to ensure that system calls +like gettimeofday() return accurate values on adaptive-tick CPUs. +(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no +running user processes to observe slight drifts in clock rate.) +Therefore, the boot CPU is prohibited from entering adaptive-ticks +mode. Specifying a "nohz_full=" mask that includes the boot CPU will +result in a boot-time error message, and the boot CPU will be removed +from the mask. + +Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies +that all CPUs other than the boot CPU are adaptive-ticks CPUs. This +Kconfig parameter will be overridden by the "nohz_full=" boot parameter, +so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and +the "nohz_full=1" boot parameter are specified, the boot parameter will +prevail so that only CPU 1 will be an adaptive-ticks CPU. + +Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. +This is covered in the "RCU IMPLICATIONS" section below. + +Normally, a CPU remains in adaptive-ticks mode as long as possible. +In particular, transitioning to kernel mode does not automatically change +the mode. Instead, the CPU will exit adaptive-ticks mode only if needed, +for example, if that CPU enqueues an RCU callback. + +Just as with dyntick-idle mode, the benefits of adaptive-tick mode do +not come for free: + +1. CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run + adaptive ticks without also running dyntick idle. This dependency + extends down into the implementation, so that all of the costs + of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL. + +2. The user/kernel transitions are slightly more expensive due + to the need to inform kernel subsystems (such as RCU) about + the change in mode. + +3. POSIX CPU timers on adaptive-tick CPUs may miss their deadlines + (perhaps indefinitely) because they currently rely on + scheduling-tick interrupts.
This will likely be fixed in + one of two ways: (1) Prevent CPUs with POSIX CPU timers from + entering adaptive-tick mode, or (2) Use hrtimers or other + adaptive-ticks-immune mechanism to cause the POSIX CPU timer to + fire properly. + +4. If there are more perf events pending than the hardware can + accommodate, they are normally round-robined so as to collect + all of them over time. Adaptive-tick mode may prevent this + round-robining from happening. This will likely be fixed by + preventing CPUs with large numbers of perf events pending from + entering adaptive-tick mode. + +5. Scheduler statistics for adaptive-tick CPUs may be computed + slightly differently than those for non-adaptive-tick CPUs. + This might in turn perturb load-balancing of real-time tasks. + +6. The LB_BIAS scheduler feature is disabled by adaptive ticks. + +Although improvements are expected over time, adaptive ticks is quite +useful for many types of real-time and compute-intensive applications. +However, the drawbacks listed above mean that adaptive ticks should not +(yet) be enabled by default. + + +RCU IMPLICATIONS + +There are situations in which idle CPUs cannot be permitted to +enter either dyntick-idle mode or adaptive-tick mode, the most +common being when that CPU has RCU callbacks pending. + +The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs +to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, +a timer will awaken these CPUs every four jiffies in order to ensure +that the RCU callbacks are processed in a timely fashion. + +Another approach is to offload RCU callback processing to "rcuo" kthreads +using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to +offload may be selected via several methods: + +1. One of three mutually exclusive Kconfig options specify a + build-time default for the CPUs to offload: + + a. The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in + no CPUs being offloaded. + + b. The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes + CPU 0 to be offloaded. + + c. The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all + CPUs to be offloaded. Note that the callbacks will be + offloaded to "rcuo" kthreads, and that those kthreads + will in fact run on some CPU. However, this approach + gives fine-grained control on exactly which CPUs the + callbacks run on, along with their scheduling priority + (including the default of SCHED_OTHER), and it further + allows this control to be varied dynamically at runtime. + +2. The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated + list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1, + 3, 4, and 5. The specified CPUs will be offloaded in addition to + any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or + CONFIG_RCU_NOCB_CPU_ALL=y. This means that the "rcu_nocbs=" boot + parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y. + +The offloaded CPUs will never queue RCU callbacks, and therefore RCU +never prevents offloaded CPUs from entering either dyntick-idle mode +or adaptive-tick mode. That said, note that it is up to userspace to +pin the "rcuo" kthreads to specific CPUs if desired. Otherwise, the +scheduler will decide where to run them, which might or might not be +where you want them to run. + + +KNOWN ISSUES + +o Dyntick-idle slows transitions to and from idle slightly. 
+ In practice, this has not been a problem except for the most + aggressive real-time workloads, which have the option of disabling + dyntick-idle mode, an option that most of them take. However, + some workloads will no doubt want to use adaptive ticks to + eliminate scheduling-clock interrupt latencies. Here are some + options for these workloads: + + a. Use PMQOS from userspace to inform the kernel of your + latency requirements (preferred). + + b. On x86 systems, use the "idle=mwait" boot parameter. + + c. On x86 systems, use the "intel_idle.max_cstate=" boot parameter + to limit the maximum C-state depth. + + d. On x86 systems, use the "idle=poll" boot parameter. + However, please note that use of this parameter can cause + your CPU to overheat, which may cause thermal throttling + to degrade your latencies -- and that this degradation can + be even worse than that of dyntick-idle. Furthermore, + this parameter effectively disables Turbo Mode on Intel + CPUs, which can significantly reduce maximum performance. + +o Adaptive-ticks slows user/kernel transitions slightly. + This is not expected to be a problem for computationally intensive + workloads, which have few such transitions. Careful benchmarking + will be required to determine whether or not other workloads + are significantly affected by this effect. + +o Adaptive-ticks does not do anything unless there is only one + runnable task for a given CPU, even though there are a number + of other situations where the scheduling-clock tick is not + needed. To give but one example, consider a CPU that has one + runnable high-priority SCHED_FIFO task and an arbitrary number + of low-priority SCHED_OTHER tasks. In this case, the CPU is + required to run the SCHED_FIFO task until it either blocks or + some other higher-priority task awakens on (or is assigned to) + this CPU, so there is no point in sending a scheduling-clock + interrupt to this CPU. However, the current implementation + nevertheless sends scheduling-clock interrupts to CPUs having a + single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER + tasks, even though these interrupts are unnecessary. + + Better handling of these sorts of situations is future work. + +o A reboot is required to reconfigure both adaptive idle and RCU + callback offloading. Runtime reconfiguration could be provided + if needed, however, due to the complexity of reconfiguring RCU at + runtime, there would need to be an earthshakingly good reason. + Especially given that you have the straightforward option of + simply offloading RCU callbacks from all CPUs and pinning them + where you want them whenever you want them pinned. + +o Additional configuration is required to deal with other sources + of OS jitter, including interrupts and system-utility tasks + and processes. This configuration normally involves binding + interrupts and tasks to particular CPUs. + +o Some sources of OS jitter can currently be eliminated only by + constraining the workload. For example, the only way to eliminate + OS jitter due to global TLB shootdowns is to avoid the unmapping + operations (such as kernel module unload operations) that + result in these shootdowns. For another example, page faults + and TLB misses can be reduced (and in some cases eliminated) by + using huge pages and by constraining the amount of memory used + by the application. Pre-faulting the working set can also be + helpful, especially when combined with the mlock() and mlockall() + system calls.
+ +o Unless all CPUs are idle, at least one CPU must keep the + scheduling-clock interrupt going in order to support accurate + timekeeping. + +o If there are adaptive-ticks CPUs, there will be at least one + CPU keeping the scheduling-clock interrupt going, even if all + CPUs are otherwise idle. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 119358dfb74..5f91eda9164 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1486,15 +1486,23 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +For the special case of virtio-ccw devices on s390, the ioevent is matched +to a subchannel/virtqueue tuple instead. + The following flags are defined: #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +For virtio-ccw devices, addr contains the subchannel id and datamatch the +virtqueue index. + 4.60 KVM_DIRTY_TLB @@ -1780,27 +1788,48 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_TCR | 32 + PPC | KVM_REG_PPC_TSR | 32 + PPC | KVM_REG_PPC_OR_TSR | 32 + PPC | KVM_REG_PPC_CLEAR_TSR | 32 + PPC | KVM_REG_PPC_MAS0 | 32 + PPC | KVM_REG_PPC_MAS1 | 32 + PPC | KVM_REG_PPC_MAS2 | 64 + PPC | KVM_REG_PPC_MAS7_3 | 64 + PPC | KVM_REG_PPC_MAS4 | 32 + PPC | KVM_REG_PPC_MAS6 | 32 + PPC | KVM_REG_PPC_MMUCFG | 32 + PPC | KVM_REG_PPC_TLB0CFG | 32 + PPC | KVM_REG_PPC_TLB1CFG | 32 + PPC | KVM_REG_PPC_TLB2CFG | 32 + PPC | KVM_REG_PPC_TLB3CFG | 32 + PPC | KVM_REG_PPC_TLB0PS | 32 + PPC | KVM_REG_PPC_TLB1PS | 32 + PPC | KVM_REG_PPC_TLB2PS | 32 + PPC | KVM_REG_PPC_TLB3PS | 32 + PPC | KVM_REG_PPC_EPTCFG | 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: ARM core registers have the following id bit patterns: - 0x4002 0000 0010 <index into the kvm_regs struct:16> + 0x4020 0000 0010 <index into the kvm_regs struct:16> ARM 32-bit CP15 registers have the following id bit patterns: - 0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> + 0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> ARM 64-bit CP15 registers have the following id bit patterns: - 0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> + 0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> ARM CCSIDR registers are demultiplexed by CSSELR value: - 0x4002 0000 0011 00 <csselr:8> + 0x4020 0000 0011 00 <csselr:8> ARM 32-bit VFP control registers have the following id bit patterns: - 0x4002 0000 0012 1 <regno:12> + 0x4020 0000 0012 1 <regno:12> ARM 64-bit FP registers have the following id bit patterns: - 0x4002 0000 0012 0 <regno:12> + 0x4030 0000 0012 0 <regno:12> 4.69 KVM_GET_ONE_REG @@ -2161,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data written, then `n_invalid' invalid entries, invalidating any previously valid entries found. 
+4.79 KVM_CREATE_DEVICE + +Capability: KVM_CAP_DEVICE_CTRL +Type: vm ioctl +Parameters: struct kvm_create_device (in/out) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device type is unknown or unsupported + EEXIST: Device already created, and this type of device may not + be instantiated multiple times + + Other error conditions may be defined by individual device types or + have their standard meanings. + +Creates an emulated device in the kernel. The file descriptor returned +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. + +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the +device type is supported (not necessarily whether it can be created +in the current vm). + +Individual devices should not define flags. Attributes should be used +for specifying any behavior that is not implied by the device type +number. + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + EPERM: The attribute cannot (currently) be accessed this way + (e.g. read-only attribute, or attribute that only makes + sense when the device is in a different state) + + Other error conditions may be defined by individual device types. + +Gets/sets a specified piece of device configuration and/or state. The +semantics are device-specific. See individual device documentation in +the "devices" directory. As with ONE_REG, the size of the data +transferred is defined by the particular attribute. + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +4.81 KVM_HAS_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + +Tests whether a device supports a particular attribute. A successful +return indicates the attribute is implemented. It does not necessarily +indicate that the attribute can be read or written in the device's +current state. "addr" is ignored. 4.77 KVM_ARM_VCPU_INIT @@ -2243,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. +4.82 KVM_PPC_RTAS_DEFINE_TOKEN + +Capability: KVM_CAP_PPC_RTAS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_rtas_token_args +Returns: 0 on success, -1 on error + +Defines a token value for a RTAS (Run Time Abstraction Services) +service in order to allow it to be handled in the kernel. The +argument struct gives the name of the service, which must be the name +of a service that has a kernel-side implementation. If the token +value is non-zero, it will be associated with that service, and +subsequent RTAS calls by the guest specifying that token will be +handled by the kernel. If the token value is 0, then any token +associated with the service will be forgotten, and subsequent RTAS +calls by the guest for that service will be passed to userspace to be +handled. + 5. 
The kvm_run structure ------------------------ @@ -2646,3 +2764,19 @@ to receive the topmost interrupt vector. When disabled (args[0] == 0), behavior is as if this facility is unsupported. When this capability is enabled, KVM_EXIT_EPR can occur. + +6.6 KVM_CAP_IRQ_MPIC + +Architectures: ppc +Parameters: args[0] is the MPIC device fd + args[1] is the MPIC CPU number for this vcpu + +This capability connects the vcpu to an in-kernel MPIC device. + +6.7 KVM_CAP_IRQ_XICS + +Architectures: ppc +Parameters: args[0] is the XICS device fd + args[1] is the XICS CPU number (server ID) for this vcpu + +This capability connects the vcpu to an in-kernel XICS device. diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README new file mode 100644 index 00000000000..34a69834124 --- /dev/null +++ b/Documentation/virtual/kvm/devices/README @@ -0,0 +1 @@ +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL. diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 00000000000..8257397adc3 --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -0,0 +1,53 @@ +MPIC interrupt controller +========================= + +Device types supported: + KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 + KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 + +Only one MPIC instance, of any type, may be instantiated. The created +MPIC will act as the system interrupt controller, connecting to each +vcpu's interrupt inputs. + +Groups: + KVM_DEV_MPIC_GRP_MISC + Attributes: + KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) + Base address of the 256 KiB MPIC register space. Must be + naturally aligned. A value of zero disables the mapping. + Reset value is zero. + + KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) + Access an MPIC register, as if the access were made from the guest. + "attr" is the byte offset into the MPIC register space. Accesses + must be 4-byte aligned. + + MSIs may be signaled by using this attribute group to write + to the relevant MSIIR. + + KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) + IRQ input line for each standard openpic source. 0 is inactive and 1 + is active, regardless of interrupt sense. + + For edge-triggered interrupts: Writing 1 is considered an activating + edge, and writing 0 is ignored. Reading returns 1 if a previously + signaled edge has not been acknowledged, and 0 otherwise. + + "attr" is the IRQ number. IRQ numbers for standard sources are the + byte offset of the relevant IVPR from EIVPR0, divided by 32. + +IRQ Routing: + + The MPIC emulation supports IRQ routing. Only a single MPIC device can + be instantiated. Once that device has been created, it's available as + irqchip id 0. + + This irqchip 0 has 256 interrupt pins, which expose the interrupts in + the main array of interrupt sources (a.k.a. "SRC" interrupts). + + The numbering is the same as the MPIC device tree binding -- based on + the register offset from the beginning of the sources array, without + regard to any subdivisions in chip documentation such as "internal" + or "external" interrupts. + + Access to non-SRC interrupts is not implemented through IRQ routing mechanisms. 
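As a userspace sketch (assuming only the ioctls and constants defined in the api.txt and mpic.txt additions above; vm_fd, create_mpic() and the 0xe0000000 base address are illustrative):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int create_mpic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_FSL_MPIC_20,	/* MPIC v2.0 */
		};
		__u64 base = 0xe0000000;			/* illustrative base address */
		struct kvm_device_attr attr = {
			.group = KVM_DEV_MPIC_GRP_MISC,
			.attr = KVM_DEV_MPIC_BASE_ADDR,
			.addr = (__u64)(unsigned long)&base,
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;
		/* Map the 256 KiB register space at the chosen base */
		if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
			return -1;
		return cd.fd;	/* device fd for further attribute access */
	}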
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt new file mode 100644 index 00000000000..42864935ac5 --- /dev/null +++ b/Documentation/virtual/kvm/devices/xics.txt @@ -0,0 +1,66 @@ +XICS interrupt controller + +Device type supported: KVM_DEV_TYPE_XICS + +Groups: + KVM_DEV_XICS_SOURCES + Attributes: One per interrupt source, indexed by the source number. + +This device emulates the XICS (eXternal Interrupt Controller +Specification) defined in PAPR. The XICS has a set of interrupt +sources, each identified by a 20-bit source number, and a set of +Interrupt Control Presentation (ICP) entities, also called "servers", +each associated with a virtual CPU. + +The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH +capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and +the interrupt server number (i.e. the vcpu number from the XICS's +point of view) in args[1] of the kvm_enable_cap struct. Each ICP has +64 bits of state which can be read and written using the +KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit +state word has the following bitfields, starting at the +least-significant end of the word: + +* Unused, 16 bits + +* Pending interrupt priority, 8 bits + Zero is the highest priority, 255 means no interrupt is pending. + +* Pending IPI (inter-processor interrupt) priority, 8 bits + Zero is the highest priority, 255 means no IPI is pending. + +* Pending interrupt source number, 24 bits + Zero means no interrupt pending, 2 means an IPI is pending + +* Current processor priority, 8 bits + Zero is the highest priority, meaning no interrupts can be + delivered, and 255 is the lowest priority. + +Each source has 64 bits of state that can be read and written using +the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the +KVM_DEV_XICS_SOURCES attribute group, with the attribute number being +the interrupt source number. The 64 bit state word has the following +bitfields, starting from the least-significant end of the word: + +* Destination (server number), 32 bits + This specifies where the interrupt should be sent, and is the + interrupt server number specified for the destination vcpu. + +* Priority, 8 bits + This is the priority specified for this interrupt source, where 0 is + the highest priority and 255 is the lowest. An interrupt with a + priority of 255 will never be delivered. + +* Level sensitive flag, 1 bit + This bit is 1 for a level-sensitive interrupt source, or 0 for + edge-sensitive (or MSI). + +* Masked flag, 1 bit + This bit is set to 1 if the interrupt is masked (cannot be delivered + regardless of its priority), for example by the ibm,int-off RTAS + call, or 0 if it is not masked. + +* Pending flag, 1 bit + This bit is 1 if the source has a pending interrupt, otherwise 0. + +Only one XICS instance may be created per VM. 
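As a decoding sketch for the ICP state word above (masks and shifts are derived only from the field order just described, least-significant end first; icp_decode() and the variable names are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Decode an ICP state word as read via KVM_GET_ONE_REG:
	 * bits 0-15 unused, 16-23 pending interrupt priority,
	 * 24-31 pending IPI priority, 32-55 pending source number,
	 * 56-63 current processor priority. */
	static void icp_decode(uint64_t state)
	{
		unsigned int pending_prio = (state >> 16) & 0xff;
		unsigned int ipi_prio = (state >> 24) & 0xff;
		unsigned int pending_src = (state >> 32) & 0xffffff;
		unsigned int cppr = (state >> 56) & 0xff;

		printf("CPPR=%u pending source=%u (prio %u) IPI prio=%u\n",
		       cppr, pending_src, pending_prio, ipi_prio);
	}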
diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index 1a66f907e5c..bf863edb517 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h @@ -8,7 +8,6 @@ #define __idmap __section(.idmap.text) noinline notrace extern pgd_t *idmap_pgd; -extern pgd_t *hyp_pgd; void setup_mm_for_reboot(void); diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c4e643d939..57cb786a620 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info { u32 hyp_pc; /* PC when exception was taken from Hyp mode */ }; -typedef struct vfp_hard_struct kvm_kernel_vfp_t; +typedef struct vfp_hard_struct kvm_cpu_context_t; struct kvm_vcpu_arch { struct kvm_regs regs; @@ -105,8 +105,10 @@ struct kvm_vcpu_arch { struct kvm_vcpu_fault_info fault; /* Floating point registers (VFP and Advanced SIMD/NEON) */ - kvm_kernel_vfp_t vfp_guest; - kvm_kernel_vfp_t *vfp_host; + struct vfp_hard_struct vfp_guest; + + /* Host FP context */ + kvm_cpu_context_t *host_cpu_context; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, + unsigned long long pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { - unsigned long pgd_low, pgd_high; - - pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); - pgd_high = (pgd_ptr >> 32ULL); - /* - * Call initialization code, and switch to the full blown - * HYP code. The init code doesn't need to preserve these registers as - * r1-r3 and r12 are already callee save according to the AAPCS. - * Note that we slightly misuse the prototype by casing the pgd_low to - * a void *. + * Call initialization code, and switch to the full blown HYP + * code. The init code doesn't need to preserve these + * registers as r0-r3 are already callee saved according to + * the AAPCS. + * Note that we slightly misuse the prototype by casing the + * stack pointer to a void *. + * + * We don't have enough registers to perform the full init in + * one go. Install the boot PGD first, and then install the + * runtime PGD, stack pointer and vectors. The PGDs are always + * passed as the third argument, in order to be passed into + * r2-r3 to the init code (yes, this is compliant with the + * PCS!). */ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + + kvm_call_hyp(NULL, 0, boot_pgd_ptr); + + kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } +static inline int kvm_arch_dev_ioctl_check_extension(long ext) +{ + return 0; +} + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 970f3b5fa10..472ac709100 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -19,21 +19,33 @@ #ifndef __ARM_KVM_MMU_H__ #define __ARM_KVM_MMU_H__ -#include <asm/cacheflush.h> -#include <asm/pgalloc.h> -#include <asm/idmap.h> +#include <asm/memory.h> +#include <asm/page.h> /* * We directly use the kernel VA for the HYP, as we can directly share * the mapping (HTTBR "covers" TTBR1). 
*/ -#define HYP_PAGE_OFFSET_MASK (~0UL) +#define HYP_PAGE_OFFSET_MASK UL(~0) #define HYP_PAGE_OFFSET PAGE_OFFSET #define KERN_TO_HYP(kva) (kva) +/* + * Our virtual mapping for the boot-time MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the vectors + * page, where no kernel data will ever be shared with HYP. + */ +#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) + +#ifndef __ASSEMBLY__ + +#include <asm/cacheflush.h> +#include <asm/pgalloc.h> + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_hyp_pmds(void); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); @@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) } } +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +#endif /* !__ASSEMBLY__ */ + #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a53efa99369..ee68cce6b48 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -158,7 +158,7 @@ int main(void) DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); - DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b571484e9f0..a871b8e00fc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -20,7 +20,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - ALIGN_FUNCTION(); \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -315,3 +315,8 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") +/* + * The HYP init code can't be more than a page long. + * The above comment applies as well. + */ +ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e579c..370e1a8af6a 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 + int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST + default 4 if KVM_ARM_HOST + default 0 help Static number of max supported virtual CPUs per VM. 
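The KVM_ARM_MAX_VCPUS value chosen in the Kconfig hunk above is what kvm_dev_ioctl_check_extension() reports back to userspace (the "r = KVM_MAX_VCPUS" case visible in the arm.c hunk below). A VMM would typically probe this limit before creating vcpus; a minimal sketch, assuming the case being served is KVM_CAP_MAX_VCPUS as in the generic API:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int probe_max_vcpus(void)
    {
            int fd = open("/dev/kvm", O_RDWR);
            int n;

            if (fd < 0)
                    return -1;

            /* Expected to return the Kconfig-set KVM_MAX_VCPUS */
            n = ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
            if (n <= 0)     /* cap absent: fall back to the soft limit */
                    n = ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
            close(fd);
            return n;
    }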
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 8dc5e76cb78..53c5ed83d16 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c index 6ac938d4629..c55b6089e92 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/arch/arm/kvm/arch_timer.c @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> +#include <clocksource/arm_arch_timer.h> #include <asm/arch_timer.h> #include <asm/kvm_vgic.h> @@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */ + timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, vcpu->arch.timer_cpu.irq->irq, vcpu->arch.timer_cpu.irq->level); @@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - /* Check if the timer is enabled and unmasked first */ - if ((timer->cntv_ctl & 3) != 1) + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) return; cval = timer->cntv_cval; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a0dfc2a53f9..37d216d814c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -48,7 +49,7 @@ __asm__(".arch_extension virt"); #endif static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state; +static kvm_cpu_context_t __percpu *kvm_host_cpu_state; static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. 
*/ @@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; default: - r = 0; + r = kvm_arch_dev_ioctl_check_extension(ext); break; } return r; @@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) -{ - return 0; -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { } @@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; - vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); /* * Check whether this vcpu requires the cache to be flushed on @@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) { u32 irq = irq_level->irq; unsigned int irq_type, vcpu_idx, irq_num; @@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp, } } -static void cpu_init_hyp_mode(void *vector) +static void cpu_init_hyp_mode(void *dummy) { + unsigned long long boot_pgd_ptr; unsigned long long pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors((unsigned long)vector); + __hyp_set_vectors(kvm_get_idmap_vector()); + boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); +} + +static int hyp_init_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + cpu_init_hyp_mode(NULL); + break; + } + + return NOTIFY_OK; } +static struct notifier_block hyp_init_cpu_nb = { + .notifier_call = hyp_init_cpu_notify, +}; + /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { - phys_addr_t init_phys_addr; int cpu; int err = 0; @@ -850,24 +861,6 @@ static int init_hyp_mode(void) } /* - * Execute the init code on each CPU. - * - * Note: The stack is not mapped yet, so don't do anything else than - * initializing the hypervisor mode on each CPU using a local stack - * space for temporary storage. 
- */ - init_phys_addr = virt_to_phys(__kvm_hyp_init); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, cpu_init_hyp_mode, - (void *)(long)init_phys_addr, 1); - } - - /* - * Unmap the identity mapping - */ - kvm_clear_hyp_idmap(); - - /* * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); @@ -890,33 +883,38 @@ static int init_hyp_mode(void) } /* - * Map the host VFP structures + * Map the host CPU structures */ - kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t); - if (!kvm_host_vfp_state) { + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { err = -ENOMEM; - kvm_err("Cannot allocate host VFP state\n"); + kvm_err("Cannot allocate host CPU state\n"); goto out_free_mappings; } for_each_possible_cpu(cpu) { - kvm_kernel_vfp_t *vfp; + kvm_cpu_context_t *cpu_ctxt; - vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); - err = create_hyp_mappings(vfp, vfp + 1); + cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); if (err) { - kvm_err("Cannot map host VFP state: %d\n", err); - goto out_free_vfp; + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_free_context; } } /* + * Execute the init code on each CPU. + */ + on_each_cpu(cpu_init_hyp_mode, NULL, 1); + + /* * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); if (err) - goto out_free_vfp; + goto out_free_context; #ifdef CONFIG_KVM_ARM_VGIC vgic_present = true; @@ -929,12 +927,19 @@ static int init_hyp_mode(void) if (err) goto out_free_mappings; +#ifndef CONFIG_HOTPLUG_CPU + free_boot_hyp_pgd(); +#endif + + kvm_perf_init(); + kvm_info("Hyp mode initialized successfully\n"); + return 0; -out_free_vfp: - free_percpu(kvm_host_vfp_state); +out_free_context: + free_percpu(kvm_host_cpu_state); out_free_mappings: - free_hyp_pmds(); + free_hyp_pgds(); out_free_stack_pages: for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); @@ -943,27 +948,42 @@ out_err: return err; } +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. 
*/ int kvm_arch_init(void *opaque) { int err; + int ret, cpu; if (!is_hyp_mode_available()) { kvm_err("HYP mode not available\n"); return -ENODEV; } - if (kvm_target_cpu() < 0) { - kvm_err("Target CPU not supported!\n"); - return -ENODEV; + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } } err = init_hyp_mode(); if (err) goto out_err; + err = register_cpu_notifier(&hyp_init_cpu_nb); + if (err) { + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + goto out_err; + } + kvm_coproc_table_init(); return 0; out_err: @@ -973,6 +993,7 @@ out_err: /* NOP: Compiling as a module not supported */ void kvm_arch_exit(void) { + kvm_perf_teardown(); } static int arm_init(void) diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 9f37a79b880..f048338135f 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -21,13 +21,33 @@ #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> /******************************************************************** * Hypervisor initialization * - should be called with: - * r0,r1 = Hypervisor pgd pointer - * r2 = top of Hyp stack (kernel VA) - * r3 = pointer to hyp vectors + * r0 = top of Hyp stack (kernel VA) + * r1 = pointer to hyp vectors + * r2,r3 = Hypervisor pgd pointer + * + * The init scenario is: + * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, + * runtime stack, runtime vectors + * - Enable the MMU with the boot pgd + * - Jump to a target into the trampoline page (remember, this is the same + * physical page!) + * - Now switch to the runtime pgd (same VA, and still the same physical + * page!) + * - Invalidate TLBs + * - Set stack and vectors + * - Profit! (or eret, if you only care about the code). + * + * As we only have four registers available to pass parameters (and we + * need six), we split the init in two phases: + * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. + * Provides the basic HYP init, and enable the MMU. + * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. + * Switches to the runtime PGD, set stack and vectors. */ .text @@ -47,22 +67,25 @@ __kvm_hyp_init: W(b) . __do_hyp_init: + cmp r0, #0 @ We have a SP? + bne phase2 @ Yes, second stage init + @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r0, r1, c2 + mcrr p15, 4, r2, r3, c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0. 
mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r12, =HTCR_MASK - bic r0, r0, r12 + ldr r2, =HTCR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c2, c0, 2 @ TTBCR and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) orr r0, r0, r1 mcr p15, 4, r0, c2, c0, 2 @ HTCR mrc p15, 4, r1, c2, c1, 2 @ VTCR - ldr r12, =VTCR_MASK - bic r1, r1, r12 + ldr r2, =VTCR_MASK + bic r1, r1, r2 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits orr r1, r0, r1 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) @@ -85,24 +108,41 @@ __do_hyp_init: @ - Memory alignment checks: enabled @ - MMU: enabled (this code must be run from an identity mapping) mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r12, =HSCTLR_MASK - bic r0, r0, r12 + ldr r2, =HSCTLR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r12 - ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) ) - THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) - orr r1, r1, r12 + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) + and r1, r1, r2 + ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) + orr r1, r1, r2 orr r0, r0, r1 isb mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - @ Set stack pointer and return to the kernel - mov sp, r2 + @ End of init phase-1 + eret + +phase2: + @ Set stack pointer + mov sp, r0 @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r3, c12, c0, 0 @ HVBAR + mcr p15, 4, r1, c12, c0, 0 @ HVBAR + + @ Jump to the trampoline page + ldr r0, =TRAMPOLINE_VA + adr r1, target + bfi r0, r1, #0, #PAGE_SHIFT + mov pc, r0 + +target: @ We're now in the trampoline code, switch page tables + mcrr p15, 4, r2, r3, c2 + isb + + @ Invalidate the old TLBs + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH + dsb eret diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2f12e405640..965706578f1 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,8 +32,15 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); +static void *init_bounce_page; +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); @@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void free_ptes(pmd_t *pmd, unsigned long addr) +static void clear_pud_entry(pud_t *pud) { - pte_t *pte; - unsigned int i; + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { - if (!pmd_none(*pmd) && pmd_table(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - pte_free_kernel(NULL, pte); - } - pmd++; +static void clear_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static bool pmd_empty(pmd_t *pmd) +{ + struct page *pmd_page = virt_to_page(pmd); + return page_count(pmd_page) == 1; +} + +static void clear_pte_entry(pte_t *pte) +{ + if (pte_present(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); } } -static void free_hyp_pgd_entry(unsigned long addr) +static bool pte_empty(pte_t *pte) +{ + struct page *pte_page = virt_to_page(pte); + return page_count(pte_page) == 1; +} + +static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 
size) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - unsigned long hyp_addr = KERN_TO_HYP(addr); + pte_t *pte; + unsigned long long addr = start, end = start + size; + u64 range; + + while (addr < end) { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + addr += PUD_SIZE; + continue; + } - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + addr += PMD_SIZE; + continue; + } - if (pud_none(*pud)) - return; - BUG_ON(pud_bad(*pud)); + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(pte); + range = PAGE_SIZE; - pmd = pmd_offset(pud, hyp_addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); + /* If we emptied the pte, walk back up the ladder */ + if (pte_empty(pte)) { + clear_pmd_entry(pmd); + range = PMD_SIZE; + if (pmd_empty(pmd)) { + clear_pud_entry(pud); + range = PUD_SIZE; + } + } + + addr += range; + } } /** - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables + * free_boot_hyp_pgd - free HYP boot page tables * - * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * either mappings in the kernel memory area (above PAGE_OFFSET), or - * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). + * Free the HYP boot page tables. The bounce page is also freed. */ -void free_hyp_pmds(void) +void free_boot_hyp_pgd(void) { - unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); + + if (boot_hyp_pgd) { + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + kfree(boot_hyp_pgd); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) + unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + + kfree(init_bounce_page); + init_bounce_page = NULL; + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end) +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. 
+ */ +void free_hyp_pgds(void) { - pte_t *pte; unsigned long addr; - struct page *page; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); + free_boot_hyp_pgd(); + + mutex_lock(&kvm_hyp_pgd_mutex); - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(!virt_addr_valid(addr)); - page = virt_to_page(addr); - kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); + if (hyp_pgd) { + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + kfree(hyp_pgd); + hyp_pgd = NULL; } + + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end, - unsigned long *pfn_base) +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; unsigned long addr; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(pfn_valid(*pfn_base)); - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); - (*pfn_base)++; - } + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, - unsigned long end, unsigned long *pfn_base) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pmd_t *pmd; pte_t *pte; unsigned long addr, next; - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pmd = pmd_offset(pud, hyp_addr); + addr = start; + do { + pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, hyp_addr); + pte = pte_alloc_one_kernel(NULL, addr); if (!pte) { kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; } pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); - /* - * If pfn_base is NULL, we map kernel pages into HYP with the - * virtual address. Otherwise, this is considered an I/O - * mapping and we map the physical region starting at - * *pfn_base to [start, end[. 
- */ - if (!pfn_base) - create_hyp_pte_mappings(pmd, addr, next); - else - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); - } + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); return 0; } -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) { - unsigned long start = (unsigned long)from; - unsigned long end = (unsigned long)to; pgd_t *pgd; pud_t *pud; pmd_t *pmd; unsigned long addr, next; int err = 0; - if (start >= end) - return -EINVAL; - /* Check for a valid kernel memory mapping */ - if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) - return -EINVAL; - /* Check for a valid kernel IO mapping */ - if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, hyp_addr); + pmd = pmd_alloc_one(NULL, addr); if (!pmd) { kvm_err("Cannot allocate Hyp pmd\n"); err = -ENOMEM; goto out; } pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); if (err) goto out; - } + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); out: mutex_unlock(&kvm_hyp_pgd_mutex); return err; @@ -250,27 +309,41 @@ out: * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. - * - * Note: Wrapping around zero in the "to" address is not supported. */ int create_hyp_mappings(void *from, void *to) { - return __create_hyp_mappings(from, to, NULL); + unsigned long phys_addr = virt_to_phys(from); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel memory mapping */ + if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP); } /** * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode * @from: The kernel start VA of the range * @to: The kernel end VA of the range (exclusive) - * @addr: The physical start address which gets mapped + * @phys_addr: The physical start address which gets mapped * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. 
*/ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long pfn = __phys_to_pfn(addr); - return __create_hyp_mappings(from, to, &pfn); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } /** @@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -static void clear_pud_entry(pud_t *pud) -{ - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - pmd_free(NULL, pmd_table); - put_page(virt_to_page(pud)); -} - -static void clear_pmd_entry(pmd_t *pmd) -{ - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - pte_free_kernel(NULL, pte_table); - put_page(virt_to_page(pmd)); -} - -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - -static void clear_pte_entry(pte_t *pte) -{ - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - } -} - -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range * @kvm: The VM pointer @@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte) */ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - phys_addr_t addr = start, end = start + size; - u64 range; - - while (addr < end) { - pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(pte); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(pmd); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(pud); - range = PUD_SIZE; - } - } - - addr += range; - } + unmap_range(kvm->arch.pgd, start, size); } /** @@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - VM_BUG_ON(!virt_addr_valid(hyp_pgd)); return virt_to_phys(hyp_pgd); } +phys_addr_t kvm_mmu_get_boot_httbr(void) +{ + return virt_to_phys(boot_hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + int kvm_mmu_init(void) { - if (!hyp_pgd) { + int err; + + hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { + /* + * Our init code is crossing a page boundary. Allocate + * a bounce page, copy the code over and use that. + */ + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; + phys_addr_t phys_base; + + init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_bounce_page) { + kvm_err("Couldn't allocate HYP init bounce page\n"); + err = -ENOMEM; + goto out; + } + + memcpy(init_bounce_page, __hyp_idmap_text_start, len); + /* + * Warning: the code we just copied to the bounce page + * must be flushed to the point of coherency. 
+ * Otherwise, the data may be sitting in L2, and HYP + * mode won't be able to observe it as it runs with + * caches off at that point. + */ + kvm_flush_dcache_to_poc(init_bounce_page, len); + + phys_base = virt_to_phys(init_bounce_page); + hyp_idmap_vector += phys_base - hyp_idmap_start; + hyp_idmap_start = phys_base; + hyp_idmap_end = phys_base + len; + + kvm_info("Using HYP init bounce page @%lx\n", + (unsigned long)phys_base); + } + + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); - return -ENOMEM; + err = -ENOMEM; + goto out; } - return 0; -} + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(boot_hyp_pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); -/** - * kvm_clear_idmap - remove all idmaps from the hyp pgd - * - * Free the underlying pmds for all pgds in range and clear the pgds (but - * don't free them) afterwards. - */ -void kvm_clear_hyp_idmap(void) -{ - unsigned long addr, end; - unsigned long next; - pgd_t *pgd = hyp_pgd; - pud_t *pud; - pmd_t *pmd; + if (err) { + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + goto out; + } - addr = virt_to_phys(__hyp_idmap_text_start); - end = virt_to_phys(__hyp_idmap_text_end); + /* Map the very same page at the trampoline VA */ + err = __create_hyp_mappings(boot_hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pgd += pgd_index(addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); + /* Map the same page again into the runtime page tables */ + err = __create_hyp_mappings(hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pud_clear(pud); - kvm_clean_pmd_entry(pmd); - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); - } while (pgd++, addr = next, addr < end); + return 0; +out: + free_hyp_pgds(); + return err; } diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 00000000000..1a3849da0b4 --- /dev/null +++ b/arch/arm/kvm/perf.c @@ -0,0 +1,68 @@ +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/perf_event.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +static int kvm_is_in_guest(void) +{ + return kvm_arm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index d58ad4ff8d3..2ebc97e16b9 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -466,8 +466,6 @@ config MACH_MX31ADS_WM1133_EV1 depends on MACH_MX31ADS depends on MFD_WM8350_I2C depends on REGULATOR_WM8350 = y - select MFD_WM8350_CONFIG_MODE_0 - select MFD_WM8352_CONFIG_MODE_0 help Include support for the Wolfson Microelectronics 1133-EV1 PMU and audio module for the MX31ADS platform. diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 283cb77d472..20578536aec 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -200,10 +200,7 @@ endchoice config SMDK6410_WM1190_EV1 bool "Support Wolfson Microelectronics 1190-EV1 PMIC card" depends on MACH_SMDK6410 - select MFD_WM8350_CONFIG_MODE_0 - select MFD_WM8350_CONFIG_MODE_3 select MFD_WM8350_I2C - select MFD_WM8352_CONFIG_MODE_0 select REGULATOR select REGULATOR_WM8350 select SAMSUNG_GPIO_EXTRA64 diff --git a/arch/arm/mach-s3c64xx/mach-crag6410-module.c b/arch/arm/mach-s3c64xx/mach-crag6410-module.c index a946b759fab..7ccfef227c7 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410-module.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410-module.c @@ -208,7 +208,7 @@ static const struct i2c_board_info wm1277_devs[] = { static struct arizona_pdata wm5102_reva_pdata = { .ldoena = S3C64XX_GPN(7), .gpio_base = CODEC_GPIO_BASE, - .irq_active_high = true, + .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 4, .micd_rate = 6, .gpio_defaults = { @@ -238,7 +238,7 @@ static struct spi_board_info wm5102_reva_spi_devs[] = { static struct arizona_pdata wm5102_pdata = { .ldoena = S3C64XX_GPN(7), .gpio_base = CODEC_GPIO_BASE, - .irq_active_high = true, + .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 2, .gpio_defaults = { [2] = 0x10000, /* AIF3TXLRCLK */ diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 5ee505c937d..83cb3ac2709 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -8,7 +8,6 @@ #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/system_info.h> -#include <asm/virt.h> pgd_t *idmap_pgd; @@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, } while (pgd++, addr = next, addr != end); } -#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) -pgd_t *hyp_pgd; - -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - -static int __init init_static_idmap_hyp(void) -{ - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - if (!hyp_pgd) - return -ENOMEM; - - pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", - 
__hyp_idmap_text_start, __hyp_idmap_text_end); - identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, - __hyp_idmap_text_end, PMD_SECT_AP1); - - return 0; -} -#else -static int __init init_static_idmap_hyp(void) -{ - return 0; -} -#endif - extern char __idmap_text_start[], __idmap_text_end[]; static int __init init_static_idmap(void) { - int ret; - idmap_pgd = pgd_alloc(&init_mm); if (!idmap_pgd) return -ENOMEM; @@ -123,12 +95,10 @@ static int __init init_static_idmap(void) identity_mapping_add(idmap_pgd, __idmap_text_start, __idmap_text_end, 0); - ret = init_static_idmap_hyp(); - /* Flush L1 for the hardware to see this page table content */ flush_cache_louis(); - return ret; + return 0; } early_initcall(init_static_idmap); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index cfa74983c67..989dd3fe8de 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -26,6 +26,7 @@ #define KVM_USER_MEM_SLOTS 32 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS /* define exit reasons from vmm to kvm*/ #define EXIT_REASON_VM_PANIC 0 diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h index ec6c6b30123..99503c28440 100644 --- a/arch/ia64/include/uapi/asm/kvm.h +++ b/arch/ia64/include/uapi/asm/kvm.h @@ -27,7 +27,6 @@ /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_IOAPIC #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_DEVICE_ASSIGNMENT /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 2cd225f8c68..990b86420cc 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -21,12 +21,11 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on BROKEN depends on HAVE_KVM && MODULES - # for device assignment: - depends on PCI depends on BROKEN select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO ---help--- @@ -50,6 +49,17 @@ config KVM_INTEL Provides support for KVM on Itanium 2 processors equipped with the VT extensions. +config KVM_DEVICE_ASSIGNMENT + bool "KVM legacy PCI device assignment support" + depends on KVM && PCI && IOMMU_API + default y + ---help--- + Provide support for legacy PCI device assignment through KVM. The + kernel now also supports a full featured userspace device driver + framework through VFIO, which supersedes much of this support. + + If unsure, say Y. 
+ source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index db3d7c5d107..1a4053789d0 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o assigned-dev.o) + coalesced_mmio.o irq_comm.o) -ifeq ($(CONFIG_IOMMU_API),y) -common-objs += $(addprefix ../../../virt/kvm/, iommu.o) +ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) +common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a5864..5b2dc0d10c8 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_IOMMU: r = iommu_present(&pci_bus_type); break; +#endif default: r = 0; } @@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) { if (!irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level); + irq_event->irq, irq_event->level, + line_status); return 0; } @@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp, int r = -ENOTTY; switch (ioctl) { - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; - struct kvm_userspace_memory_region kvm_userspace_mem; - - r = -EFAULT; - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) - goto out; - kvm_userspace_mem.slot = kvm_mem.slot; - kvm_userspace_mem.flags = kvm_mem.flags; - kvm_userspace_mem.guest_phys_addr = - kvm_mem.guest_phys_addr; - kvm_userspace_mem.memory_size = kvm_mem.memory_size; - r = kvm_vm_ioctl_set_memory_region(kvm, - &kvm_userspace_mem, false); - if (r) - goto out; - break; - } case KVM_CREATE_IRQCHIP: r = -EFAULT; r = kvm_ioapic_init(kvm); @@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); -#ifdef KVM_CAP_DEVICE_ASSIGNMENT kvm_free_all_assigned_devices(kvm); -#endif kfree(kvm->arch.vioapic); kvm_release_vm_pages(kvm); } @@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { unsigned long i; unsigned long pfn; @@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { return; } diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935b6db..c5f92a926a9 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); #define kvm_apic_present(x) (true) #define kvm_lapic_enabled(x) (true) -static inline 
bool kvm_apic_vid_enabled(void) -{ - /* IA64 has no apicv supporting, do nothing here */ - return false; -} - #endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3dad6a..cf4df8e2139 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -270,6 +270,9 @@ #define H_SET_MODE 0x31C #define MAX_HCALL_OPCODE H_SET_MODE +/* Platform specific hcalls, used by KVM */ +#define H_RTAS 0xf000 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5a56e1c5f85..349ed85c7d6 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); +extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, + unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); @@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); -extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); +extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, + unsigned long gpa, bool dirty); extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define OSI_SC_MAGIC_R4 0x77810F9B #define INS_DCBZ 0x7c0007ec +/* TO = 31 for unconditional trap */ +#define INS_TW 0x7fe00008 /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1dc992..9c1ff330c80 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); } +#ifdef CONFIG_KVM_BOOK3S_64_HV +/* + * Note modification of an HPTE; set the HPTE modified bit + * if anyone is interested. 
+ */ +static inline void note_hpte_modification(struct kvm *kvm, + struct revmap_entry *rev) +{ + if (atomic_read(&kvm->arch.hpte_mod_interest)) + rev->guest_rpte |= HPTE_GR_MODIFIED; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d2717cc..9039d3c97ee 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -20,6 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_ASM_H__ #define __ASM_KVM_BOOK3S_ASM_H__ +/* XICS ICP register offsets */ +#define XICS_XIRR 4 +#define XICS_MFRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -81,10 +86,11 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV u8 hwthread_req; u8 hwthread_state; - + u8 host_ipi; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; + u32 saved_xirr; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[8]; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index b7cd3356a53..d3c1eb34c98 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -26,6 +26,8 @@ /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS 64 +#define KVMPPC_INST_EHPRIV 0x7c00021c + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb8607472..af326cde7cb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include <linux/mmu_notifier.h> @@ -188,6 +192,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in book3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -255,6 +263,13 @@ struct kvm_arch { #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; + struct list_head rtas_tokens; +#endif +#ifdef CONFIG_KVM_MPIC + struct openpic *mpic; +#endif +#ifdef CONFIG_KVM_XICS + struct kvmppc_xics *xics; #endif }; @@ -301,11 +316,13 @@ struct kvmppc_vcore { * that a guest can register. 
*/ struct kvmppc_vpa { + unsigned long gpa; /* Current guest phys addr */ void *pinned_addr; /* Address in kernel linear mapping */ void *pinned_end; /* End of region */ unsigned long next_gpa; /* Guest phys addr for update */ unsigned long len; /* Number of bytes required */ u8 update_pending; /* 1 => update pinned_addr from next_gpa */ + bool dirty; /* true => area has been modified by kernel */ }; struct kvmppc_pte { @@ -359,6 +376,11 @@ struct kvmppc_slb { #define KVMPPC_BOOKE_MAX_IAC 4 #define KVMPPC_BOOKE_MAX_DAC 2 +/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ +#define KVMPPC_EPR_NONE 0 /* EPR not supported */ +#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ +#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ + struct kvmppc_booke_debug_reg { u32 dbcr0; u32 dbcr1; @@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg { u64 dac[KVMPPC_BOOKE_MAX_DAC]; }; +#define KVMPPC_IRQ_DEFAULT 0 +#define KVMPPC_IRQ_MPIC 1 +#define KVMPPC_IRQ_XICS 2 + +struct openpic; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -502,8 +530,11 @@ struct kvm_vcpu_arch { spinlock_t wdt_lock; struct timer_list wdt_timer; u32 tlbcfg[4]; + u32 tlbps[4]; u32 mmucfg; + u32 eptcfg; u32 epr; + u32 crit_save; struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; @@ -521,7 +552,7 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; - u8 epr_enabled; + u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ @@ -548,6 +579,13 @@ struct kvm_vcpu_arch { unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ + int irq_type; /* one of KVM_IRQ_* */ + int irq_cpu_id; + struct openpic *mpic; /* KVM_IRQ_MPIC */ +#ifdef CONFIG_KVM_XICS + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif + #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; @@ -588,5 +626,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x0060 #define __KVM_HAVE_ARCH_WQP +#define __KVM_HAVE_CREATE_DEVICE #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf41..a5287fe03d7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -44,7 +44,7 @@ enum emulation_result { EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. 
go again */ - EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ + EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); + extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); + +extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); +extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); +extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, + u32 priority); +extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, + u32 *priority); +extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); +extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
@@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +struct openpic; + #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca[cpu].kvm_hstate.xics_phys = addr; } +static inline u32 kvmppc_get_xics_latch(void) +{ + u32 xirr = get_paca()->kvm_hstate.saved_xirr; + + get_paca()->kvm_hstate.saved_xirr = 0; + + return xirr; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{ + paca[cpu].kvm_hstate.host_ipi = host_ipi; +} + +extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvm_linear_init(void); #else @@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} + +static inline u32 kvmppc_get_xics_latch(void) +{ + return 0; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{} + +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_kick(vcpu); +} +#endif + +#ifdef CONFIG_KVM_XICS +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; +} +extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); +extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); +extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); +extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu); +#else +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) + { return 0; } +static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, + unsigned long server) + { return -EINVAL; } +static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm, + struct kvm_irq_level *args) + { return -ENOTTY; } +static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) + { return 0; } #endif static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) @@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #endif } +#ifdef CONFIG_KVM_MPIC + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu); +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); + +#else + +static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ +} + +static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu) +{ + return -EINVAL; +} + +static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, + struct kvm_vcpu *vcpu) +{ +} + +#endif /* CONFIG_KVM_MPIC */ + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, @@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids); static inline void kvmppc_mmu_flush_icache(pfn_t pfn) { - /* Clear i-cache for new pages */ struct page *page; + /* + * We can only access pages that the kernel maps + * as memory. Bail out for unmapped ones. 
+ */ + if (!pfn_valid(pfn)) + return; + + /* Clear i-cache for new pages */ page = pfn_to_page(pfn); if (!test_bit(PG_arch_1, &page->flags)) { flush_dcache_icache_page(page); @@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) return ea; } +extern void xics_wake_cpu(int cpu); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 3d17427e4fd..a6136515c7f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -300,6 +300,7 @@ #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ #define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_MER_SH 11 #define LPCR_LPES 0x0000000c #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb..0fb1a6e9ff9 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -25,6 +25,8 @@ /* Select powerpc specific features in <linux/kvm.h> */ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT +#define __KVM_HAVE_IRQCHIP +#define __KVM_HAVE_IRQ_LINE struct kvm_regs { __u64 pc; @@ -272,8 +274,31 @@ struct kvm_debug_exit_arch { /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { + struct { + /* H/W breakpoint/watchpoint address */ + __u64 addr; + /* + * Type denotes h/w breakpoint, read watchpoint, write + * watchpoint or watchpoint (both read and write). + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) + __u32 type; + __u32 reserved; + } bp[16]; }; +/* Debug related defines */ +/* + * kvm_guest_debug->control is a 32-bit field. The lower 16 bits are generic + * and the upper 16 bits are architecture specific. The architecture-specific + * bits select whether the ioctl sets a hardware breakpoint or a software breakpoint.
+ */ +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 + /* definition of registers in kvm_run */ struct kvm_sync_regs { }; @@ -299,6 +324,12 @@ struct kvm_allocate_rma { __u64 rma_size; }; +/* for KVM_CAP_PPC_RTAS */ +struct kvm_rtas_token_args { + char name[120]; + __u64 token; /* Use a token of 0 to undefine a mapping */ +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; @@ -359,6 +390,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* Per-vcpu XICS interrupt controller state */ +#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) + +#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */ +#define KVM_REG_PPC_ICP_CPPR_MASK 0xff +#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */ +#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff +#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */ +#define KVM_REG_PPC_ICP_MFRR_MASK 0xff +#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ +#define KVM_REG_PPC_ICP_PPRI_MASK 0xff + +/* Device control API: PPC-specific devices */ +#define KVM_DEV_MPIC_GRP_MISC 1 +#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ + +#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ +#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ + +/* One-Reg API: PPC-specific registers */ #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) @@ -417,4 +468,47 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) +/* Timer Status Register OR/CLEAR interface */ +#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) +#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) +#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) +#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) + +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) + +/* MMU registers */ +#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c) +#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d) +#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e) +#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f) +#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90) +#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91) +#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92) +/* + * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using + * KVM_CAP_SW_TLB ioctl + */ +#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93) +#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) +#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) +#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) +#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97) +#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) +#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) +#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) +#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) + +/* PPC64 eXternal Interrupt Controller Specification */ +#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ + +/* Layout of 64-bit source attribute values */ +#define 
KVM_XICS_DESTINATION_SHIFT 0 +#define KVM_XICS_DESTINATION_MASK 0xffffffffULL +#define KVM_XICS_PRIORITY_SHIFT 32 +#define KVM_XICS_PRIORITY_MASK 0xff +#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) +#define KVM_XICS_MASKED (1ULL << 41) +#define KVM_XICS_PENDING (1ULL << 42) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 172233eab79..b51a97cfedf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -480,6 +480,7 @@ int main(void) DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); + DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -576,6 +577,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); + HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); @@ -599,6 +602,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 3d7fd21c65f..2f5c6b6d687 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + return -EINVAL; +} + +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + return -EINVAL; +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 63c67ec72e4..eb643f86257 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -136,21 +136,41 @@ config KVM_E500V2 If unsure, say N. config KVM_E500MC - bool "KVM support for PowerPC E500MC/E5500 processors" + bool "KVM support for PowerPC E500MC/E5500/E6500 processors" depends on PPC_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV select MMU_NOTIFIER ---help--- - Support running unmodified E500MC/E5500 (32-bit) guest kernels in - virtual machines on E500MC/E5500 host processors. + Support running unmodified E500MC/E5500/E6500 guest kernels in + virtual machines on E500MC/E5500/E6500 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. +config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && E500 + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_MSI + help + Enable support for emulating MPIC devices inside the + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. 
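For the in-kernel MPIC above, userspace instantiates the controller through the generic device control API whose PPC-specific groups (KVM_DEV_MPIC_GRP_MISC / KVM_DEV_MPIC_BASE_ADDR) were added to the uapi header earlier in this patch. A rough sketch, assuming the generic KVM_CREATE_DEVICE / KVM_SET_DEVICE_ATTR ioctls and the KVM_DEV_TYPE_FSL_MPIC_20 device type from the rest of this series:

  /* Hypothetical userspace sketch -- not part of this patch. */
  #include <linux/kvm.h>
  #include <stdint.h>
  #include <sys/ioctl.h>

  static int create_inkernel_mpic(int vm_fd)
  {
          struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FSL_MPIC_20 };
          uint64_t base = 0xe0000000;     /* example guest physical base */
          struct kvm_device_attr attr = {
                  .group = KVM_DEV_MPIC_GRP_MISC,
                  .attr  = KVM_DEV_MPIC_BASE_ADDR,
                  .addr  = (uint64_t)(uintptr_t)&base,
          };

          if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                  return -1;

          /* cd.fd now refers to the MPIC device */
          return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
  }
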
+ +config KVM_XICS + bool "KVM in-kernel XICS emulation" + depends on KVM_BOOK3S_64 && !KVM_MPIC + ---help--- + Include support for the XICS (eXternal Interrupt Controller + Specification) interrupt controller architecture used on + IBM POWER (pSeries) servers. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c2..422de3f4d46 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o +kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ + book3s_hv_rm_xics.o kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ - book3s_hv_builtin.o + book3s_hv_builtin.o \ + $(kvm-book3s_64-builtin-xics-objs-y) + +kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ + book3s_xics.o kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ @@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \ emulate.o \ book3s.o \ book3s_64_vio.o \ + book3s_rtas.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) @@ -103,6 +110,9 @@ kvm-book3s_32-objs := \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) +kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) + kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b64528524..700df6f1d32 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) return prio; } -static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, +void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) { unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); @@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); break; #endif /* CONFIG_ALTIVEC */ + case KVM_REG_PPC_DEBUG_INST: { + u32 opcode = INS_TW; + r = copy_to_user((u32 __user *)(long)reg->addr, + &opcode, sizeof(u32)); + break; + } +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + r = kvmppc_xics_set_icp(vcpu, + set_reg_val(reg->id, val)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return 0; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
*vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index da98e26f6e4..5880dfb3107 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; - rev[i].guest_rpte = ptel | rcbits; + if (rcbits & ~rev[i].guest_rpte) { + rev[i].guest_rpte = ptel | rcbits; + note_hpte_modification(kvm, &rev[i]); + } } unlock_rmap(rmapp); hptep[0] &= ~HPTE_V_HVLOCK; @@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); - rev[i].guest_rpte |= HPTE_R_R; + if (!(rev[i].guest_rpte & HPTE_R_R)) { + rev[i].guest_rpte |= HPTE_R_R; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) hptep[1] &= ~HPTE_R_C; eieio(); hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; - rev[i].guest_rpte |= HPTE_R_C; + if (!(rev[i].guest_rpte & HPTE_R_C)) { + rev[i].guest_rpte |= HPTE_R_C; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } +static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, + struct kvm_memory_slot *memslot, + unsigned long *map) +{ + unsigned long gfn; + + if (!vpa->dirty || !vpa->pinned_addr) + return; + gfn = vpa->gpa >> PAGE_SHIFT; + if (gfn < memslot->base_gfn || + gfn >= memslot->base_gfn + memslot->npages) + return; + + vpa->dirty = false; + if (map) + __set_bit_le(gfn - memslot->base_gfn, map); +} + long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map) { unsigned long i; unsigned long *rmapp; + struct kvm_vcpu *vcpu; preempt_disable(); rmapp = memslot->arch.rmap; @@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, __set_bit_le(i, map); ++rmapp; } + + /* Harvest dirty bits from VPA and DTL updates */ + /* Note: we never modify the SLB shadow buffer areas */ + kvm_for_each_vcpu(i, vcpu, kvm) { + spin_lock(&vcpu->arch.vpa_update_lock); + harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); + harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); + spin_unlock(&vcpu->arch.vpa_update_lock); + } preempt_enable(); return 0; } @@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page, *pages[1]; int npages; - unsigned long hva, psize, offset; + unsigned long hva, offset; unsigned long pa; unsigned long *physp; int srcu_idx; @@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, } srcu_read_unlock(&kvm->srcu, srcu_idx); - psize = PAGE_SIZE; - if (PageHuge(page)) { - page = compound_head(page); - psize <<= compound_order(page); - } - offset = gpa & (psize - 1); + offset = gpa & (PAGE_SIZE - 1); if (nb_ret) - *nb_ret = psize - offset; + *nb_ret = PAGE_SIZE - offset; return page_address(page) + offset; err: @@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, 
unsigned long gpa, return NULL; } -void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) +void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, + bool dirty) { struct page *page = virt_to_page(va); + struct kvm_memory_slot *memslot; + unsigned long gfn; + unsigned long *rmap; + int srcu_idx; put_page(page); + + if (!dirty || !kvm->arch.using_mmu_notifiers) + return; + + /* We need to mark this page dirty in the rmap chain */ + gfn = gpa >> PAGE_SHIFT; + srcu_idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, gfn); + if (memslot) { + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_CHANGED; + unlock_rmap(rmap); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); } /* @@ -1193,16 +1245,36 @@ struct kvm_htab_ctx { #define HPTE_SIZE (2 * sizeof(unsigned long)) +/* + * Returns 1 if this HPT entry has been modified or has pending + * R/C bit changes. + */ +static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +{ + unsigned long rcbits_unset; + + if (revp->guest_rpte & HPTE_GR_MODIFIED) + return 1; + + /* Also need to consider changes in reference and changed bits */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + return 1; + + return 0; +} + static long record_hpte(unsigned long flags, unsigned long *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { unsigned long v, r; + unsigned long rcbits_unset; int ok = 1; int valid, dirty; /* Unmodified entries are uninteresting except on the first pass */ - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + dirty = hpte_dirty(revp, hptp); if (!first_pass && !dirty) return 0; @@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = hptp[0]; + + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + /* Harvest R and C into guest view if necessary */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if (valid && (rcbits_unset & hptp[1])) { + revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | + HPTE_GR_MODIFIED; + dirty = 1; + } + if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; + valid = 1; } - /* re-evaluate valid and dirty from synchronized HPTE value */ - valid = !!(v & HPTE_V_VALID); if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) valid = 0; - r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + r = revp->guest_rpte; /* only clear modified if this is the right sort of entry */ if (valid == want_valid && dirty) { r &= ~HPTE_GR_MODIFIED; @@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, /* Skip uninteresting entries, i.e. 
clean on not-first pass */ if (!first_pass) { while (i < kvm->arch.hpt_npte && - !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + !hpte_dirty(revp, hptp)) { ++i; hptp += 2; ++revp; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 836c56975e2..1f6344c4408 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->papr_hcall.args[i] = gpr; } - emulated = EMULATE_DO_PAPR; + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + emulated = EMULATE_EXIT_USER; break; } #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f5416934932..9de24f8e03c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -66,6 +66,31 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + + /* CPU points to the first thread of the core */ + if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { + int real_cpu = cpu + vcpu->arch.ptid; + if (paca[real_cpu].kvm_hstate.xics_phys) + xics_wake_cpu(real_cpu); + else if (cpu_online(cpu)) + smp_send_reschedule(cpu); + } + put_cpu(); +} + /* * We use the vcpu_load/put functions to measure stolen time. * Stolen time is counted as time when either the vcpu is able to @@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, len = ((struct reg_vpa *)va)->length.hword; else len = ((struct reg_vpa *)va)->length.word; - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ if (len > nb || len < sizeof(struct reg_vpa)) @@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) va = NULL; nb = 0; if (gpa) - va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + va = kvmppc_pin_guest_page(kvm, gpa, &nb); spin_lock(&vcpu->arch.vpa_update_lock); if (gpa == vpap->next_gpa) break; /* sigh... unpin that one and try again */ if (va) - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); } vpap->update_pending = 0; @@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) * has changed the mappings underlying guest memory, * so unregister the region. */ - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); va = NULL; } if (vpap->pinned_addr) - kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, + vpap->dirty); + vpap->gpa = gpa; vpap->pinned_addr = va; + vpap->dirty = false; if (va) vpap->pinned_end = va + vpap->len; } @@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, /* order writing *dt vs. 
writing vpa->dtl_idx */ smp_wmb(); vpa->dtl_idx = ++vcpu->arch.dtl_index; + vcpu->arch.dtl.dirty = true; } int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) @@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; - int idx; + int idx, rc; switch (req) { case H_ENTER: @@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6)); break; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + + rc = kvmppc_rtas_hcall(vcpu); + + if (rc == -ENOENT) + return RESUME_HOST; + else if (rc == 0) + break; + + /* Send the error out to userspace via KVM_RUN */ + return rc; + + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) { + ret = kvmppc_xics_hcall(vcpu, req); + break; + } /* fallthrough */ default: return RESUME_HOST; } @@ -913,15 +964,19 @@ out: return ERR_PTR(err); } +static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) +{ + if (vpa->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, + vpa->dirty); +} + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->arch.vpa_update_lock); - if (vcpu->arch.dtl.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); - if (vcpu->arch.slb_shadow.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); - if (vcpu->arch.vpa.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); + unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); + unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); @@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) @@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; vc->runner = vcpu; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; + else + v->arch.ceded = 0; + } if (n_ceded == vc->n_runnable) kvmppc_vcore_blocked(vc); else @@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - if (npages && old.npages) { + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. 
* If this is a new memslot, we don't need to do anything @@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) cpumask_setall(&kvm->arch.need_tlb_flush); INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); kvm->arch.rma = NULL; @@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvm->arch.rma = NULL; } + kvmppc_rtas_tokens_free(kvm); + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 19c93bae1ae..6dcbb49105a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); -/* - * Note modification of an HPTE; set the HPTE modified bit - * if anyone is interested. - */ -static inline void note_hpte_modification(struct kvm *kvm, - struct revmap_entry *rev) -{ - if (atomic_read(&kvm->arch.hpte_mod_interest)) - rev->guest_rpte |= HPTE_GR_MODIFIED; -} - /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, struct revmap_entry *rev, diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c new file mode 100644 index 00000000000..b4b0082f761 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -0,0 +1,406 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/xics.h> +#include <asm/debug.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> + +#include "book3s_xics.h" + +#define DEBUG_PASSUP + +static inline void rm_writeb(unsigned long paddr, u8 val) +{ + __asm__ __volatile__("sync; stbcix %0,0,%1" + : : "r" (val), "r" (paddr) : "memory"); +} + +static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, + struct kvm_vcpu *this_vcpu) +{ + struct kvmppc_icp *this_icp = this_vcpu->arch.icp; + unsigned long xics_phys; + int cpu; + + /* Mark the target VCPU as having an interrupt pending */ + vcpu->stat.queue_intr++; + set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); + + /* Kick self ? Just set MER and return */ + if (vcpu == this_vcpu) { + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER); + return; + } + + /* Check if the core is loaded, if not, too hard */ + cpu = vcpu->cpu; + if (cpu < 0 || cpu >= nr_cpu_ids) { + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + return; + } + /* In SMT cpu will always point to thread 0, we adjust it */ + cpu += vcpu->arch.ptid; + + /* Not too hard, then poke the target */ + xics_phys = paca[cpu].kvm_hstate.xics_phys; + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); +} + +static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) +{ + /* Note: Only called on self ! 
*/ + clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, + &vcpu->arch.pending_exceptions); + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); +} + +static inline bool icp_rm_try_update(struct kvmppc_icp *icp, + union kvmppc_icp_state old, + union kvmppc_icp_state new) +{ + struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu; + bool success; + + /* Calculate new output value */ + new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); + + /* Attempt atomic update */ + success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; + if (!success) + goto bail; + + /* + * Check for output state update + * + * Note that this is racy since another processor could be updating + * the state already. This is why we never clear the interrupt output + * here, we only ever set it. The clear only happens prior to doing + * an update and only by the processor itself. Currently we do it + * in Accept (H_XIRR) and Up_Cppr (H_CPPR). + * + * We also do not try to figure out whether the EE state has changed, + * we unconditionally set it if the new state calls for it. The reason + * for that is that we opportunistically remove the pending interrupt + * flag when raising CPPR, so we need to set it back here if an + * interrupt is still pending. + */ + if (new.out_ee) + icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu); + + /* Expose the state change for debug purposes */ + this_vcpu->arch.icp->rm_dbgstate = new; + this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu; + + bail: + return success; +} + +static inline int check_too_hard(struct kvmppc_xics *xics, + struct kvmppc_icp *icp) +{ + return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS; +} + +static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u8 new_cppr) +{ + union kvmppc_icp_state old_state, new_state; + bool resend; + + /* + * This handles several related states in one operation: + * + * ICP State: Down_CPPR + * + * Load CPPR with new value and if the XISR is 0 + * then check for resends: + * + * ICP State: Resend + * + * If MFRR is more favored than CPPR, check for IPIs + * and notify ICS of a potential resend. This is done + * asynchronously (when used in real mode, we will have + * to exit here). + * + * We do not handle the complete Check_IPI as documented + * here. In the PAPR, this state will be used for both + * Set_MFRR and Down_CPPR. However, we know that we aren't + * changing the MFRR state here so we don't need to handle + * the case of an MFRR causing a reject of a pending irq, + * this will have been handled when the MFRR was set in the + * first place. + * + * Thus we don't have to handle rejects, only resends. + * + * When implementing real mode for HV KVM, resend will lead to + * a H_TOO_HARD return and the whole transaction will be handled + * in virtual mode.
+ */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + /* Down_CPPR */ + new_state.cppr = new_cppr; + + /* + * Cut down Resend / Check_IPI / IPI + * + * The logic is that we cannot have a pending interrupt + * trumped by an IPI at this point (see above), so we + * know that either the pending interrupt is already an + * IPI (in which case we don't care to override it) or + * it's more favored than us or non-existent + */ + if (new_state.mfrr < new_cppr && + new_state.mfrr <= new_state.pending_pri) { + new_state.pending_pri = new_state.mfrr; + new_state.xisr = XICS_IPI; + } + + /* Latch/clear resend bit */ + resend = new_state.need_resend; + new_state.need_resend = 0; + + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* + * Now handle resend checks. Those are asynchronous to the ICP + * state update in HW (ie bus transactions) so we can handle them + * separately here as well. + */ + if (resend) + icp->rm_action |= XICS_RM_CHECK_RESEND; +} + + +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 xirr; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + /* First clear the interrupt */ + icp_rm_clr_vcpu_irq(icp->vcpu); + + /* + * ICP State: Accept_Interrupt + * + * Return the pending interrupt (if any) along with the + * current CPPR, then clear the XISR & set CPPR to the + * pending priority + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + xirr = old_state.xisr | (((u32)old_state.cppr) << 24); + if (!old_state.xisr) + break; + new_state.cppr = new_state.pending_pri; + new_state.pending_pri = 0xff; + new_state.xisr = 0; + + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* Return the result in GPR4 */ + vcpu->arch.gpr[4] = xirr; + + return check_too_hard(xics, icp); +} + +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp; + u32 reject; + bool resend; + bool local; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + local = this_icp->server_num == server; + if (local) + icp = this_icp; + else + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + + /* + * ICP state: Set_MFRR + * + * If the CPPR is more favored than the new MFRR, then + * nothing needs to be done as there can be no XISR to + * reject.
+ * + * If the CPPR is less favored, then we might be replacing + * an interrupt, and thus need to possibly reject it as in + * + * ICP state: Check_IPI + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + /* Set_MFRR */ + new_state.mfrr = mfrr; + + /* Check_IPI */ + reject = 0; + resend = false; + if (mfrr < new_state.cppr) { + /* Reject a pending interrupt if not an IPI */ + if (mfrr <= new_state.pending_pri) + reject = new_state.xisr; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } + + if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + resend = new_state.need_resend; + new_state.need_resend = 0; + } + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* Pass rejects to virtual mode */ + if (reject && reject != XICS_IPI) { + this_icp->rm_action |= XICS_RM_REJECT; + this_icp->rm_reject = reject; + } + + /* Pass resends to virtual mode */ + if (resend) + this_icp->rm_action |= XICS_RM_CHECK_RESEND; + + return check_too_hard(xics, this_icp); +} + +int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 reject; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + /* + * ICP State: Set_CPPR + * + * We can safely compare the new value with the current + * value outside of the transaction as the CPPR is only + * ever changed by the processor on itself + */ + if (cppr > icp->state.cppr) { + icp_rm_down_cppr(xics, icp, cppr); + goto bail; + } else if (cppr == icp->state.cppr) + return H_SUCCESS; + + /* + * ICP State: Up_CPPR + * + * The processor is raising its priority, this can result + * in a rejection of a pending interrupt: + * + * ICP State: Reject_Current + * + * We can remove EE from the current processor, the update + * transaction will set it again if needed + */ + icp_rm_clr_vcpu_irq(icp->vcpu); + + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + reject = 0; + new_state.cppr = cppr; + + if (cppr <= new_state.pending_pri) { + reject = new_state.xisr; + new_state.xisr = 0; + new_state.pending_pri = 0xff; + } + + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* Pass rejects to virtual mode */ + if (reject && reject != XICS_IPI) { + icp->rm_action |= XICS_RM_REJECT; + icp->rm_reject = reject; + } + bail: + return check_too_hard(xics, icp); +} + +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u32 irq = xirr & 0x00ffffff; + u16 src; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + /* + * ICP State: EOI + * + * Note: If EOI is incorrectly used by SW to lower the CPPR + * value (ie more favored), we do not check for rejection of + * a pending interrupt, this is a SW error and PAPR specifies + * that we don't have to deal with it. + * + * The sending of an EOI to the ICS is handled after the + * CPPR update + * + * ICP State: Down_CPPR which we handle + * in a separate function as it's shared with H_CPPR. + */ + icp_rm_down_cppr(xics, icp, xirr >> 24); + + /* IPIs have no EOI */ + if (irq == XICS_IPI) + goto bail; + /* + * EOI handling: If the interrupt is still asserted, we need to + * resend it. We can take a lockless "peek" at the ICS state here.
+ * + * "Message" interrupts will never have "asserted" set + */ + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + goto bail; + state = &ics->irq_state[src]; + + /* Still asserted, resend it, we make it look like a reject */ + if (state->asserted) { + icp->rm_action |= XICS_RM_REJECT; + icp->rm_reject = irq; + } + bail: + return check_too_hard(xics, icp); +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b97..b02f91e4c70 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline) * * *****************************************************************************/ -#define XICS_XIRR 4 -#define XICS_QIRR 0xc -#define XICS_IPI 2 /* interrupt source # for IPIs */ - /* * We come in here when wakened from nap mode on a secondary hw thread. * Relocation is off and most register values are lost. @@ -101,50 +97,51 @@ kvm_start_guest: li r0,1 stb r0,PACA_NAPSTATELOST(r13) - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi cr1,r4,0 + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* + * We weren't napping due to cede, so this must be a secondary + * thread being woken up to run a guest, or being woken up due + * to a stray IPI. (Or due to some machine check or hypervisor + * maintenance interrupt while the core is in KVM.) + */ /* Check the wake reason in SRR1 to see why we got here */ mfspr r3,SPRN_SRR1 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - - /* - * External interrupt - for now assume it is an IPI, since we - * should never get any other interrupts sent to offline threads. - * Only do this for secondary threads. - */ - beq cr1,25f - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f -25: ld r5,HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR + bne 27f /* if not */ + ld r5,HSTATE_XICS_PHYS(r13) + li r7,XICS_XIRR /* if it was an external interrupt, */ lwzcix r8,r5,r7 /* get and ack the interrupt */ sync clrldi. r9,r8,40 /* get interrupt source ID. */ - beq 27f /* none there? */ - cmpwi r9,XICS_IPI - bne 26f + beq 28f /* none there? */ + cmpwi r9,XICS_IPI /* was it an IPI? */ + bne 29f + li r0,0xff + li r6,XICS_MFRR stbcix r0,r5,r6 /* clear IPI */ -26: stwcix r8,r5,r7 /* EOI the interrupt */ - -27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + stwcix r8,r5,r7 /* EOI the interrupt */ + sync /* order loading of vcpu after that */ - /* reload vcpu pointer after clearing the IPI */ + /* get vcpu pointer, NULL if we have no vcpu to run */ ld r4,HSTATE_KVM_VCPU(r13) cmpdi r4,0 /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest + b kvmppc_hv_entry - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + b kvm_no_guest +28: /* SRR1 said external but ICP said nope?? */ + b kvm_no_guest +29: /* External non-IPI interrupt to offline secondary thread? help?? 
*/ + stw r8,HSTATE_SAVED_XIRR(r13) + b kvm_no_guest .global kvmppc_hv_entry kvmppc_hv_entry: @@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r5, LPPACA_YIELDCOUNT(r3) addi r5, r5, 1 stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) 25: /* Load up DAR and DSISR */ ld r5, VCPU_DAR(r4) @@ -485,20 +484,20 @@ toc_tlbie_lock: mtctr r6 mtxer r7 + ld r10, VCPU_PC(r4) + ld r11, VCPU_MSR(r4) kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ ld r6, VCPU_SRR0(r4) ld r7, VCPU_SRR1(r4) - ld r10, VCPU_PC(r4) - ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ + /* r11 = vcpu->arch.msr & ~MSR_HV */ rldicl r11, r11, 63 - MSR_HV_LG, 1 rotldi r11, r11, 1 + MSR_HV_LG ori r11, r11, MSR_ME /* Check if we can deliver an external or decrementer interrupt now */ ld r0,VCPU_PENDING_EXC(r4) - li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h and r0,r0,r8 cmpdi cr1,r0,0 andi. r0,r11,MSR_EE @@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Move SRR0 and SRR1 into the respective regs */ 5: mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 - li r0,0 - stb r0,VCPU_CEDED(r4) /* cancel cede */ fast_guest_return: + li r0,0 + stb r0,VCPU_CEDED(r4) /* cancel cede */ mtspr SPRN_HSRR0,r10 mtspr SPRN_HSRR1,r11 @@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Check for mediated interrupts (could be done earlier really ...) */ + /* Only handle external interrupts here on arch 206 and later */ BEGIN_FTR_SECTION - cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL - bne+ 1f - andi. r0,r11,MSR_EE - beq 1f - mfspr r5,SPRN_LPCR - andi. r0,r5,LPCR_MER - bne bounce_ext_interrupt -1: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + b ext_interrupt_to_host +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) + + /* External interrupt ? */ + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + bne+ ext_interrupt_to_host + + /* External interrupt, first check for host_ipi. If this is + * set, we know the host wants us out so let's do it now + */ +do_ext_interrupt: + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne ext_interrupt_to_host + + /* Now read the interrupt from the ICP */ + ld r5, HSTATE_XICS_PHYS(r13) + li r7, XICS_XIRR + cmpdi r5, 0 + beq- ext_interrupt_to_host + lwzcix r3, r5, r7 + rlwinm. r0, r3, 0, 0xffffff + sync + beq 3f /* if nothing pending in the ICP */ + + /* We found something in the ICP... + * + * If it's not an IPI, stash it in the PACA and return to + * the host, we don't (yet) handle directing real external + * interrupts directly to the guest + */ + cmpwi r0, XICS_IPI + bne ext_stash_for_host + + /* It's an IPI, clear the MFRR and EOI it */ + li r0, 0xff + li r6, XICS_MFRR + stbcix r0, r5, r6 /* clear the IPI */ + stwcix r3, r5, r7 /* EOI it */ + sync + + /* We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne- 1f + + /* Alright, looks like an IPI for the guest, we need to set MER */ +3: + /* Check if any CPU is heading out to the host, if so head out too */ + ld r5, HSTATE_KVM_VCORE(r13) + lwz r0, VCORE_ENTRY_EXIT(r5) + cmpwi r0, 0x100 + bge ext_interrupt_to_host + + /* See if there is a pending interrupt for the guest */ + mfspr r8, SPRN_LPCR + ld r0, VCPU_PENDING_EXC(r9) + /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ + rldicl.
r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 + rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH + beq 2f + + /* And if the guest EE is set, we can deliver immediately, else + * we return to the guest with MER set + */ + andi. r0, r11, MSR_EE + beq 2f + mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_EXTERNAL + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +2: mr r4, r9 + mtspr SPRN_LPCR, r8 + b fast_guest_return + + /* We raced with the host, we need to resend that IPI, bummer */ +1: li r0, IPI_PRIORITY + stbcix r0, r5, r6 /* set the IPI */ + sync + b ext_interrupt_to_host + +ext_stash_for_host: + /* It's not an IPI and it's for the host, stash it in the PACA + * before exit, it will be picked up by the host ICP driver + */ + stw r3, HSTATE_SAVED_XIRR(r13) +ext_interrupt_to_host: guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Save DEC */ @@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 44f ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ li r0,IPI_PRIORITY - li r7,XICS_QIRR + li r7,XICS_MFRR stbcix r0,r7,r8 /* trigger the IPI */ 44: srdi. r3,r3,1 addi r6,r6,PACA_SIZE @@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r3, LPPACA_YIELDCOUNT(r8) addi r3, r3, 1 stw r3, LPPACA_YIELDCOUNT(r8) + li r3, 1 + stb r3, VCPU_VPA_DIRTY(r9) 25: /* Save PMU registers if requested */ /* r8 and cr0.eq are live here */ @@ -1350,11 +1433,19 @@ hcall_real_table: .long 0 /* 0x58 */ .long 0 /* 0x5c */ .long 0 /* 0x60 */ - .long 0 /* 0x64 */ - .long 0 /* 0x68 */ - .long 0 /* 0x6c */ - .long 0 /* 0x70 */ - .long 0 /* 0x74 */ +#ifdef CONFIG_KVM_XICS + .long .kvmppc_rm_h_eoi - hcall_real_table + .long .kvmppc_rm_h_cppr - hcall_real_table + .long .kvmppc_rm_h_ipi - hcall_real_table + .long 0 /* 0x70 - H_IPOLL */ + .long .kvmppc_rm_h_xirr - hcall_real_table +#else + .long 0 /* 0x64 - H_EOI */ + .long 0 /* 0x68 - H_CPPR */ + .long 0 /* 0x6c - H_IPI */ + .long 0 /* 0x70 - H_IPOLL */ + .long 0 /* 0x74 - H_XIRR */ +#endif .long 0 /* 0x78 */ .long 0 /* 0x7c */ .long 0 /* 0x80 */ @@ -1405,15 +1496,6 @@ ignore_hdec: mr r4,r9 b fast_guest_return -bounce_ext_interrupt: - mr r4,r9 - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - li r10,BOOK3S_INTERRUPT_EXTERNAL - li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11,r11,63 - b fast_guest_return - _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) /* Work around P7 bug where DABR can get corrupted on mtspr */ @@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) b . kvm_end_cede: + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) @@ -1558,6 +1643,16 @@ kvm_end_cede: li r0,0 stb r0,HSTATE_NAPPING(r13) + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3, SPRN_SRR1 + rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ + cmpwi r3, 4 /* was it an external interrupt? 
*/ + li r12, BOOK3S_INTERRUPT_EXTERNAL + mr r9, r4 + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + beq do_ext_interrupt /* if so */ + /* see if any other thread is already exiting */ lwz r0,VCORE_ENTRY_EXIT(r5) cmpwi r0,0x100 @@ -1577,8 +1672,7 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: - li r3,H_TOO_HARD - blr + b hcall_real_fallback /* Try to handle a machine check in real mode */ machine_check_realmode: @@ -1626,7 +1720,7 @@ secondary_nap: beq 37f sync li r0, 0xff - li r6, XICS_QIRR + li r6, XICS_MFRR stbcix r0, r5, r6 /* clear the IPI */ stwcix r3, r5, r7 /* EOI it */ 37: sync diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dbdc15aa812..bdc40b8e77d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -762,9 +762,7 @@ program_interrupt: run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; - case EMULATE_DO_PAPR: - run->exit_reason = KVM_EXIT_PAPR_HCALL; - vcpu->arch.hcall_needed = 1; + case EMULATE_EXIT_USER: r = RESUME_HOST_NV; break; default: @@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } @@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif if (firmware_has_feature(FW_FEATURE_SET_MODE)) { diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index ee02b30878e..b24309c6c2d 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) +{ + long rc = kvmppc_xics_hcall(vcpu, cmd); + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) + return kvmppc_h_pr_xics_hcall(vcpu, cmd); + break; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + if (kvmppc_rtas_hcall(vcpu)) + break; + kvmppc_set_gpr(vcpu, 3, 0); + return EMULATE_DONE; } return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c new file mode 100644 index 00000000000..3219ba89524 --- /dev/null +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -0,0 +1,274 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/err.h> + +#include <asm/uaccess.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/rtas.h> + +#ifdef CONFIG_KVM_XICS +static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 3 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + server = args->args[1]; + priority = args->args[2]; + + rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 1 || args->nret != 3) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + server = priority = 0; + rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); + if (rc) { + rc = -3; + goto out; + } + + args->rets[1] = server; + args->rets[2] = priority; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_off(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_on(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} +#endif /* CONFIG_KVM_XICS */ + +struct rtas_handler { + void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); + char *name; +}; + +static struct rtas_handler rtas_handlers[] = { +#ifdef CONFIG_KVM_XICS + { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, + { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, + { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, + { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, +#endif +}; + +struct rtas_token_definition { + struct list_head list; + struct rtas_handler *handler; + u64 token; +}; + +static int rtas_name_matches(char *s1, char *s2) +{ + struct kvm_rtas_token_args args; + return !strncmp(s1, s2, sizeof(args.name)); +} + +static int rtas_token_undefine(struct kvm *kvm, char *name) +{ + struct rtas_token_definition *d, *tmp; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { + if (rtas_name_matches(d->handler->name, name)) { + list_del(&d->list); + kfree(d); + return 0; + } + } + + /* It's not an error to undefine an undefined token */ + return 0; +} + +static int rtas_token_define(struct kvm *kvm, char *name, u64 token) +{ + struct rtas_token_definition *d; + struct rtas_handler *h = NULL; + bool found; + int i; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { + if (d->token == token) + return -EEXIST; + } + + found = false; + for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) { + h = &rtas_handlers[i]; + if (rtas_name_matches(h->name, name)) { + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->handler = h; + d->token = token; + + list_add_tail(&d->list, &kvm->arch.rtas_tokens); + + return 0; +} + +int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user 
*argp) +{ + struct kvm_rtas_token_args args; + int rc; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + mutex_lock(&kvm->lock); + + if (args.token) + rc = rtas_token_define(kvm, args.name, args.token); + else + rc = rtas_token_undefine(kvm, args.name); + + mutex_unlock(&kvm->lock); + + return rc; +} + +int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) +{ + struct rtas_token_definition *d; + struct rtas_args args; + rtas_arg_t *orig_rets; + gpa_t args_phys; + int rc; + + /* r4 contains the guest physical address of the RTAS args */ + args_phys = kvmppc_get_gpr(vcpu, 4); + + rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + if (rc) + goto fail; + + /* + * args->rets is a pointer into args->args. Now that we've + * copied args we need to fix it up to point into our copy, + * not the guest args. We also need to save the original + * value so we can restore it on the way out. + */ + orig_rets = args.rets; + args.rets = &args.args[args.nargs]; + + mutex_lock(&vcpu->kvm->lock); + + rc = -ENOENT; + list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { + if (d->token == args.token) { + d->handler->handler(vcpu, &args); + rc = 0; + break; + } + } + + mutex_unlock(&vcpu->kvm->lock); + + if (rc == 0) { + args.rets = orig_rets; + rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + if (rc) + goto fail; + } + + return rc; + +fail: + /* + * We only get here if the guest has called RTAS with a bogus + * args pointer. That means we can't get to the args, and so we + * can't fail the RTAS call. So fail right out to userspace, + * which should kill the guest. + */ + return rc; +} + +void kvmppc_rtas_tokens_free(struct kvm *kvm) +{ + struct rtas_token_definition *d, *tmp; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { + list_del(&d->list); + kfree(d); + } +} diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 00000000000..f7a10375661 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c @@ -0,0 +1,1270 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/anon_inodes.h> + +#include <asm/uaccess.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/xics.h> +#include <asm/debug.h> + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "book3s_xics.h" + +#if 1 +#define XICS_DBG(fmt...) do { } while (0) +#else +#define XICS_DBG(fmt...) trace_printk(fmt) +#endif + +#define ENABLE_REALMODE true +#define DEBUG_REALMODE false + +/* + * LOCKING + * ======= + * + * Each ICS has a mutex protecting the information about the IRQ + * sources and avoiding simultaneous deliveries of the same interrupt. + * + * ICP operations are done via a single compare & swap transaction + * (most ICP state fits in the union kvmppc_icp_state) + */ + +/* + * TODO + * ==== + * + * - To speed up resends, keep a bitmap of "resend" set bits in the + * ICS + * + * - Speed up server# -> ICP lookup (array ? hash table ?)
+ * + * - Make ICS lockless as well, or at least a per-interrupt lock or hashed + * locks array to improve scalability + */ + +/* -- ICS routines -- */ + +static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 new_irq); + +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, + bool report_status) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u16 src; + + XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); + return -EINVAL; + } + state = &ics->irq_state[src]; + if (!state->exists) + return -EINVAL; + + if (report_status) + return state->asserted; + + /* + * We set state->asserted locklessly. This should be fine as + * we are the only setter, thus concurrent access is undefined + * to begin with. + */ + if (level == KVM_INTERRUPT_SET_LEVEL) + state->asserted = 1; + else if (level == KVM_INTERRUPT_UNSET) { + state->asserted = 0; + return 0; + } + + /* Attempt delivery */ + icp_deliver_irq(xics, NULL, irq); + + return state->asserted; +} + +static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct kvmppc_icp *icp) +{ + int i; + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *state = &ics->irq_state[i]; + + if (!state->resend) + continue; + + XICS_DBG("resend %#x prio %#x\n", state->number, + state->priority); + + mutex_unlock(&ics->lock); + icp_deliver_irq(xics, icp, state->number); + mutex_lock(&ics->lock); + } + + mutex_unlock(&ics->lock); +} + +static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct ics_irq_state *state, + u32 server, u32 priority, u32 saved_priority) +{ + bool deliver; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + state->saved_priority = saved_priority; + deliver = false; + if ((state->masked_pending || state->resend) && priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + return deliver; +} + +int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, server); + if (!icp) + return -EINVAL; + + XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", + irq, server, priority, + state->masked_pending, state->resend); + + if (write_xive(xics, ics, state, server, priority, priority)) + icp_deliver_irq(xics, icp, irq); + + return 0; +} + +int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + *server = state->server; + *priority = state->priority; + mutex_unlock(&ics->lock); + + return 0; +} + +int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = 
kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, state->server);
+ if (!icp)
+ return -EINVAL;
+
+ if (write_xive(xics, ics, state, state->server, state->saved_priority,
+ state->saved_priority))
+ icp_deliver_irq(xics, icp, irq);
+
+ return 0;
+}
+
+int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ write_xive(xics, ics, state, state->server, MASKED, state->priority);
+
+ return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static inline bool icp_try_update(struct kvmppc_icp *icp,
+ union kvmppc_icp_state old,
+ union kvmppc_icp_state new,
+ bool change_self)
+{
+ bool success;
+
+ /* Calculate new output value */
+ new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+ /* Attempt atomic update */
+ success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
+ if (!success)
+ goto bail;
+
+ XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ icp->server_num,
+ old.cppr, old.mfrr, old.pending_pri, old.xisr,
+ old.need_resend, old.out_ee);
+ XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ new.cppr, new.mfrr, new.pending_pri, new.xisr,
+ new.need_resend, new.out_ee);
+ /*
+ * Check for output state update
+ *
+ * Note that this is racy since another processor could be updating
+ * the state already. This is why we never clear the interrupt output
+ * here, we only ever set it. The clear only happens prior to doing
+ * an update and only by the processor itself. Currently we do it
+ * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
+ *
+ * We also do not try to figure out whether the EE state has changed,
+ * we unconditionally set it if the new state calls for it. The reason
+ * for that is that we opportunistically remove the pending interrupt
+ * flag when raising CPPR, so we need to set it back here if an
+ * interrupt is still pending.
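+ *
+ * Editor's illustration (not part of the original patch): every ICP
+ * state change in this file is built on the same lock-free pattern,
+ * a read-modify-write loop around this helper:
+ *
+ *	do {
+ *		old_state = new_state = ACCESS_ONCE(icp->state);
+ *		... modify new_state fields ...
+ *	} while (!icp_try_update(icp, old_state, new_state, change_self));
+ *
+ * If another CPU changed icp->state between the load and the
+ * cmpxchg, the update fails and the loop retries on fresh state.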
+ */
+ if (new.out_ee) {
+ kvmppc_book3s_queue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ if (!change_self)
+ kvmppc_fast_vcpu_kick(icp->vcpu);
+ }
+ bail:
+ return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 icsid;
+
+ /* Order this load with the test for need_resend in the caller */
+ smp_rmb();
+ for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!test_and_clear_bit(icsid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_check_resend(xics, ics, icp);
+ }
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+ u32 *reject)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool success;
+
+ XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+ icp->server_num);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ *reject = 0;
+
+ /* See if we can deliver */
+ success = new_state.cppr > priority &&
+ new_state.mfrr > priority &&
+ new_state.pending_pri > priority;
+
+ /*
+ * If we can, check for a rejection and perform the
+ * delivery
+ */
+ if (success) {
+ *reject = new_state.xisr;
+ new_state.xisr = irq;
+ new_state.pending_pri = priority;
+ } else {
+ /*
+ * If we failed to deliver we set need_resend
+ * so a subsequent CPPR state change causes us
+ * to try a new delivery.
+ */
+ new_state.need_resend = true;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u32 reject;
+ u16 src;
+
+ /*
+ * This is used both for initial delivery of an interrupt and
+ * for subsequent rejection.
+ *
+ * Rejection can be racy vs. resends. We have evaluated the
+ * rejection in an atomic ICP transaction which is now complete,
+ * so potentially the ICP can already accept the interrupt again.
+ *
+ * So we need to retry the delivery. Essentially the reject path
+ * boils down to a failed delivery. Always.
+ *
+ * Now the interrupt could also have moved to a different target,
+ * thus we may need to re-do the ICP lookup as well
+ */
+
+ again:
+ /* Get the ICS state and lock it */
+ ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+ if (!ics) {
+ XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ /* Get a lock on the ICS */
+ mutex_lock(&ics->lock);
+
+ /* Get our server */
+ if (!icp || state->server != icp->server_num) {
+ icp = kvmppc_xics_find_server(xics->kvm, state->server);
+ if (!icp) {
+ pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+ new_irq, state->server);
+ goto out;
+ }
+ }
+
+ /* Clear the resend bit of that interrupt */
+ state->resend = 0;
+
+ /*
+ * If masked, bail out
+ *
+ * Note: PAPR doesn't mention anything about masked pending
+ * when doing a resend, only when doing a delivery.
+ *
+ * However that would have the effect of losing a masked
+ * interrupt that was rejected and isn't consistent with
+ * the whole masked_pending business which is about not
+ * losing interrupts that occur while masked.
+ *
+ * I don't differentiate normal deliveries and resends, this
+ * implementation will differ from PAPR and not lose such
+ * interrupts.
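+ *
+ * Editor's note, for illustration: a level interrupt asserted while
+ * its priority is MASKED only sets masked_pending below. When the
+ * guest later unmasks it via ibm,set-xive, write_xive() sees
+ * masked_pending (or resend) and the caller redelivers through
+ * icp_deliver_irq(), so the interrupt is not lost.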
+ */
+ if (state->priority == MASKED) {
+ XICS_DBG("irq %#x masked pending\n", new_irq);
+ state->masked_pending = 1;
+ goto out;
+ }
+
+ /*
+ * Try the delivery, this will set the need_resend flag
+ * in the ICP as part of the atomic transaction if the
+ * delivery is not possible.
+ *
+ * Note that if successful, the new delivery might have itself
+ * rejected an interrupt that was "delivered" before we took the
+ * icp mutex.
+ *
+ * In this case we do the whole sequence all over again for the
+ * new guy. We cannot assume that the rejected interrupt is less
+ * favored than the new one, and thus doesn't need to be delivered,
+ * because by the time we exit icp_try_to_deliver() the target
+ * processor may well have already consumed & completed it, and thus
+ * the rejected interrupt might actually be already acceptable.
+ */
+ if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+ /*
+ * Delivery was successful, did we reject somebody else ?
+ */
+ if (reject && reject != XICS_IPI) {
+ mutex_unlock(&ics->lock);
+ new_irq = reject;
+ goto again;
+ }
+ } else {
+ /*
+ * We failed to deliver the interrupt, we need to set the
+ * resend map bit and mark the ICS state as needing a resend
+ */
+ set_bit(ics->icsid, icp->resend_map);
+ state->resend = 1;
+
+ /*
+ * If the need_resend flag got cleared in the ICP some time
+ * between icp_try_to_deliver() atomic update and now, then
+ * we know it might have missed the resend_map bit. So we
+ * retry
+ */
+ smp_mb();
+ if (!icp->state.need_resend) {
+ mutex_unlock(&ics->lock);
+ goto again;
+ }
+ }
+ out:
+ mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * a H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
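+ *
+ * Editor's worked example (illustrative): priorities are more
+ * favored the numerically smaller they are, 0x00 being most
+ * favored and 0xff (MASKED) least. Moving CPPR from 0x05 to 0xff
+ * is thus a Down_CPPR: it widens the delivery window, so a
+ * pending IPI with, say, MFRR 0x40 now qualifies and is latched
+ * as XICS_IPI by the loop below.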
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non-existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ WARN_ON(new_state.xisr != XICS_IPI &&
+ new_state.xisr != 0);
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (i.e. bus transactions) so we can handle them
+ * separately here too
+ */
+ if (resend)
+ icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ /* First, remove EE from the processor */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+ return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+ vcpu->vcpu_id, server, mfrr);
+
+ icp = vcpu->arch.icp;
+ local = icp->server_num == server;
+ if (!local) {
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+ }
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be rejected as there can be no XISR to
+ * reject. If the MFRR is being made less favored then
+ * there might be a previously-rejected interrupt needing
+ * to be resent.
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, local));
+
+ /* Handle reject */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+
+ /* Handle resend */
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr)
+ icp_down_cppr(xics, icp, cppr);
+ else if (cppr == icp->state.cppr)
+ return;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Check for rejects. They are handled by doing a new delivery
+ * attempt (see comments in icp_deliver_irq).
+ */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (ie more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR specifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ return H_SUCCESS;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
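+ *
+ * (Editor's note, illustrative: the racy read is tolerable because
+ * icp_deliver_irq() re-reads the authoritative state under the ICS
+ * mutex, so a stale "asserted" here costs at most one redundant
+ * delivery attempt.)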
+ * + * "Message" interrupts will never have "asserted" set + */ + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); + return H_PARAMETER; + } + state = &ics->irq_state[src]; + + /* Still asserted, resend it */ + if (state->asserted) + icp_deliver_irq(xics, icp, irq); + + return H_SUCCESS; +} + +static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + + XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n", + hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt); + + if (icp->rm_action & XICS_RM_KICK_VCPU) + kvmppc_fast_vcpu_kick(icp->rm_kick_target); + if (icp->rm_action & XICS_RM_CHECK_RESEND) + icp_check_resend(xics, icp); + if (icp->rm_action & XICS_RM_REJECT) + icp_deliver_irq(xics, icp, icp->rm_reject); + + icp->rm_action = 0; + + return H_SUCCESS; +} + +int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + unsigned long res; + int rc = H_SUCCESS; + + /* Check if we have an ICP */ + if (!xics || !vcpu->arch.icp) + return H_HARDWARE; + + /* Check for real mode returning too hard */ + if (xics->real_mode) + return kvmppc_xics_rm_complete(vcpu, req); + + switch (req) { + case H_XIRR: + res = kvmppc_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + break; + case H_CPPR: + kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_EOI: + rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_IPI: + rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + } + + return rc; +} + + +/* -- Initialisation code etc. -- */ + +static int xics_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xics *xics = m->private; + struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + int icsid, i; + + if (!kvm) + return 0; + + seq_printf(m, "=========\nICP state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + continue; + + state.raw = ACCESS_ONCE(icp->state.raw); + seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n", + icp->server_num, state.xisr, + state.pending_pri, state.cppr, state.mfrr, + state.out_ee, state.need_resend); + } + + for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { + struct kvmppc_ics *ics = xics->ics[icsid]; + + if (!ics) + continue; + + seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n", + icsid); + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *irq = &ics->irq_state[i]; + + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", + irq->number, irq->server, irq->priority, + irq->saved_priority, irq->asserted, + irq->resend, irq->masked_pending); + + } + mutex_unlock(&ics->lock); + } + return 0; +} + +static int xics_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xics_debug_show, inode->i_private); +} + +static const struct file_operations xics_debug_fops = { + .open = xics_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xics_debugfs_init(struct kvmppc_xics *xics) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xics->dentry = 
debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics, &xics_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, + struct kvmppc_xics *xics, int irq) +{ + struct kvmppc_ics *ics; + int i, icsid; + + icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + + mutex_lock(&kvm->lock); + + /* ICS already exists - somebody else got here first */ + if (xics->ics[icsid]) + goto out; + + /* Create the ICS */ + ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL); + if (!ics) + goto out; + + mutex_init(&ics->lock); + ics->icsid = icsid; + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i; + ics->irq_state[i].priority = MASKED; + ics->irq_state[i].saved_priority = MASKED; + } + smp_wmb(); + xics->ics[icsid] = ics; + + if (icsid > xics->max_icsid) + xics->max_icsid = icsid; + + out: + mutex_unlock(&kvm->lock); + return xics->ics[icsid]; +} + +int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) +{ + struct kvmppc_icp *icp; + + if (!vcpu->kvm->arch.xics) + return -ENODEV; + + if (kvmppc_xics_find_server(vcpu->kvm, server_num)) + return -EEXIST; + + icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); + if (!icp) + return -ENOMEM; + + icp->vcpu = vcpu; + icp->server_num = server_num; + icp->state.mfrr = MASKED; + icp->state.pending_pri = MASKED; + vcpu->arch.icp = icp; + + XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); + + return 0; +} + +u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + return 0; + state = icp->state; + return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | + ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | + ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | + ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); +} + +int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + union kvmppc_icp_state old_state, new_state; + struct kvmppc_ics *ics; + u8 cppr, mfrr, pending_pri; + u32 xisr; + u16 src; + bool resend; + + if (!icp || !xics) + return -ENOENT; + + cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; + xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & + KVM_REG_PPC_ICP_XISR_MASK; + mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; + pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT; + + /* Require the new state to be internally consistent */ + if (xisr == 0) { + if (pending_pri != 0xff) + return -EINVAL; + } else if (xisr == XICS_IPI) { + if (pending_pri != mfrr || pending_pri >= cppr) + return -EINVAL; + } else { + if (pending_pri >= mfrr || pending_pri >= cppr) + return -EINVAL; + ics = kvmppc_xics_find_ics(xics, xisr, &src); + if (!ics) + return -EINVAL; + } + + new_state.raw = 0; + new_state.cppr = cppr; + new_state.xisr = xisr; + new_state.mfrr = mfrr; + new_state.pending_pri = pending_pri; + + /* + * Deassert the CPU interrupt request. + * icp_try_update will reassert it if necessary. + */ + kvmppc_book3s_dequeue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + + /* + * Note that if we displace an interrupt from old_state.xisr, + * we don't mark it as rejected. We expect userspace to set + * the state of the interrupt sources to be consistent with + * the ICP states (either before or afterwards, which doesn't + * matter). 
We do handle resends due to CPPR becoming less + * favoured because that is necessary to end up with a + * consistent state in the situation where userspace restores + * the ICS states before the ICP states. + */ + do { + old_state = ACCESS_ONCE(icp->state); + + if (new_state.mfrr <= old_state.mfrr) { + resend = false; + new_state.need_resend = old_state.need_resend; + } else { + resend = old_state.need_resend; + new_state.need_resend = 0; + } + } while (!icp_try_update(icp, old_state, new_state, false)); + + if (resend) + icp_check_resend(xics, icp); + + return 0; +} + +static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) +{ + int ret; + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val, prio; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return -ENOENT; + + irqp = &ics->irq_state[idx]; + mutex_lock(&ics->lock); + ret = -ENOENT; + if (irqp->exists) { + val = irqp->server; + prio = irqp->priority; + if (prio == MASKED) { + val |= KVM_XICS_MASKED; + prio = irqp->saved_priority; + } + val |= prio << KVM_XICS_PRIORITY_SHIFT; + if (irqp->asserted) + val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; + else if (irqp->masked_pending || irqp->resend) + val |= KVM_XICS_PENDING; + ret = 0; + } + mutex_unlock(&ics->lock); + + if (!ret && put_user(val, ubufp)) + ret = -EFAULT; + + return ret; +} + +static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) +{ + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val; + u8 prio; + u32 server; + + if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) + return -ENOENT; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) { + ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); + if (!ics) + return -ENOMEM; + } + irqp = &ics->irq_state[idx]; + if (get_user(val, ubufp)) + return -EFAULT; + + server = val & KVM_XICS_DESTINATION_MASK; + prio = val >> KVM_XICS_PRIORITY_SHIFT; + if (prio != MASKED && + kvmppc_xics_find_server(xics->kvm, server) == NULL) + return -EINVAL; + + mutex_lock(&ics->lock); + irqp->server = server; + irqp->saved_priority = prio; + if (val & KVM_XICS_MASKED) + prio = MASKED; + irqp->priority = prio; + irqp->resend = 0; + irqp->masked_pending = 0; + irqp->asserted = 0; + if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) + irqp->asserted = 1; + irqp->exists = 1; + mutex_unlock(&ics->lock); + + if (val & KVM_XICS_PENDING) + icp_deliver_irq(xics, NULL, irqp->number); + + return 0; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + + return ics_deliver_irq(xics, irq, level, line_status); +} + +static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_set_source(xics, attr->attr, attr->addr); + } + return -ENXIO; +} + +static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_get_source(xics, attr->attr, attr->addr); + } + return -ENXIO; +} + +static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && + attr->attr < KVMPPC_XICS_NR_IRQS) 
+ return 0; + break; + } + return -ENXIO; +} + +static void kvmppc_xics_free(struct kvm_device *dev) +{ + struct kvmppc_xics *xics = dev->private; + int i; + struct kvm *kvm = xics->kvm; + + debugfs_remove(xics->dentry); + + if (kvm) + kvm->arch.xics = NULL; + + for (i = 0; i <= xics->max_icsid; i++) + kfree(xics->ics[i]); + kfree(xics); + kfree(dev); +} + +static int kvmppc_xics_create(struct kvm_device *dev, u32 type) +{ + struct kvmppc_xics *xics; + struct kvm *kvm = dev->kvm; + int ret = 0; + + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + if (!xics) + return -ENOMEM; + + dev->private = xics; + xics->dev = dev; + xics->kvm = kvm; + + /* Already there ? */ + mutex_lock(&kvm->lock); + if (kvm->arch.xics) + ret = -EEXIST; + else + kvm->arch.xics = xics; + mutex_unlock(&kvm->lock); + + if (ret) + return ret; + + xics_debugfs_init(xics); + +#ifdef CONFIG_KVM_BOOK3S_64_HV + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + /* Enable real mode support */ + xics->real_mode = ENABLE_REALMODE; + xics->real_mode_dbg = DEBUG_REALMODE; + } +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + + return 0; +} + +struct kvm_device_ops kvm_xics_ops = { + .name = "kvm-xics", + .create = kvmppc_xics_create, + .destroy = kvmppc_xics_free, + .set_attr = xics_set_attr, + .get_attr = xics_get_attr, + .has_attr = xics_has_attr, +}; + +int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 xcpu) +{ + struct kvmppc_xics *xics = dev->private; + int r = -EBUSY; + + if (dev->ops != &kvm_xics_ops) + return -EPERM; + if (xics->kvm != vcpu->kvm) + return -EPERM; + if (vcpu->arch.irq_type) + return -EBUSY; + + r = kvmppc_xics_create_icp(vcpu, xcpu); + if (!r) + vcpu->arch.irq_type = KVMPPC_IRQ_XICS; + + return r; +} + +void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.icp) + return; + kfree(vcpu->arch.icp); + vcpu->arch.icp = NULL; + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h new file mode 100644 index 00000000000..dd9326c5c19 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.h @@ -0,0 +1,130 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef _KVM_PPC_BOOK3S_XICS_H +#define _KVM_PPC_BOOK3S_XICS_H + +/* + * We use a two-level tree to store interrupt source information. + * There are up to 1024 ICS nodes, each of which can represent + * 1024 sources. + */ +#define KVMPPC_XICS_MAX_ICS_ID 1023 +#define KVMPPC_XICS_ICS_SHIFT 10 +#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT) +#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1) + +/* + * Interrupt source numbers below this are reserved, for example + * 0 is "no interrupt", and 2 is used for IPIs. 
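+ *
+ * Editor's illustration: a global interrupt number splits as
+ *	icsid = irq >> KVMPPC_XICS_ICS_SHIFT	(which ICS)
+ *	src   = irq & KVMPPC_XICS_SRC_MASK	(slot within it)
+ * so e.g. irq 0x1234 lives in ICS 4 at source index 0x234.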
+ */ +#define KVMPPC_XICS_FIRST_IRQ 16 +#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \ + KVMPPC_XICS_IRQ_PER_ICS) + +/* Priority value to use for disabling an interrupt */ +#define MASKED 0xff + +/* State for one irq source */ +struct ics_irq_state { + u32 number; + u32 server; + u8 priority; + u8 saved_priority; + u8 resend; + u8 masked_pending; + u8 asserted; /* Only for LSI */ + u8 exists; +}; + +/* Atomic ICP state, updated with a single compare & swap */ +union kvmppc_icp_state { + unsigned long raw; + struct { + u8 out_ee:1; + u8 need_resend:1; + u8 cppr; + u8 mfrr; + u8 pending_pri; + u32 xisr; + }; +}; + +/* One bit per ICS */ +#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1) + +struct kvmppc_icp { + struct kvm_vcpu *vcpu; + unsigned long server_num; + union kvmppc_icp_state state; + unsigned long resend_map[ICP_RESEND_MAP_SIZE]; + + /* Real mode might find something too hard, here's the action + * it might request from virtual mode + */ +#define XICS_RM_KICK_VCPU 0x1 +#define XICS_RM_CHECK_RESEND 0x2 +#define XICS_RM_REJECT 0x4 + u32 rm_action; + struct kvm_vcpu *rm_kick_target; + u32 rm_reject; + + /* Debug stuff for real mode */ + union kvmppc_icp_state rm_dbgstate; + struct kvm_vcpu *rm_dbgtgt; +}; + +struct kvmppc_ics { + struct mutex lock; + u16 icsid; + struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; +}; + +struct kvmppc_xics { + struct kvm *kvm; + struct kvm_device *dev; + struct dentry *dentry; + u32 max_icsid; + bool real_mode; + bool real_mode_dbg; + struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1]; +}; + +static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, + u32 nr) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num) + return vcpu->arch.icp; + } + return NULL; +} + +static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, + u32 irq, u16 *source) +{ + u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + u16 src = irq & KVMPPC_XICS_SRC_MASK; + struct kvmppc_ics *ics; + + if (source) + *source = src; + if (icsid > KVMPPC_XICS_MAX_ICS_ID) + return NULL; + ics = xics->ics[icsid]; + if (!ics) + return NULL; + return ics; +} + + +#endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e4313..1020119226d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); @@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } - if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) update_epr = true; switch (priority) { @@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) - kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + if (update_epr == true) { + if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + else if 
(vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { + BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC); + kvmppc_mpic_set_epr(vcpu); + } + } new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; + case EMULATE_EXIT_USER: + return RESUME_HOST; + default: BUG(); } @@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } +static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) +{ + u32 old_tsr = vcpu->arch.tsr; + + vcpu->arch.tsr = new_tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + + update_timer_ints(vcpu); +} + /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, kvmppc_emulate_dec(vcpu); } - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { - u32 old_tsr = vcpu->arch.tsr; - - vcpu->arch.tsr = sregs->u.e.tsr; - - if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) - arm_next_watchdog(vcpu); - - update_timer_ints(vcpu); - } + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) + kvmppc_set_tsr(vcpu, sregs->u.e.tsr); return 0; } @@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - int r = -EINVAL; + int r = 0; + union kvmppc_one_reg val; + int size; + long int i; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; switch (reg->id) { case KVM_REG_PPC_IAC1: case KVM_REG_PPC_IAC2: case KVM_REG_PPC_IAC3: - case KVM_REG_PPC_IAC4: { - int iac = reg->id - KVM_REG_PPC_IAC1; - r = copy_to_user((u64 __user *)(long)reg->addr, - &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); + case KVM_REG_PPC_IAC4: + i = reg->id - KVM_REG_PPC_IAC1; + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); break; - } case KVM_REG_PPC_DAC1: - case KVM_REG_PPC_DAC2: { - int dac = reg->id - KVM_REG_PPC_DAC1; - r = copy_to_user((u64 __user *)(long)reg->addr, - &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); + case KVM_REG_PPC_DAC2: + i = reg->id - KVM_REG_PPC_DAC1; + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); break; - } case KVM_REG_PPC_EPR: { u32 epr = get_guest_epr(vcpu); - r = put_user(epr, (u32 __user *)(long)reg->addr); + val = get_reg_val(reg->id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); + val = get_reg_val(reg->id, vcpu->arch.epcr); break; #endif + case KVM_REG_PPC_TCR: + val = get_reg_val(reg->id, vcpu->arch.tcr); + break; + case KVM_REG_PPC_TSR: + val = get_reg_val(reg->id, vcpu->arch.tsr); + break; + case KVM_REG_PPC_DEBUG_INST: + val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); + break; default: + r = kvmppc_get_one_reg(vcpu, reg->id, &val); break; } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + return r; } int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - int r = -EINVAL; + int r = 0; + union kvmppc_one_reg val; + int size; + long int i; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; switch (reg->id) { case KVM_REG_PPC_IAC1: case KVM_REG_PPC_IAC2: case KVM_REG_PPC_IAC3: - case 
KVM_REG_PPC_IAC4: { - int iac = reg->id - KVM_REG_PPC_IAC1; - r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], - (u64 __user *)(long)reg->addr, sizeof(u64)); + case KVM_REG_PPC_IAC4: + i = reg->id - KVM_REG_PPC_IAC1; + vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); break; - } case KVM_REG_PPC_DAC1: - case KVM_REG_PPC_DAC2: { - int dac = reg->id - KVM_REG_PPC_DAC1; - r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], - (u64 __user *)(long)reg->addr, sizeof(u64)); + case KVM_REG_PPC_DAC2: + i = reg->id - KVM_REG_PPC_DAC1; + vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); break; - } case KVM_REG_PPC_EPR: { - u32 new_epr; - r = get_user(new_epr, (u32 __user *)(long)reg->addr); - if (!r) - kvmppc_set_epr(vcpu, new_epr); + u32 new_epr = set_reg_val(reg->id, val); + kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr; - r = get_user(new_epcr, (u32 __user *)(long)reg->addr); - if (r == 0) - kvmppc_set_epcr(vcpu, new_epcr); + u32 new_epcr = set_reg_val(reg->id, val); + kvmppc_set_epcr(vcpu, new_epcr); break; } #endif + case KVM_REG_PPC_OR_TSR: { + u32 tsr_bits = set_reg_val(reg->id, val); + kvmppc_set_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_CLEAR_TSR: { + u32 tsr_bits = set_reg_val(reg->id, val); + kvmppc_clr_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_TSR: { + u32 tsr = set_reg_val(reg->id, val); + kvmppc_set_tsr(vcpu, tsr); + break; + } + case KVM_REG_PPC_TCR: { + u32 tcr = set_reg_val(reg->id, val); + kvmppc_set_tcr(vcpu, tcr); + break; + } default: + r = kvmppc_set_one_reg(vcpu, reg->id, &val); break; } + return r; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; @@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c9651..2c6deb5ef2f 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -54,8 +54,7 @@ (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ (1<<BOOKE_INTERRUPT_ALIGNMENT)) -.macro KVM_HANDLER ivor_nr scratch srr0 -_GLOBAL(kvmppc_handler_\ivor_nr) +.macro __KVM_HANDLER ivor_nr scratch srr0 /* Get pointer to vcpu and record exit number. */ mtspr \scratch , r4 mfspr r4, SPRN_SPRG_THREAD @@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr) bctr .endm +.macro KVM_HANDLER ivor_nr scratch srr0 +_GLOBAL(kvmppc_handler_\ivor_nr) + __KVM_HANDLER \ivor_nr \scratch \srr0 +.endm + +.macro KVM_DBG_HANDLER ivor_nr scratch srr0 +_GLOBAL(kvmppc_handler_\ivor_nr) + mtspr \scratch, r4 + mfspr r4, SPRN_SPRG_THREAD + lwz r4, THREAD_KVM_VCPU(r4) + stw r3, VCPU_CRIT_SAVE(r4) + mfcr r3 + mfspr r4, SPRN_CSRR1 + andi. 
r4, r4, MSR_PR
+ bne 1f
+ /* debug interrupt happened in enter/exit path */
+ mfspr r4, SPRN_CSRR1
+ rlwinm r4, r4, 0, ~MSR_DE
+ mtspr SPRN_CSRR1, r4
+ lis r4, 0xffff
+ ori r4, r4, 0xffff
+ mtspr SPRN_DBSR, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ mtcr r3
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ rfci
+1: /* debug interrupt happened in guest */
+ mtcr r3
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
 .macro KVM_HANDLER_ADDR ivor_nr
 .long kvmppc_handler_\ivor_nr
 .endm
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7802b..ce6b73c2961 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a8ce2..c2e5e98453a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
 #include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
+enum vcpu_ftr {
+ VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM 3
 #define E500_TLB_NUM 2
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe) (MAS1_TS)
 #endif /* !BOOKE_HV */
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+ enum vcpu_ftr ftr)
+{
+ bool has_ftr;
+ switch (ftr) {
+ case VCPU_FTR_MMU_V2:
+ has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+ break;
+ default:
+ return false;
+ }
+ return has_ftr;
+}
+
 #endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836..b10a01243ab 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val) case SPRN_TLB1CFG: *spr_val = vcpu->arch.tlbcfg[1]; break; + case SPRN_TLB0PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[0]; + break; + case SPRN_TLB1PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[1]; + break; case SPRN_L1CSR0: *spr_val = vcpu_e500->l1csr0; break; @@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_MMUCFG: *spr_val = vcpu->arch.mmucfg; break; + case SPRN_EPTCFG: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + /* + * Legacy Linux guests access EPTCFG register even if the E.PT + * category is disabled in the VM. Give them a chance to live. + */ + *spr_val = vcpu->arch.eptcfg; + break; /* extra exceptions */ case SPRN_IVOR32: diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 5c4475983f7..c41a5a96b55 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return 0; } +int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + *val = get_reg_val(id, vcpu->arch.shared->mas0); + break; + case KVM_REG_PPC_MAS1: + *val = get_reg_val(id, vcpu->arch.shared->mas1); + break; + case KVM_REG_PPC_MAS2: + *val = get_reg_val(id, vcpu->arch.shared->mas2); + break; + case KVM_REG_PPC_MAS7_3: + *val = get_reg_val(id, vcpu->arch.shared->mas7_3); + break; + case KVM_REG_PPC_MAS4: + *val = get_reg_val(id, vcpu->arch.shared->mas4); + break; + case KVM_REG_PPC_MAS6: + *val = get_reg_val(id, vcpu->arch.shared->mas6); + break; + case KVM_REG_PPC_MMUCFG: + *val = get_reg_val(id, vcpu->arch.mmucfg); + break; + case KVM_REG_PPC_EPTCFG: + *val = get_reg_val(id, vcpu->arch.eptcfg); + break; + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: + i = id - KVM_REG_PPC_TLB0CFG; + *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); + break; + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: + i = id - KVM_REG_PPC_TLB0PS; + *val = get_reg_val(id, vcpu->arch.tlbps[i]); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + vcpu->arch.shared->mas0 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS1: + vcpu->arch.shared->mas1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS2: + vcpu->arch.shared->mas2 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS7_3: + vcpu->arch.shared->mas7_3 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS4: + vcpu->arch.shared->mas4 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS6: + vcpu->arch.shared->mas6 = set_reg_val(id, *val); + break; + /* Only allow MMU registers to be set to the config supported by KVM */ + case KVM_REG_PPC_MMUCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.mmucfg) + r = -EINVAL; + break; + } + case KVM_REG_PPC_EPTCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.eptcfg) + r = -EINVAL; + break; + } + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: { + /* MMU geometry (N_ENTRY/ASSOC) can be set only 
using SW_TLB */ + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0CFG; + if (reg != vcpu->arch.tlbcfg[i]) + r = -EINVAL; + break; + } + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: { + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0PS; + if (reg != vcpu->arch.tlbps[i]) + r = -EINVAL; + break; + } + default: + r = -EINVAL; + break; + } + + return r; +} + +static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_params *params) +{ + vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + if (params->tlb_sizes[0] <= 2048) + vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0]; + vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1]; + vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + return 0; +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg) { @@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; - vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; - - vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - if (params.tlb_sizes[0] <= 2048) - vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; - vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; - vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + /* Update vcpu's MMU geometry based on SW_TLB input */ + vcpu_mmu_geometry_update(vcpu, ¶ms); vcpu_e500->shared_tlb_pages = pages; vcpu_e500->num_shared_tlb_pages = num_pages; @@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, return 0; } +/* Vcpu's MMU default configuration */ +static int vcpu_mmu_init(struct kvm_vcpu *vcpu, + struct kvmppc_e500_tlb_params *params) +{ + /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/ + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; + + /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/ + vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[0] |= params[0].entries; + vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params[1].entries; + vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT; + + if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { + vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); + vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); + + vcpu->arch.mmucfg &= ~MMUCFG_LRAT; + + /* Guest mmu emulation currently doesn't handle E.PT */ + vcpu->arch.eptcfg = 0; + vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT; + vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND; + } + + return 0; +} + int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; @@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->g2h_tlb1_map) goto err; - /* Init TLB configuration register */ - vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; - vcpu->arch.tlbcfg[0] |= - vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) 
& - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; - vcpu->arch.tlbcfg[1] |= - vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2f4baa074b2..753cc99eff2 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; else r = -ENOTSUPP; @@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + return r; +} + +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + return r; +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f72a8..631a2650e4e 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,6 +38,7 @@ #define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_DCBST 54 #define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_DCBF 86 #define OP_31_XOP_LBZX 87 @@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; + case OP_31_XOP_DCBST: case OP_31_XOP_DCBF: case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 00000000000..5a9a10b9076 --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,20 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm->arch.mpic != NULL); +#endif +#ifdef CONFIG_KVM_XICS + ret = ret || (kvm->arch.xics != NULL); +#endif + smp_rmb(); + return ret; +} + +#endif diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c new file mode 100644 index 00000000000..2861ae9eaae --- /dev/null +++ b/arch/powerpc/kvm/mpic.c @@ -0,0 +1,1853 @@ +/* + * OpenPIC emulation + * + * Copyright (c) 2004 Jocelyn Mayer + * 2011 Alexander Graf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/anon_inodes.h> +#include <asm/uaccess.h> +#include <asm/mpic.h> +#include <asm/kvm_para.h> +#include <asm/kvm_host.h> +#include <asm/kvm_ppc.h> +#include "iodev.h" + +#define MAX_CPU 32 +#define MAX_SRC 256 +#define MAX_TMR 4 +#define MAX_IPI 4 +#define MAX_MSI 8 +#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) +#define VID 0x03 /* MPIC version ID */ + +/* OpenPIC capability flags */ +#define OPENPIC_FLAG_IDR_CRIT (1 << 0) +#define OPENPIC_FLAG_ILR (2 << 0) + +/* OpenPIC address map */ +#define OPENPIC_REG_SIZE 0x40000 +#define OPENPIC_GLB_REG_START 0x0 +#define OPENPIC_GLB_REG_SIZE 0x10F0 +#define OPENPIC_TMR_REG_START 0x10F0 +#define OPENPIC_TMR_REG_SIZE 0x220 +#define OPENPIC_MSI_REG_START 0x1600 +#define OPENPIC_MSI_REG_SIZE 0x200 +#define OPENPIC_SUMMARY_REG_START 0x3800 +#define OPENPIC_SUMMARY_REG_SIZE 0x800 +#define OPENPIC_SRC_REG_START 0x10000 +#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) +#define OPENPIC_CPU_REG_START 0x20000 +#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000)) + +struct fsl_mpic_info { + int max_ext; +}; + +static struct fsl_mpic_info fsl_mpic_20 = { + .max_ext = 12, +}; + +static struct fsl_mpic_info fsl_mpic_42 = { + .max_ext = 12, +}; + +#define FRR_NIRQ_SHIFT 16 +#define FRR_NCPU_SHIFT 8 +#define FRR_VID_SHIFT 0 + +#define VID_REVISION_1_2 2 +#define VID_REVISION_1_3 3 + +#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */ + +#define GCR_RESET 0x80000000 +#define GCR_MODE_PASS 0x00000000 +#define GCR_MODE_MIXED 0x20000000 +#define GCR_MODE_PROXY 0x60000000 + +#define TBCR_CI 0x80000000 /* count inhibit */ +#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */ + +#define IDR_EP_SHIFT 31 +#define IDR_EP_MASK (1 << IDR_EP_SHIFT) +#define IDR_CI0_SHIFT 30 +#define IDR_CI1_SHIFT 29 +#define IDR_P1_SHIFT 1 +#define IDR_P0_SHIFT 0 + +#define ILR_INTTGT_MASK 0x000000ff +#define ILR_INTTGT_INT 0x00 +#define ILR_INTTGT_CINT 0x01 /* critical */ +#define ILR_INTTGT_MCP 0x02 /* machine check */ +#define NUM_OUTPUTS 3 + +#define MSIIR_OFFSET 0x140 +#define MSIIR_SRS_SHIFT 29 +#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT) +#define MSIIR_IBS_SHIFT 24 +#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT) + +static int get_current_cpu(void) +{ +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; + return vcpu ? vcpu->arch.irq_cpu_id : -1; +#else + /* XXX */ + return -1; +#endif +} + +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx); +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx); + +enum irq_type { + IRQ_TYPE_NORMAL = 0, + IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */ + IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ +}; + +struct irq_queue { + /* Round up to the nearest 64 IRQs so that the queue length + * won't change when moving between 32 and 64 bit hosts. 
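+ *
+ * Editor's illustration: MAX_IRQ is 256 + 4 + 4 = 264, which rounds
+ * up to 320 bits -- 40 bytes of queue whether a long is 32 or 64
+ * bits wide.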
+ */ + unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)]; + int next; + int priority; +}; + +struct irq_source { + uint32_t ivpr; /* IRQ vector/priority register */ + uint32_t idr; /* IRQ destination register */ + uint32_t destmask; /* bitmap of CPU destinations */ + int last_cpu; + int output; /* IRQ level, e.g. ILR_INTTGT_INT */ + int pending; /* TRUE if IRQ is pending */ + enum irq_type type; + bool level:1; /* level-triggered */ + bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ +}; + +#define IVPR_MASK_SHIFT 31 +#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT) +#define IVPR_ACTIVITY_SHIFT 30 +#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT) +#define IVPR_MODE_SHIFT 29 +#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT) +#define IVPR_POLARITY_SHIFT 23 +#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT) +#define IVPR_SENSE_SHIFT 22 +#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT) + +#define IVPR_PRIORITY_MASK (0xF << 16) +#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16)) +#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask) + +/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */ +#define IDR_EP 0x80000000 /* external pin */ +#define IDR_CI 0x40000000 /* critical interrupt */ + +struct irq_dest { + struct kvm_vcpu *vcpu; + + int32_t ctpr; /* CPU current task priority */ + struct irq_queue raised; + struct irq_queue servicing; + + /* Count of IRQ sources asserting on non-INT outputs */ + uint32_t outputs_active[NUM_OUTPUTS]; +}; + +#define MAX_MMIO_REGIONS 10 + +struct openpic { + struct kvm *kvm; + struct kvm_device *dev; + struct kvm_io_device mmio; + const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS]; + int num_mmio_regions; + + gpa_t reg_base; + spinlock_t lock; + + /* Behavior control */ + struct fsl_mpic_info *fsl; + uint32_t model; + uint32_t flags; + uint32_t nb_irqs; + uint32_t vid; + uint32_t vir; /* Vendor identification register */ + uint32_t vector_mask; + uint32_t tfrr_reset; + uint32_t ivpr_reset; + uint32_t idr_reset; + uint32_t brr1; + uint32_t mpic_mode_mask; + + /* Global registers */ + uint32_t frr; /* Feature reporting register */ + uint32_t gcr; /* Global configuration register */ + uint32_t pir; /* Processor initialization register */ + uint32_t spve; /* Spurious vector register */ + uint32_t tfrr; /* Timer frequency reporting register */ + /* Source registers */ + struct irq_source src[MAX_IRQ]; + /* Local registers per output pin */ + struct irq_dest dst[MAX_CPU]; + uint32_t nb_cpus; + /* Timer registers */ + struct { + uint32_t tccr; /* Global timer current count register */ + uint32_t tbcr; /* Global timer base count register */ + } timers[MAX_TMR]; + /* Shared MSI registers */ + struct { + uint32_t msir; /* Shared Message Signaled Interrupt Register */ + } msi[MAX_MSI]; + uint32_t max_irq; + uint32_t irq_ipi0; + uint32_t irq_tim0; + uint32_t irq_msi; +}; + + +static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, + int output) +{ + struct kvm_interrupt irq = { + .irq = KVM_INTERRUPT_SET_LEVEL, + }; + + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); +} + +static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, + int output) +{ + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not 
exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvmppc_core_dequeue_external(dst->vcpu); +} + +static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ) +{ + set_bit(n_IRQ, q->queue); +} + +static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ) +{ + clear_bit(n_IRQ, q->queue); +} + +static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ) +{ + return test_bit(n_IRQ, q->queue); +} + +static void IRQ_check(struct openpic *opp, struct irq_queue *q) +{ + int irq = -1; + int next = -1; + int priority = -1; + + for (;;) { + irq = find_next_bit(q->queue, opp->max_irq, irq + 1); + if (irq == opp->max_irq) + break; + + pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n", + irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority); + + if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) { + next = irq; + priority = IVPR_PRIORITY(opp->src[irq].ivpr); + } + } + + q->next = next; + q->priority = priority; +} + +static int IRQ_get_next(struct openpic *opp, struct irq_queue *q) +{ + /* XXX: optimize */ + IRQ_check(opp, q); + + return q->next; +} + +static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, + bool active, bool was_active) +{ + struct irq_dest *dst; + struct irq_source *src; + int priority; + + dst = &opp->dst[n_CPU]; + src = &opp->src[n_IRQ]; + + pr_debug("%s: IRQ %d active %d was %d\n", + __func__, n_IRQ, active, was_active); + + if (src->output != ILR_INTTGT_INT) { + pr_debug("%s: output %d irq %d active %d was %d count %d\n", + __func__, src->output, n_IRQ, active, was_active, + dst->outputs_active[src->output]); + + /* On Freescale MPIC, critical interrupts ignore priority, + * IACK, EOI, etc. Before MPIC v4.1 they also ignore + * masking. + */ + if (active) { + if (!was_active && + dst->outputs_active[src->output]++ == 0) { + pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n", + __func__, src->output, n_CPU, n_IRQ); + mpic_irq_raise(opp, dst, src->output); + } + } else { + if (was_active && + --dst->outputs_active[src->output] == 0) { + pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n", + __func__, src->output, n_CPU, n_IRQ); + mpic_irq_lower(opp, dst, src->output); + } + } + + return; + } + + priority = IVPR_PRIORITY(src->ivpr); + + /* Even if the interrupt doesn't have enough priority, + * it is still raised, in case ctpr is lowered later. 
+ */ + if (active) + IRQ_setbit(&dst->raised, n_IRQ); + else + IRQ_resetbit(&dst->raised, n_IRQ); + + IRQ_check(opp, &dst->raised); + + if (active && priority <= dst->ctpr) { + pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n", + __func__, n_IRQ, priority, dst->ctpr, n_CPU); + active = 0; + } + + if (active) { + if (IRQ_get_next(opp, &dst->servicing) >= 0 && + priority <= dst->servicing.priority) { + pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n", + __func__, n_IRQ, dst->servicing.next, n_CPU); + } else { + pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", + __func__, n_CPU, n_IRQ, dst->raised.next); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + } else { + IRQ_get_next(opp, &dst->servicing); + if (dst->raised.priority > dst->ctpr && + dst->raised.priority > dst->servicing.priority) { + pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n", + __func__, n_IRQ, dst->raised.next, + dst->raised.priority, dst->ctpr, + dst->servicing.priority, n_CPU); + /* IRQ line stays asserted */ + } else { + pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", + __func__, n_IRQ, dst->ctpr, + dst->servicing.priority, n_CPU); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + } + } +} + +/* update pic state because registers for n_IRQ have changed value */ +static void openpic_update_irq(struct openpic *opp, int n_IRQ) +{ + struct irq_source *src; + bool active, was_active; + int i; + + src = &opp->src[n_IRQ]; + active = src->pending; + + if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) { + /* Interrupt source is disabled */ + pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ); + active = false; + } + + was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK); + + /* + * We don't have a similar check for already-active because + * ctpr may have changed and we need to withdraw the interrupt. 
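+ * (So active && was_active falls through as well: the source is
+ * re-piped and may be lowered if it no longer beats ctpr.)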
+ */ + if (!active && !was_active) { + pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ); + return; + } + + if (active) + src->ivpr |= IVPR_ACTIVITY_MASK; + else + src->ivpr &= ~IVPR_ACTIVITY_MASK; + + if (src->destmask == 0) { + /* No target */ + pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ); + return; + } + + if (src->destmask == (1 << src->last_cpu)) { + /* Only one CPU is allowed to receive this IRQ */ + IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active); + } else if (!(src->ivpr & IVPR_MODE_MASK)) { + /* Directed delivery mode */ + for (i = 0; i < opp->nb_cpus; i++) { + if (src->destmask & (1 << i)) { + IRQ_local_pipe(opp, i, n_IRQ, active, + was_active); + } + } + } else { + /* Distributed delivery mode */ + for (i = src->last_cpu + 1; i != src->last_cpu; i++) { + if (i == opp->nb_cpus) + i = 0; + + if (src->destmask & (1 << i)) { + IRQ_local_pipe(opp, i, n_IRQ, active, + was_active); + src->last_cpu = i; + break; + } + } + } +} + +static void openpic_set_irq(void *opaque, int n_IRQ, int level) +{ + struct openpic *opp = opaque; + struct irq_source *src; + + if (n_IRQ >= MAX_IRQ) { + WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ); + return; + } + + src = &opp->src[n_IRQ]; + pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n", + n_IRQ, level, src->ivpr); + if (src->level) { + /* level-sensitive irq */ + src->pending = level; + openpic_update_irq(opp, n_IRQ); + } else { + /* edge-sensitive irq */ + if (level) { + src->pending = 1; + openpic_update_irq(opp, n_IRQ); + } + + if (src->output != ILR_INTTGT_INT) { + /* Edge-triggered interrupts shouldn't be used + * with non-INT delivery, but just in case, + * try to make it do something sane rather than + * cause an interrupt storm. This is close to + * what you'd probably see happen in real hardware. 
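+ * Any raise done just above is retired here straight away:
+ * clearing pending and re-running the update turns the event
+ * into a single short pulse on the non-INT output.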
+ */ + src->pending = 0; + openpic_update_irq(opp, n_IRQ); + } + } +} + +static void openpic_reset(struct openpic *opp) +{ + int i; + + opp->gcr = GCR_RESET; + /* Initialise controller registers */ + opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) | + (opp->vid << FRR_VID_SHIFT); + + opp->pir = 0; + opp->spve = -1 & opp->vector_mask; + opp->tfrr = opp->tfrr_reset; + /* Initialise IRQ sources */ + for (i = 0; i < opp->max_irq; i++) { + opp->src[i].ivpr = opp->ivpr_reset; + opp->src[i].idr = opp->idr_reset; + + switch (opp->src[i].type) { + case IRQ_TYPE_NORMAL: + opp->src[i].level = + !!(opp->ivpr_reset & IVPR_SENSE_MASK); + break; + + case IRQ_TYPE_FSLINT: + opp->src[i].ivpr |= IVPR_POLARITY_MASK; + break; + + case IRQ_TYPE_FSLSPECIAL: + break; + } + } + /* Initialise IRQ destinations */ + for (i = 0; i < MAX_CPU; i++) { + opp->dst[i].ctpr = 15; + memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue)); + opp->dst[i].raised.next = -1; + memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue)); + opp->dst[i].servicing.next = -1; + } + /* Initialise timers */ + for (i = 0; i < MAX_TMR; i++) { + opp->timers[i].tccr = 0; + opp->timers[i].tbcr = TBCR_CI; + } + /* Go out of RESET state */ + opp->gcr = 0; +} + +static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) +{ + return opp->src[n_IRQ].idr; +} + +static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) +{ + if (opp->flags & OPENPIC_FLAG_ILR) + return opp->src[n_IRQ].output; + + return 0xffffffff; +} + +static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ) +{ + return opp->src[n_IRQ].ivpr; +} + +static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + struct irq_source *src = &opp->src[n_IRQ]; + uint32_t normal_mask = (1UL << opp->nb_cpus) - 1; + uint32_t crit_mask = 0; + uint32_t mask = normal_mask; + int crit_shift = IDR_EP_SHIFT - opp->nb_cpus; + int i; + + if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { + crit_mask = mask << crit_shift; + mask |= crit_mask | IDR_EP; + } + + src->idr = val & mask; + pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr); + + if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { + if (src->idr & crit_mask) { + if (src->idr & normal_mask) { + pr_debug("%s: IRQ configured for multiple output types, using critical\n", + __func__); + } + + src->output = ILR_INTTGT_CINT; + src->nomask = true; + src->destmask = 0; + + for (i = 0; i < opp->nb_cpus; i++) { + int n_ci = IDR_CI0_SHIFT - i; + + if (src->idr & (1UL << n_ci)) + src->destmask |= 1UL << i; + } + } else { + src->output = ILR_INTTGT_INT; + src->nomask = false; + src->destmask = src->idr & normal_mask; + } + } else { + src->destmask = src->idr; + } +} + +static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + if (opp->flags & OPENPIC_FLAG_ILR) { + struct irq_source *src = &opp->src[n_IRQ]; + + src->output = val & ILR_INTTGT_MASK; + pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, + src->output); + + /* TODO: on MPIC v4.0 only, set nomask for non-INT */ + } +} + +static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + uint32_t mask; + + /* NOTE when implementing newer FSL MPIC models: starting with v4.0, + * the polarity bit is read-only on internal interrupts. 
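+ * That restriction is not modelled here: POLARITY stays in the
+ * writable mask below for all sources and both supported models.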
+ */ + mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK | + IVPR_POLARITY_MASK | opp->vector_mask; + + /* ACTIVITY bit is read-only */ + opp->src[n_IRQ].ivpr = + (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask); + + /* For FSL internal interrupts, The sense bit is reserved and zero, + * and the interrupt is always level-triggered. Timers and IPIs + * have no sense or polarity bits, and are edge-triggered. + */ + switch (opp->src[n_IRQ].type) { + case IRQ_TYPE_NORMAL: + opp->src[n_IRQ].level = + !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK); + break; + + case IRQ_TYPE_FSLINT: + opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK; + break; + + case IRQ_TYPE_FSLSPECIAL: + opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK); + break; + } + + openpic_update_irq(opp, n_IRQ); + pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val, + opp->src[n_IRQ].ivpr); +} + +static void openpic_gcr_write(struct openpic *opp, uint64_t val) +{ + if (val & GCR_RESET) { + openpic_reset(opp); + return; + } + + opp->gcr &= ~opp->mpic_mode_mask; + opp->gcr |= val & opp->mpic_mode_mask; +} + +static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int err = 0; + + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); + if (addr & 0xF) + return 0; + + switch (addr) { + case 0x00: /* Block Revision Register1 (BRR1) is Readonly */ + break; + case 0x40: + case 0x50: + case 0x60: + case 0x70: + case 0x80: + case 0x90: + case 0xA0: + case 0xB0: + err = openpic_cpu_write_internal(opp, addr, val, + get_current_cpu()); + break; + case 0x1000: /* FRR */ + break; + case 0x1020: /* GCR */ + openpic_gcr_write(opp, val); + break; + case 0x1080: /* VIR */ + break; + case 0x1090: /* PIR */ + /* + * This register is used to reset a CPU core -- + * let userspace handle it. 
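+ * (-ENXIO propagates out of the in-kernel handler, so the
+ * access reaches user space as an ordinary MMIO exit.)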
+ */ + err = -ENXIO; + break; + case 0x10A0: /* IPI_IVPR */ + case 0x10B0: + case 0x10C0: + case 0x10D0: { + int idx; + idx = (addr - 0x10A0) >> 4; + write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val); + break; + } + case 0x10E0: /* SPVE */ + opp->spve = val & opp->vector_mask; + break; + default: + break; + } + + return err; +} + +static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + u32 retval; + int err = 0; + + pr_debug("%s: addr %#llx\n", __func__, addr); + retval = 0xFFFFFFFF; + if (addr & 0xF) + goto out; + + switch (addr) { + case 0x1000: /* FRR */ + retval = opp->frr; + retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT; + break; + case 0x1020: /* GCR */ + retval = opp->gcr; + break; + case 0x1080: /* VIR */ + retval = opp->vir; + break; + case 0x1090: /* PIR */ + retval = 0x00000000; + break; + case 0x00: /* Block Revision Register1 (BRR1) */ + retval = opp->brr1; + break; + case 0x40: + case 0x50: + case 0x60: + case 0x70: + case 0x80: + case 0x90: + case 0xA0: + case 0xB0: + err = openpic_cpu_read_internal(opp, addr, + &retval, get_current_cpu()); + break; + case 0x10A0: /* IPI_IVPR */ + case 0x10B0: + case 0x10C0: + case 0x10D0: + { + int idx; + idx = (addr - 0x10A0) >> 4; + retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx); + } + break; + case 0x10E0: /* SPVE */ + retval = opp->spve; + break; + default: + break; + } + +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return err; +} + +static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int idx; + + addr += 0x10f0; + + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); + if (addr & 0xF) + return 0; + + if (addr == 0x10f0) { + /* TFRR */ + opp->tfrr = val; + return 0; + } + + idx = (addr >> 6) & 0x3; + addr = addr & 0x30; + + switch (addr & 0x30) { + case 0x00: /* TCCR */ + break; + case 0x10: /* TBCR */ + if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && + (val & TBCR_CI) == 0 && + (opp->timers[idx].tbcr & TBCR_CI) != 0) + opp->timers[idx].tccr &= ~TCCR_TOG; + + opp->timers[idx].tbcr = val; + break; + case 0x20: /* TVPR */ + write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val); + break; + case 0x30: /* TDR */ + write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); + break; + } + + return 0; +} + +static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + uint32_t retval = -1; + int idx; + + pr_debug("%s: addr %#llx\n", __func__, addr); + if (addr & 0xF) + goto out; + + idx = (addr >> 6) & 0x3; + if (addr == 0x0) { + /* TFRR */ + retval = opp->tfrr; + goto out; + } + + switch (addr & 0x30) { + case 0x00: /* TCCR */ + retval = opp->timers[idx].tccr; + break; + case 0x10: /* TBCR */ + retval = opp->timers[idx].tbcr; + break; + case 0x20: /* TIPV */ + retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx); + break; + case 0x30: /* TIDE (TIDR) */ + retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx); + break; + } + +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return 0; +} + +static int openpic_src_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int idx; + + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); + + addr = addr & 0xffff; + idx = addr >> 5; + + switch (addr & 0x1f) { + case 0x00: + write_IRQreg_ivpr(opp, idx, val); + break; + case 0x10: + write_IRQreg_idr(opp, idx, val); + break; + case 0x18: + write_IRQreg_ilr(opp, idx, val); + break; + } + + return 0; +} + +static int openpic_src_read(void *opaque, gpa_t 
addr, u32 *ptr) +{ + struct openpic *opp = opaque; + uint32_t retval; + int idx; + + pr_debug("%s: addr %#llx\n", __func__, addr); + retval = 0xFFFFFFFF; + + addr = addr & 0xffff; + idx = addr >> 5; + + switch (addr & 0x1f) { + case 0x00: + retval = read_IRQreg_ivpr(opp, idx); + break; + case 0x10: + retval = read_IRQreg_idr(opp, idx); + break; + case 0x18: + retval = read_IRQreg_ilr(opp, idx); + break; + } + + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return 0; +} + +static int openpic_msi_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int idx = opp->irq_msi; + int srs, ibs; + + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); + if (addr & 0xF) + return 0; + + switch (addr) { + case MSIIR_OFFSET: + srs = val >> MSIIR_SRS_SHIFT; + idx += srs; + ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT; + opp->msi[srs].msir |= 1 << ibs; + openpic_set_irq(opp, idx, 1); + break; + default: + /* most registers are read-only, thus ignored */ + break; + } + + return 0; +} + +static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + uint32_t r = 0; + int i, srs; + + pr_debug("%s: addr %#llx\n", __func__, addr); + if (addr & 0xF) + return -ENXIO; + + srs = addr >> 4; + + switch (addr) { + case 0x00: + case 0x10: + case 0x20: + case 0x30: + case 0x40: + case 0x50: + case 0x60: + case 0x70: /* MSIRs */ + r = opp->msi[srs].msir; + /* Clear on read */ + opp->msi[srs].msir = 0; + openpic_set_irq(opp, opp->irq_msi + srs, 0); + break; + case 0x120: /* MSISR */ + for (i = 0; i < MAX_MSI; i++) + r |= (opp->msi[i].msir ? 1 : 0) << i; + break; + } + + pr_debug("%s: => 0x%08x\n", __func__, r); + *ptr = r; + return 0; +} + +static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr) +{ + uint32_t r = 0; + + pr_debug("%s: addr %#llx\n", __func__, addr); + + /* TODO: EISR/EIMR */ + + *ptr = r; + return 0; +} + +static int openpic_summary_write(void *opaque, gpa_t addr, u32 val) +{ + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); + + /* TODO: EISR/EIMR */ + return 0; +} + +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx) +{ + struct openpic *opp = opaque; + struct irq_source *src; + struct irq_dest *dst; + int s_IRQ, n_IRQ; + + pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx, + addr, val); + + if (idx < 0) + return 0; + + if (addr & 0xF) + return 0; + + dst = &opp->dst[idx]; + addr &= 0xFF0; + switch (addr) { + case 0x40: /* IPIDR */ + case 0x50: + case 0x60: + case 0x70: + idx = (addr - 0x40) >> 4; + /* we use IDE as mask which CPUs to deliver the IPI to still. 
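+ * An IPIDR write ORs val into destmask; the 1/0 pulse below
+ * delivers the IPI, and each CPU's IACK clears its bit again.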
*/ + opp->src[opp->irq_ipi0 + idx].destmask |= val; + openpic_set_irq(opp, opp->irq_ipi0 + idx, 1); + openpic_set_irq(opp, opp->irq_ipi0 + idx, 0); + break; + case 0x80: /* CTPR */ + dst->ctpr = val & 0x0000000F; + + pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n", + __func__, idx, dst->ctpr, dst->raised.priority, + dst->servicing.priority); + + if (dst->raised.priority <= dst->ctpr) { + pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", + __func__, idx); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + } else if (dst->raised.priority > dst->servicing.priority) { + pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", + __func__, idx, dst->raised.next); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + + break; + case 0x90: /* WHOAMI */ + /* Read-only register */ + break; + case 0xA0: /* IACK */ + /* Read-only register */ + break; + case 0xB0: { /* EOI */ + int notify_eoi; + + pr_debug("EOI\n"); + s_IRQ = IRQ_get_next(opp, &dst->servicing); + + if (s_IRQ < 0) { + pr_debug("%s: EOI with no interrupt in service\n", + __func__); + break; + } + + IRQ_resetbit(&dst->servicing, s_IRQ); + /* Notify listeners that the IRQ is over */ + notify_eoi = s_IRQ; + /* Set up next servicing IRQ */ + s_IRQ = IRQ_get_next(opp, &dst->servicing); + /* Check queued interrupts. */ + n_IRQ = IRQ_get_next(opp, &dst->raised); + src = &opp->src[n_IRQ]; + if (n_IRQ != -1 && + (s_IRQ == -1 || + IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { + pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", + idx, n_IRQ); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + + spin_unlock(&opp->lock); + kvm_notify_acked_irq(opp->kvm, 0, notify_eoi); + spin_lock(&opp->lock); + + break; + } + default: + break; + } + + return 0; +} + +static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + + return openpic_cpu_write_internal(opp, addr, val, + (addr & 0x1f000) >> 12); +} + +static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, + int cpu) +{ + struct irq_source *src; + int retval, irq; + + pr_debug("Lower OpenPIC INT output\n"); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + + irq = IRQ_get_next(opp, &dst->raised); + pr_debug("IACK: irq=%d\n", irq); + + if (irq == -1) + /* No more interrupt pending */ + return opp->spve; + + src = &opp->src[irq]; + if (!(src->ivpr & IVPR_ACTIVITY_MASK) || + !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) { + pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n", + __func__, irq, dst->ctpr, src->ivpr); + openpic_update_irq(opp, irq); + retval = opp->spve; + } else { + /* IRQ enter servicing state */ + IRQ_setbit(&dst->servicing, irq); + retval = IVPR_VECTOR(opp, src->ivpr); + } + + if (!src->level) { + /* edge-sensitive IRQ */ + src->ivpr &= ~IVPR_ACTIVITY_MASK; + src->pending = 0; + IRQ_resetbit(&dst->raised, irq); + } + + if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) { + src->destmask &= ~(1 << cpu); + if (src->destmask && !src->level) { + /* trigger on CPUs that didn't know about it yet */ + openpic_set_irq(opp, irq, 1); + openpic_set_irq(opp, irq, 0); + /* if all CPUs knew about it, set active bit again */ + src->ivpr |= IVPR_ACTIVITY_MASK; + } + } + + return retval; +} + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ + struct openpic *opp = vcpu->arch.mpic; + int cpu = vcpu->arch.irq_cpu_id; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY) + kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu)); + + 
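+ /* Outside proxy mode nothing is latched here; any EPR
+ * handling is left to user space (KVMPPC_EPR_USER). */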
spin_unlock_irqrestore(&opp->lock, flags); +} + +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx) +{ + struct openpic *opp = opaque; + struct irq_dest *dst; + uint32_t retval; + + pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr); + retval = 0xFFFFFFFF; + + if (idx < 0) + goto out; + + if (addr & 0xF) + goto out; + + dst = &opp->dst[idx]; + addr &= 0xFF0; + switch (addr) { + case 0x80: /* CTPR */ + retval = dst->ctpr; + break; + case 0x90: /* WHOAMI */ + retval = idx; + break; + case 0xA0: /* IACK */ + retval = openpic_iack(opp, dst, idx); + break; + case 0xB0: /* EOI */ + retval = 0; + break; + default: + break; + } + pr_debug("%s: => 0x%08x\n", __func__, retval); + +out: + *ptr = retval; + return 0; +} + +static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + + return openpic_cpu_read_internal(opp, addr, ptr, + (addr & 0x1f000) >> 12); +} + +struct mem_reg { + int (*read)(void *opaque, gpa_t addr, u32 *ptr); + int (*write)(void *opaque, gpa_t addr, u32 val); + gpa_t start_addr; + int size; +}; + +static const struct mem_reg openpic_gbl_mmio = { + .write = openpic_gbl_write, + .read = openpic_gbl_read, + .start_addr = OPENPIC_GLB_REG_START, + .size = OPENPIC_GLB_REG_SIZE, +}; + +static const struct mem_reg openpic_tmr_mmio = { + .write = openpic_tmr_write, + .read = openpic_tmr_read, + .start_addr = OPENPIC_TMR_REG_START, + .size = OPENPIC_TMR_REG_SIZE, +}; + +static const struct mem_reg openpic_cpu_mmio = { + .write = openpic_cpu_write, + .read = openpic_cpu_read, + .start_addr = OPENPIC_CPU_REG_START, + .size = OPENPIC_CPU_REG_SIZE, +}; + +static const struct mem_reg openpic_src_mmio = { + .write = openpic_src_write, + .read = openpic_src_read, + .start_addr = OPENPIC_SRC_REG_START, + .size = OPENPIC_SRC_REG_SIZE, +}; + +static const struct mem_reg openpic_msi_mmio = { + .read = openpic_msi_read, + .write = openpic_msi_write, + .start_addr = OPENPIC_MSI_REG_START, + .size = OPENPIC_MSI_REG_SIZE, +}; + +static const struct mem_reg openpic_summary_mmio = { + .read = openpic_summary_read, + .write = openpic_summary_write, + .start_addr = OPENPIC_SUMMARY_REG_START, + .size = OPENPIC_SUMMARY_REG_SIZE, +}; + +static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr) +{ + if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) { + WARN(1, "kvm mpic: too many mmio regions\n"); + return; + } + + opp->mmio_regions[opp->num_mmio_regions++] = mr; +} + +static void fsl_common_init(struct openpic *opp) +{ + int i; + int virq = MAX_SRC; + + add_mmio_region(opp, &openpic_msi_mmio); + add_mmio_region(opp, &openpic_summary_mmio); + + opp->vid = VID_REVISION_1_2; + opp->vir = VIR_GENERIC; + opp->vector_mask = 0xFFFF; + opp->tfrr_reset = 0; + opp->ivpr_reset = IVPR_MASK_MASK; + opp->idr_reset = 1 << 0; + opp->max_irq = MAX_IRQ; + + opp->irq_ipi0 = virq; + virq += MAX_IPI; + opp->irq_tim0 = virq; + virq += MAX_TMR; + + BUG_ON(virq > MAX_IRQ); + + opp->irq_msi = 224; + + for (i = 0; i < opp->fsl->max_ext; i++) + opp->src[i].level = false; + + /* Internal interrupts, including message and MSI */ + for (i = 16; i < MAX_SRC; i++) { + opp->src[i].type = IRQ_TYPE_FSLINT; + opp->src[i].level = true; + } + + /* timers and IPIs */ + for (i = MAX_SRC; i < virq; i++) { + opp->src[i].type = IRQ_TYPE_FSLSPECIAL; + opp->src[i].level = false; + } +} + +static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) +{ + int i; + + for (i = 0; i < opp->num_mmio_regions; i++) { + const struct mem_reg *mr = 
opp->mmio_regions[i]; + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; + + return mr->read(opp, addr - mr->start_addr, ptr); + } + + return -ENXIO; +} + +static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) +{ + int i; + + for (i = 0; i < opp->num_mmio_regions; i++) { + const struct mem_reg *mr = opp->mmio_regions[i]; + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; + + return mr->write(opp, addr - mr->start_addr, val); + } + + return -ENXIO; +} + +static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, + int len, void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + union { + u32 val; + u8 bytes[4]; + } u; + + if (addr & (len - 1)) { + pr_debug("%s: bad alignment %llx/%d\n", + __func__, addr, len); + return -EINVAL; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val); + spin_unlock_irq(&opp->lock); + + /* + * Technically only 32-bit accesses are allowed, but be nice to + * people dumping registers a byte at a time -- it works in real + * hardware (reads only, not writes). + */ + if (len == 4) { + *(u32 *)ptr = u.val; + pr_debug("%s: addr %llx ret %d len 4 val %x\n", + __func__, addr, ret, u.val); + } else if (len == 1) { + *(u8 *)ptr = u.bytes[addr & 3]; + pr_debug("%s: addr %llx ret %d len 1 val %x\n", + __func__, addr, ret, u.bytes[addr & 3]); + } else { + pr_debug("%s: bad length %d\n", __func__, len); + return -EINVAL; + } + + return ret; +} + +static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, + int len, const void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + + if (len != 4) { + pr_debug("%s: bad length %d\n", __func__, len); + return -EOPNOTSUPP; + } + if (addr & 3) { + pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); + return -EOPNOTSUPP; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, + *(const u32 *)ptr); + spin_unlock_irq(&opp->lock); + + pr_debug("%s: addr %llx ret %d val %x\n", + __func__, addr, ret, *(const u32 *)ptr); + + return ret; +} + +static const struct kvm_io_device_ops mpic_mmio_ops = { + .read = kvm_mpic_read, + .write = kvm_mpic_write, +}; + +static void map_mmio(struct openpic *opp) +{ + kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops); + + kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS, + opp->reg_base, OPENPIC_REG_SIZE, + &opp->mmio); +} + +static void unmap_mmio(struct openpic *opp) +{ + kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); +} + +static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) +{ + u64 base; + + if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64))) + return -EFAULT; + + if (base & 0x3ffff) { + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n", + __func__, base); + return -EINVAL; + } + + if (base == opp->reg_base) + return 0; + + mutex_lock(&opp->kvm->slots_lock); + + unmap_mmio(opp); + opp->reg_base = base; + + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n", + __func__, base); + + if (base == 0) + goto out; + + map_mmio(opp); + +out: + mutex_unlock(&opp->kvm->slots_lock); + return 0; +} + +#define ATTR_SET 0 +#define ATTR_GET 1 + +static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) +{ + int ret; + + if (addr & 3) + return -ENXIO; + + spin_lock_irq(&opp->lock); + + if (type == ATTR_SET) + ret = kvm_mpic_write_internal(opp, addr, *val); + else + ret = 
kvm_mpic_read_internal(opp, addr, val); + + spin_unlock_irq(&opp->lock); + + pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); + + return ret; +} + +static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u32 attr32; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return set_base_addr(opp, attr); + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return access_reg(opp, attr->attr, &attr32, ATTR_SET); + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + if (attr32 != 0 && attr32 != 1) + return -EINVAL; + + spin_lock_irq(&opp->lock); + openpic_set_irq(opp, attr->attr, attr32); + spin_unlock_irq(&opp->lock); + return 0; + } + + return -ENXIO; +} + +static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u64 attr64; + u32 attr32; + int ret; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + mutex_lock(&opp->kvm->slots_lock); + attr64 = opp->reg_base; + mutex_unlock(&opp->kvm->slots_lock); + + if (copy_to_user((u64 __user *)(long)attr->addr, + &attr64, sizeof(u64))) + return -EFAULT; + + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + ret = access_reg(opp, attr->attr, &attr32, ATTR_GET); + if (ret) + return ret; + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + spin_lock_irq(&opp->lock); + attr32 = opp->src[attr->attr].pending; + spin_unlock_irq(&opp->lock); + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + } + + return -ENXIO; +} + +static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + break; + + return 0; + } + + return -ENXIO; +} + +static void mpic_destroy(struct kvm_device *dev) +{ + struct openpic *opp = dev->private; + + dev->kvm->arch.mpic = NULL; + kfree(opp); +} + +static int mpic_set_default_irq_routing(struct openpic *opp) +{ + struct kvm_irq_routing_entry *routing; + + /* Create a nop default map, so that dereferencing it still works */ + routing = kzalloc((sizeof(*routing)), GFP_KERNEL); + if (!routing) + return -ENOMEM; + + kvm_set_irq_routing(opp->kvm, routing, 0, 0); + + kfree(routing); + return 0; +} + +static int mpic_create(struct kvm_device *dev, u32 type) +{ + struct openpic *opp; + int ret; + + /* We only support one MPIC at a time for now */ + if (dev->kvm->arch.mpic) + return -EINVAL; + + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); + if (!opp) + return -ENOMEM; + + dev->private = opp; + opp->kvm = dev->kvm; + opp->dev = dev; + opp->model = type; + spin_lock_init(&opp->lock); + + add_mmio_region(opp, &openpic_gbl_mmio); + add_mmio_region(opp, &openpic_tmr_mmio); + add_mmio_region(opp, &openpic_src_mmio); + add_mmio_region(opp, &openpic_cpu_mmio); + + switch (opp->model) { + case KVM_DEV_TYPE_FSL_MPIC_20: + opp->fsl = &fsl_mpic_20; + opp->brr1 = 
0x00400200; + opp->flags |= OPENPIC_FLAG_IDR_CRIT; + opp->nb_irqs = 80; + opp->mpic_mode_mask = GCR_MODE_MIXED; + + fsl_common_init(opp); + + break; + + case KVM_DEV_TYPE_FSL_MPIC_42: + opp->fsl = &fsl_mpic_42; + opp->brr1 = 0x00400402; + opp->flags |= OPENPIC_FLAG_ILR; + opp->nb_irqs = 196; + opp->mpic_mode_mask = GCR_MODE_PROXY; + + fsl_common_init(opp); + + break; + + default: + ret = -ENODEV; + goto err; + } + + ret = mpic_set_default_irq_routing(opp); + if (ret) + goto err; + + openpic_reset(opp); + + smp_wmb(); + dev->kvm->arch.mpic = opp; + + return 0; + +err: + kfree(opp); + return ret; +} + +struct kvm_device_ops kvm_mpic_ops = { + .name = "kvm-mpic", + .create = mpic_create, + .destroy = mpic_destroy, + .set_attr = mpic_set_attr, + .get_attr = mpic_get_attr, + .has_attr = mpic_has_attr, +}; + +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu) +{ + struct openpic *opp = dev->private; + int ret = 0; + + if (dev->ops != &kvm_mpic_ops) + return -EPERM; + if (opp->kvm != vcpu->kvm) + return -EPERM; + if (cpu < 0 || cpu >= MAX_CPU) + return -EPERM; + + spin_lock_irq(&opp->lock); + + if (opp->dst[cpu].vcpu) { + ret = -EEXIST; + goto out; + } + if (vcpu->arch.irq_type) { + ret = -EBUSY; + goto out; + } + + opp->dst[cpu].vcpu = vcpu; + opp->nb_cpus = max(opp->nb_cpus, cpu + 1); + + vcpu->arch.mpic = opp; + vcpu->arch.irq_cpu_id = cpu; + vcpu->arch.irq_type = KVMPPC_IRQ_MPIC; + + /* This might need to be changed if GCR gets extended */ + if (opp->mpic_mode_mask == GCR_MODE_PROXY) + vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; + +out: + spin_unlock_irq(&opp->lock); + return ret; +} + +/* + * This should only happen immediately before the mpic is destroyed, + * so we shouldn't need to worry about anything still trying to + * access the vcpu pointer. + */ +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) +{ + BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); + + opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; +} + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + u32 irq = e->irqchip.pin; + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + openpic_set_irq(opp, irq, level); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + /* + * XXX We ignore the target address for now, as we only support + * a single MSI bank. 
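+ * Only e->msi.data is decoded (as an MSIIR value); the routing
+ * entry's address_lo/address_hi are ignored entirely.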
+ */ + openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = mpic_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + + r = 0; +out: + return r; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1..6316ee336e8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -25,6 +25,7 @@ #include <linux/hrtimer.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/file.h> #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> @@ -32,6 +33,7 @@ #include <asm/cputhreads.h> #include <asm/irqflags.h> #include "timing.h" +#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext) #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_PPC_ALLOC_HTAB: + case KVM_CAP_PPC_RTAS: +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: +#endif r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { kvmppc_core_commit_memory_region(kvm, mem, old); } @@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); + + switch (vcpu->arch.irq_type) { + case KVMPPC_IRQ_MPIC: + kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); + break; + case KVMPPC_IRQ_XICS: + kvmppc_xics_free_icp(vcpu); + break; + } + kvmppc_core_vcpu_free(vcpu); } @@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 
struct kvm_run *run) { @@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_bigendian) { + int idx, ret; + if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, run->mmio.len); @@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->mmio_is_write = 0; vcpu->arch.mmio_sign_extend = 0; - if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, - bytes, &run->mmio.data)) { + idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!ret) { kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; return EMULATE_DONE; @@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_bigendian) { void *data = run->mmio.data; + int idx, ret; if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, @@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, - bytes, &run->mmio.data)) { - kvmppc_complete_mmio_load(vcpu, run); + idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!ret) { vcpu->mmio_needed = 0; return EMULATE_DONE; } @@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } @@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; case KVM_CAP_PPC_EPR: r = 0; - vcpu->arch.epr_enabled = cap->args[0]; + if (cap->args[0]) + vcpu->arch.epr_flags |= KVMPPC_EPR_USER; + else + vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: @@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } +#endif +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, + line_status); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct kvm *kvm 
__maybe_unused = filp->private_data; void __user *argp = (void __user *)arg; long r; @@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; - struct kvm *kvm = filp->private_data; r = -EFAULT; if (copy_from_user(&create_tce, argp, sizeof(create_tce))) @@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { - struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; + struct kvm *kvm = filp->private_data; r = kvm_vm_ioctl_allocate_rma(kvm, &rma); if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) @@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_ALLOCATE_HTAB: { - struct kvm *kvm = filp->private_data; u32 htab_order; r = -EFAULT; @@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_HTAB_FD: { - struct kvm *kvm = filp->private_data; struct kvm_get_htab_fd ghf; r = -EFAULT; @@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { - struct kvm *kvm = filp->private_data; struct kvm_ppc_smmu_info info; memset(&info, 0, sizeof(info)); @@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_RTAS_DEFINE_TOKEN: { + struct kvm *kvm = filp->private_data; + + r = kvm_vm_ioctl_rtas_define_token(kvm, argp); + break; + } #endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 89db29d17c2..7cd728b3b5e 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; static inline unsigned int icp_native_get_xirr(void) { int cpu = smp_processor_id(); + unsigned int xirr; + + /* Handled an interrupt latched by KVM */ + xirr = kvmppc_get_xics_latch(); + if (xirr) + return xirr; return in_be32(&icp_native_regs[cpu]->xirr.word); } @@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { + kvmppc_set_host_ipi(cpu, 1); icp_native_set_qirr(cpu, IPI_PRIORITY); } @@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) { int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); icp_native_set_qirr(cpu, 0xff); return smp_ipi_demux(); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b4622915bd1..4105b8221fd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -306,6 +306,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x00200000UL #define RCP_GR_BIT 0x00040000UL #define RCP_GC_BIT 0x00020000UL +#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x00008000UL @@ -373,6 +374,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x0020000000000000UL #define RCP_GR_BIT 0x0004000000000000UL #define RCP_GC_BIT 0x0002000000000000UL +#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x0000800000000000UL @@ -746,30 +748,42 @@ struct gmap { /** * struct gmap_rmap - reverse mapping for segment table entries - * @next: pointer to the next gmap_rmap structure in the list + * @gmap: pointer to the gmap_struct * @entry: pointer to a segment 
table entry + * @vmaddr: virtual address in the guest address space */ struct gmap_rmap { struct list_head list; + struct gmap *gmap; unsigned long *entry; + unsigned long vmaddr; }; /** * struct gmap_pgtable - gmap information attached to a page table * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries maping a page table + * @mapper: list of segment table entries mapping a page table */ struct gmap_pgtable { unsigned long vmaddr; struct list_head mapper; }; +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + struct gmap *gmap_alloc(struct mm_struct *mm); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long length); + unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(unsigned long address, struct gmap *); unsigned long gmap_translate(unsigned long address, struct gmap *); @@ -777,6 +791,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & RCP_IN_BIT) { + pgste_val(pgste) &= ~RCP_IN_BIT; + gmap_do_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. 
Thus, the following @@ -1032,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1052,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + pgste_t pgste; pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) - pgste_get_lock(ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1082,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pgste_t pgste; pte_t pte; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } pte = *ptep; __ptep_ipte(address, ptep); @@ -1111,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + if (!full) + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!full) @@ -1135,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); @@ -1160,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } __ptep_ipte(address, ptep); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index ff67d730c00..59880dbaf36 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -33,8 +33,6 @@ #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 -#define CHUNK_OLDMEM 4 -#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -43,13 +41,12 @@ struct mem_chunk { }; extern struct mem_chunk memory_chunk[]; -extern unsigned long real_memory_size; extern int memory_end_set; extern unsigned long memory_end; -void detect_memory_layout(struct mem_chunk chunk[]); -void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, - unsigned long size, int type); +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize); +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5..9ccd1905bda 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -44,5 +44,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += virtio-ccw.h header-y += vtoc.h header-y += zcrypt.h diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 00000000000..a9a4ebf79fa --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 
2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1386fcaf4ef..4bb2a465616 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -30,7 +30,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += dumpstack.o diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index fb8d8781a01..f703d91bf72 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -88,8 +88,8 @@ static struct mem_chunk *get_memory_layout(void) struct mem_chunk *chunk_array; chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + detect_memory_layout(chunk_array, 0); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); return chunk_array; } @@ -344,7 +344,7 @@ static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) for (i = 0; i < MEMORY_CHUNKS; i++) { mem_chunk = &chunk_array[i]; if (mem_chunk->size == 0) - break; + continue; if (chunk_array[i].type != CHUNK_READ_WRITE && chunk_array[i].type != CHUNK_READ_ONLY) continue; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index bda011e2f8a..dc8770d7173 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -482,7 +482,6 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_facilities_detect(); - detect_memory_layout(memory_chunk); #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c deleted file mode 100644 index 22d502e885e..00000000000 --- a/arch/s390/kernel/mem_detect.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/ipl.h> -#include <asm/sclp.h> -#include <asm/setup.h> - -#define ADDR2G (1ULL << 31) - -static void find_memory_chunks(struct mem_chunk chunk[]) -{ - unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; - - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); - memsize = rzm * rnmax; - if (!rzm) - rzm = 1ULL << 17; - if (sizeof(long) == 4) { - rzm = min(ADDR2G, rzm); - memsize = memsize ? 
min(ADDR2G, memsize) : ADDR2G; - } - do { - size = 0; - type = tprot(addr); - do { - size += rzm; - if (memsize && addr + size >= memsize) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; - } - addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -void detect_memory_layout(struct mem_chunk chunk[]) -{ - unsigned long flags, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - flags = __arch_local_irq_stnsm(0xf8); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); - -/* - * Move memory chunks array from index "from" to index "to" - */ -static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) -{ - int cnt = MEMORY_CHUNKS - to; - - memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); -} - -/* - * Initialize memory chunk - */ -static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, - unsigned long size, int type) -{ - chunk->type = type; - chunk->addr = addr; - chunk->size = size; -} - -/* - * Create memory hole with given address, size, and type - */ -void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, - unsigned long size, int type) -{ - unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; - int i, ch_type; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (chunk[i].size == 0) - continue; - - /* Define chunk properties */ - ch_start = chunk[i].addr; - ch_size = chunk[i].size; - ch_end = ch_start + ch_size - 1; - ch_type = chunk[i].type; - - /* Is memory chunk hit by memory hole? 
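A note on the hole-punching logic being removed here (it returns, extended, in arch/s390/mm/mem_detect.c further down in this series): a hole [addr, addr + size) hits a chunk [ch_start, ch_end] exactly when it neither ends before the chunk starts nor begins after the chunk ends, and the four possible overlaps — full cover, tail overlap, head overlap, split — are handled below in that order. As a minimal sketch, reusing the function's own variable names:

	/* sketch only: the intersection test behind the four cases below */
	int hit = (addr + size > ch_start) && (addr <= ch_end);
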
*/ - if (addr + size <= ch_start) - continue; /* No: memory hole in front of chunk */ - if (addr > ch_end) - continue; /* No: memory hole after chunk */ - - /* Yes: Define local hole properties */ - lh_start = max(addr, chunk[i].addr); - lh_end = min(addr + size - 1, ch_end); - lh_size = lh_end - lh_start + 1; - - if (lh_start == ch_start && lh_end == ch_end) { - /* Hole covers complete memory chunk */ - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - } else if (lh_end == ch_end) { - /* Hole starts in memory chunk and convers chunk end */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, - ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - i += 1; - } else if (lh_start == ch_start) { - /* Hole ends in memory chunk */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 1], lh_end + 1, - ch_size - lh_size, ch_type); - break; - } else { - /* Hole splits memory chunk */ - mem_chunk_move(chunk, i + 2, i); - mem_chunk_init(&chunk[i], ch_start, - lh_start - ch_start, ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 2], lh_end + 1, - ch_end - lh_end, ch_type); - break; - } - } -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0f419c5765c..0a49095104c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -226,25 +226,17 @@ static void __init conmode_default(void) } #ifdef CONFIG_ZFCPDUMP -static void __init setup_zfcpdump(unsigned int console_devno) +static void __init setup_zfcpdump(void) { - static char str[41]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; if (OLDMEM_BASE) return; - if (console_devno != -1) - sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, " cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(boot_command_line, str); + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} +static inline void setup_zfcpdump(void) {} #endif /* CONFIG_ZFCPDUMP */ /* @@ -471,14 +463,10 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_OLDMEM || - memory_chunk[i].type == CHUNK_CRASHK) - continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: - case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -510,12 +498,10 @@ static void __init setup_resources(void) } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; + unsigned long real_memory_size = 0; int i; @@ -525,7 +511,6 @@ static void __init setup_memory_end(void) memory_end_set = 1; } #endif - real_memory_size = 0; memory_end &= PAGE_MASK; /* @@ -538,6 +523,8 @@ static void __init setup_memory_end(void) unsigned long align; chunk = &memory_chunk[i]; + if (!chunk->size) + continue; align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); start = (chunk->addr + align - 1) & ~(align - 1); end = (chunk->addr + chunk->size) & ~(align - 1); @@ -588,6 +575,8 @@ static void __init setup_memory_end(void) for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; + if (!chunk->size) + continue; if (chunk->addr 
>= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; @@ -688,15 +677,6 @@ static int __init verify_crash_base(unsigned long crash_base, } /* - * Reserve kdump memory by creating a memory hole in the mem_chunk array - */ -static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, - int type) -{ - create_mem_hole(memory_chunk, addr, size, type); -} - -/* * When kdump is enabled, we have to ensure that no memory from * the area [0 - crashkernel memory size] and * [crashk_res.start - crashk_res.end] is set offline. @@ -727,16 +707,22 @@ static struct notifier_block kdump_mem_nb = { static void reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP + unsigned long real_size = 0; + int i; + if (!OLDMEM_BASE) return; + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; - reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); - reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, - CHUNK_OLDMEM); - if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + real_size = max(real_size, chunk->addr + chunk->size); + } + create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); + create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); + if (OLDMEM_BASE + OLDMEM_SIZE == real_size) saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; else - saved_max_pfn = PFN_DOWN(real_memory_size) - 1; + saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -775,7 +761,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + create_mem_hole(memory_chunk, crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -847,11 +833,10 @@ static void __init setup_memory(void) * Register RAM areas with the bootmem allocator. */ - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + for (i = 0; i < MEMORY_CHUNKS; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE && - memory_chunk[i].type != CHUNK_CRASHK) + if (!memory_chunk[i].size) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); @@ -1067,12 +1052,12 @@ void __init setup_arch(char **cmdline_p) memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); parse_early_param(); - + detect_memory_layout(memory_chunk, memory_end); os_info_init(); setup_ipl(); + reserve_oldmem(); setup_memory_end(); setup_addressing_mode(); - reserve_oldmem(); reserve_crashkernel(); setup_memory(); setup_resources(); @@ -1097,5 +1082,5 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); } diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc..70b46eacf8e 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. 
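Selecting HAVE_KVM_EVENTFD here (together with eventfd.o in the Makefile hunk below) lets userspace bind an eventfd to the new KVM_VIRTIO_CCW_NOTIFY_BUS, so the diagnose 0x500 subcode 3 hypercall added in diag.c can be served without a heavyweight exit to userspace. A sketch of the guest side, modeled on the register layout documented in __diag_virtio_hypercall() below and on the style of the existing s390 hypercall helpers — the function name is illustrative, not part of the patch:

	static inline long virtio_ccw_notify(unsigned long schid,
					     unsigned long vq_index)
	{
		register unsigned long nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
		register unsigned long p1 asm("2") = schid;	/* subchannel id */
		register unsigned long p2 asm("3") = vq_index;	/* virtqueue index */
		register long rc asm("2");

		asm volatile ("diag 2,4,0x500\n"
			      : "=d" (rc)
			      : "d" (nr), "d" (p1), "d" (p2)
			      : "memory", "cc");
		return rc;
	}
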
This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19..8fe9d65a458 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb1..1c01a991298 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret, idx; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + */ + ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? ret : 0; +} + int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; @@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95..302e0e52b00 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,369 +18,86 @@ #include <asm/uaccess.h> #include "kvm-s390.h" -static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - unsigned long guestaddr) +static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, + void __user *gptr, + int prefixing) { unsigned long prefix = vcpu->arch.sie_block->prefix; - - if (guestaddr < 2 * PAGE_SIZE) - guestaddr += prefix; - else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) - guestaddr -= prefix; - - return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); -} - -static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (unsigned long __user *) uptr); -} - -static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u32 __user *) uptr); -} - -static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR(uptr)) - return 
PTR_ERR(uptr); - - return get_user(*result, (u16 __user *) uptr); -} - -static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u8 __user *) uptr); -} - -static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u64 __user *) uptr); -} - -static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u32 __user *) uptr); -} - -static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u16 __user *) uptr); -} - -static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u8 __user *) uptr); -} - - -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = from; - - for (i = 0; i < n; i++) { - rc = put_guest_u8(vcpu, guestdest++, *(data++)); - if (rc < 0) - return rc; + unsigned long gaddr = (unsigned long) gptr; + unsigned long uaddr; + + if (prefixing) { + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) + gaddr -= prefix; } - return 0; -} - -static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int r; + uaddr = gmap_fault(gaddr, vcpu->arch.gmap); + if (IS_ERR_VALUE(uaddr)) + uaddr = -EFAULT; + return (void __user *)uaddr; +} + +#define get_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = get_user(x, __uptr); \ + } \ + __ret; \ +}) + +#define put_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = put_user(x, __uptr); \ + } \ + __ret; \ +}) + +static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, + unsigned long from, unsigned long len, + int to_guest, int prefixing) +{ + unsigned long _len, rc; void __user *uptr; - unsigned long size; - - if (guestdest + n < guestdest) - return -EFAULT; - - /* simple case: all within one segment table entry? 
*/ - if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - size = PMD_SIZE - (guestdest & ~PMD_MASK); - - r = copy_to_user(uptr, from, size); - - if (r) { - r = -EFAULT; - goto out; - } - from += size; - n -= size; - guestdest += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - from += PMD_SIZE; - n -= PMD_SIZE; - guestdest += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - return __copy_to_guest_fast(vcpu, guestdest, from, n); -} - -static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, - void *from, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestdest < prefix) && (guestdest + n > prefix)) - goto slowpath; - - if ((guestdest < prefix + 2 * PAGE_SIZE) - && (guestdest + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestdest < 2 * PAGE_SIZE) - guestdest += prefix; - else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) - guestdest -= prefix; - - return __copy_to_guest_fast(vcpu, guestdest, from, n); -slowpath: - return __copy_to_guest_slow(vcpu, guestdest, from, n); -} - -static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = to; - - for (i = 0; i < n; i++) { - rc = get_guest_u8(vcpu, guestsrc++, data++); - if (rc < 0) - return rc; + while (len) { + uptr = to_guest ? (void __user *)to : (void __user *)from; + uptr = __gptr_to_uptr(vcpu, uptr, prefixing); + if (IS_ERR((void __force *)uptr)) + return -EFAULT; + _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); + _len = min(_len, len); + if (to_guest) + rc = copy_to_user((void __user *) uptr, (void *)from, _len); + else + rc = copy_from_user((void *)to, (void __user *)uptr, _len); + if (rc) + return -EFAULT; + len -= _len; + from += _len; + to += _len; } return 0; } -static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int r; - void __user *uptr; - unsigned long size; - - if (guestsrc + n < guestsrc) - return -EFAULT; - - /* simple case: all within one segment table entry? 
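The rewrite folds the old fixed-width helpers (get_guest_u8/u16/u32/u64 and their put_ counterparts) into the type-polymorphic get_guest()/put_guest() macros plus a single page-wise __copy_guest() worker: the access width and the alignment check both fall out of sizeof(*(gptr)). A converted call site then shrinks to something like this (illustrative, mirroring the updated sites in intercept.c):

	u64 val;
	int rc;

	/* width (8 bytes) and alignment mask follow from the u64 __user type */
	rc = get_guest(vcpu, val, (u64 __user *) useraddr);
	if (rc)
		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
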
*/ - if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - size = PMD_SIZE - (guestsrc & ~PMD_MASK); - - r = copy_from_user(to, uptr, size); - - if (r) { - r = -EFAULT; - goto out; - } - to += size; - n -= size; - guestsrc += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - to += PMD_SIZE; - n -= PMD_SIZE; - guestsrc += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -} - -static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) - goto slowpath; +#define copy_to_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) +#define copy_from_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) +#define copy_to_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) +#define copy_from_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) - if ((guestsrc < prefix) && (guestsrc + n > prefix)) - goto slowpath; - - if ((guestsrc < prefix + 2 * PAGE_SIZE) - && (guestsrc + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestsrc < 2 * PAGE_SIZE) - guestsrc += prefix; - else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) - guestsrc -= prefix; - - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -slowpath: - return __copy_from_guest_slow(vcpu, to, guestsrc, n); -} -#endif +#endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bd..b7d1b2edeeb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); do { - rc = get_guest_u64(vcpu, useraddr, - &vcpu->arch.sie_block->gcr[reg]); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); useraddr += 8; if (reg == reg3) break; @@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { - rc = get_guest_u32(vcpu, useraddr, &val); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, val, (u32 __user 
*) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; vcpu->arch.sie_block->gcr[reg] |= val; useraddr += 4; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4..5c948177529 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; - int rc, exception = 0; + int rc = 0; switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->emerg.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_EXTERNAL_CALL: VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->extcall.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + 
__LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); break; - case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *)__LC_EXT_PARAMS2); break; - case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); vcpu->stat.deliver_stop_signal++; @@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 0); - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, - restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = copy_to_guest(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), + sizeof(psw_t)); atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; - case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", inti->pgm.code, @@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_PGM_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); + rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 __user *)__LC_PGM_ILC); + rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + 
&vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_MCHK: @@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->mchk.cr14, inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_MCK_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); + rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, - inti->io.subchannel_id); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, - inti->io.subchannel_nr); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, - inti->io.io_int_parm); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, - inti->io.io_int_word); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_IO_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->io.subchannel_id, + (u16 __user *) __LC_SUBCHANNEL_ID); + rc |= put_guest(vcpu, inti->io.subchannel_nr, + (u16 __user *) __LC_SUBCHANNEL_NR); + rc |= put_guest(vcpu, inti->io.io_int_parm, + (u32 __user *) __LC_IO_INT_PARM); + rc |= put_guest(vcpu, inti->io.io_int_word, + (u32 __user *) __LC_IO_INT_WORD); + rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); break; } default: BUG(); } - if (exception) { + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); + "delivery, killing userspace\n"); do_exit(SIGKILL); } } static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { - int rc, exception = 0; + int rc; if (psw_extint_disabled(vcpu)) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); - if (rc == -EFAULT) - exception = 1; - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - if (exception) { + rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + 
&vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " "delivery, killing userspace\n"); do_exit(SIGKILL); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e..c1c7c683fa2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IOEVENTFD: r = 1; break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; @@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } else { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - rc = 0; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", @@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ - - if (mem->slot) - return -EINVAL; - - if (mem->guest_phys_addr) - return -EINVAL; + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ if (mem->userspace_addr & 0xffffful) return -EINVAL; @@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; - if (!user_alloc) - return -EINVAL; - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { int rc; + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. 
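Put differently: only an update that actually moves or resizes the slot's backing memory forces a gmap remap; a flags-only change is left alone. The inline condition below is equivalent to this hypothetical predicate (the helper does not exist in the patch):

	static bool slot_moved_or_resized(const struct kvm_memory_slot *old,
					  struct kvm_userspace_memory_region *mem)
	{
		return old->userspace_addr != mem->userspace_addr ||
		       old->base_gfn * PAGE_SIZE != mem->guest_phys_addr ||
		       old->npages * PAGE_SIZE != mem->memory_size;
	}
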
+ */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, mem->guest_phys_addr, mem->memory_size); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a816..efc14f68726 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e..6bbd7b5a0bb 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,8 @@ #include <linux/kvm.h> #include <linux/gfp.h> #include <linux/errno.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> @@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest_u32(vcpu, operand2, &address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (get_guest(vcpu, address, (u32 __user *) operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); address = address & 0x7fffe000u; /* make sure that the new value is valid memory */ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); -out: return 0; } @@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); address = vcpu->arch.sie_block->prefix; address = address & 0x7fffe000u; /* get the value */ - if (put_guest_u32(vcpu, operand2, address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, address, (u32 __user *)operand2)) + return 
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 0, address); -out: return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { u64 useraddr; - int rc; vcpu->stat.instruction_stap++; useraddr = kvm_s390_get_base_disp_s(vcpu); - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (useraddr & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); trace_kvm_s390_handle_stap(vcpu, useraddr); -out: return 0; } @@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu) static int handle_tpi(struct kvm_vcpu *vcpu) { - u64 addr; struct kvm_s390_interrupt_info *inti; + u64 addr; int cc; addr = kvm_s390_get_base_disp_s(vcpu); - + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); - if (inti) { - if (addr) { - /* - * Store the two-word I/O interruption code into the - * provided area. - */ - put_guest_u16(vcpu, addr, inti->io.subchannel_id); - put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); - put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); - } else { - /* - * Store the three-word I/O interruption code into - * the appropriate lowcore area. - */ - put_guest_u16(vcpu, 184, inti->io.subchannel_id); - put_guest_u16(vcpu, 186, inti->io.subchannel_nr); - put_guest_u32(vcpu, 188, inti->io.io_int_parm); - put_guest_u32(vcpu, 192, inti->io.io_int_word); - } - cc = 1; - } else - cc = 0; + if (!inti) + goto no_interrupt; + cc = 1; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); + put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); + } kfree(inti); +no_interrupt: /* Set condition code and we're done. 
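The pair of mask operations that follows is the standard idiom for writing the guest condition code, which occupies PSW bits 18-19 — bits 44-45 counted from the least-significant end of the 64-bit mask. Expressed as a hypothetical helper (not part of the patch):

	/* sketch: store condition code cc (0-3) into the guest PSW */
	static inline void set_guest_cc(struct kvm_vcpu *vcpu, int cc)
	{
		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
		vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
	}
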
*/ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; @@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); - if (rc == -EFAULT) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else { - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); + trace_kvm_s390_handle_stfl(vcpu, facility_list); return 0; } @@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL -#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL #define PSW_ADDR_31 0x000000007fffffffUL +static int is_valid_psw(psw_t *psw) { + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + return 1; +} + int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { - u64 addr; + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; psw_compat_t new_psw; + u64 addr; - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OPERATION); - addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - if (!(new_psw.mask & PSW32_MASK_BASE)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = - (new_psw.mask & ~PSW32_MASK_BASE) << 32; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_lpswe(struct kvm_vcpu *vcpu) { - u64 addr; psw_t new_psw; + u64 addr; addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = new_psw.mask; - vcpu->arch.sie_block->gpsw.addr = 
new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_BA) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; - int rc; vcpu->stat.instruction_stidp++; operand2 = kvm_s390_get_base_disp_s(vcpu); - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } @@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) - goto out_mem; + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; default: - goto out_fail; + goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_exception; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: - free_page(mem); -out_fail: +out_no_data: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; +out_exception: + free_page(mem); + return rc; } static const intercept_handler_t b2_handlers[256] = { @@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) return -EOPNOTSUPP; - - /* we must resolve the address without holding the mmap semaphore. - * This is ok since the userspace hypervisor is not supposed to change - * the mapping while the guest queries the memory. Otherwise the guest - * might crash or get wrong info anyway. 
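Two details in the priv.c changes above deserve a callout: the scattered lpsw/lpswe validity checks are centralized in is_valid_psw(), and PSW_ADDR_24 is corrected from 0x00000000000fffffUL — only 20 one-bits — to 0x0000000000ffffffUL, the genuine 24-bit address mask ((1UL << 24) - 1). Both load-PSW paths then reduce to the same pattern, condensed here from the hunk for illustration:

	vcpu->arch.sie_block->gpsw = new_psw;
	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
	handle_new_psw(vcpu);
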
*/ - user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); - down_read(&current->mm->mmap_sem); + user_address = __gmap_translate(address1, vcpu->arch.gmap); + if (IS_ERR_VALUE(user_address)) + goto out_inject; vma = find_vma(current->mm, user_address); - if (!vma) { - up_read(&current->mm->mmap_sem); - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - + if (!vma) + goto out_inject; vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); @@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) up_read(&current->mm->mmap_sem); return 0; + +out_inject: + up_read(&current->mm->mmap_sem); + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 466fb338396..50ea137a2d3 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -89,16 +89,19 @@ static unsigned long follow_table(struct mm_struct *mm, if (unlikely(*table & _REGION_ENTRY_INV)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); if (unlikely(*table & _SEGMENT_ENTRY_INV)) diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 640bea12303..839592ca265 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o -obj-y += page-states.o gup.o extable.o pageattr.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0b09b234230..89ebae4008f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/memory.h> #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> @@ -36,6 +37,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); @@ -214,6 +216,15 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * than or equal to the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int arch_remove_memory(u64 start, u64 size) { diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 00000000000..3cbd3b8bf31 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,134 @@ +/* + * Copyright IBM Corp. 
2008, 2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr = 0, size; + int i = 0, type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (sizeof(long) == 4) { + rzm = min(ADDR2G, rzm); + memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + } + if (maxsize) + memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize; + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (memsize && addr + size >= memsize) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (memsize && (addr + size > memsize)) + size = memsize - addr; + chunk[i].addr = addr; + chunk[i].size = size; + chunk[i].type = type; + i++; + } + addr += size; + } while (addr < memsize && i < MEMORY_CHUNKS); +} + +/** + * detect_memory_layout - fill mem_chunk array with memory layout data + * @chunk: mem_chunk array to be filled + * @maxsize: maximum address where memory detection should stop + * + * Fills the passed in memory chunk array with the memory layout of the + * machine. The array must have a size of at least MEMORY_CHUNKS and will + * be fully initialized afterwards. + * If the maxsize parameter has a value > 0 memory detection will stop at + * that address. It is guaranteed that all chunks have an ending address + * that is smaller than maxsize. + * If maxsize is 0 all memory will be detected. + */ +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long flags, flags_dat, cr0; + + memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); + /* + * Disable IRQs, DAT and low address protection so tprot does the + * right thing and we don't get scheduled away with low address + * protection disabled. + */ + local_irq_save(flags); + flags_dat = __arch_local_irq_stnsm(0xfb); + /* + * In case DAT was enabled, make sure chunk doesn't reside in vmalloc + * space. We have disabled DAT and any access to vmalloc area will + * cause an exception. + * If DAT was disabled we are called from early ipl code. + */ + if (test_bit(5, &flags_dat)) { + if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk))) + goto out; + } + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); + find_memory_chunks(chunk, maxsize); + __ctl_load(cr0, 0, 0); +out: + __arch_local_irq_ssm(flags_dat); + local_irq_restore(flags); +} +EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address and size. 
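Together with detect_memory_layout() above, this gives boot code a two-step recipe: probe the layout, then punch holes for regions that must stay untouched. A condensed usage sketch mirroring the calls visible in setup.c and crash_dump.c:

	struct mem_chunk chunks[MEMORY_CHUNKS];

	detect_memory_layout(chunks, memory_end);	/* maxsize 0 = no upper bound */
	create_mem_hole(chunks, OLDMEM_BASE, OLDMEM_SIZE);
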
+ */ +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size) +{ + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &mem_chunk[i]; + + if (chunk->size == 0) + continue; + if (addr > chunk->addr + chunk->size) + continue; + if (addr + size <= chunk->addr) + continue; + /* Split */ + if ((addr > chunk->addr) && + (addr + size < chunk->addr + chunk->size)) { + struct mem_chunk *new = chunk + 1; + + memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new)); + new->addr = addr + size; + new->size = chunk->addr + chunk->size - new->addr; + chunk->size = addr - chunk->addr; + continue; + } else if ((addr <= chunk->addr) && + (addr + size >= chunk->addr + chunk->size)) { + memset(chunk, 0 , sizeof(*chunk)); + } else if (addr + size < chunk->addr + chunk->size) { + chunk->size = chunk->addr + chunk->size - addr - size; + chunk->addr = addr + size; + } else if (addr > chunk->addr) { + chunk->size = addr - chunk->addr; + } + } +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index bd954e96f51..7805ddca833 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -454,9 +454,8 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long segment, - unsigned long *segment_ptr, - struct gmap *gmap) +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) { unsigned long vmaddr; struct vm_area_struct *vma; @@ -491,7 +490,9 @@ static int gmap_connect_pgtable(unsigned long segment, /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; rmap->entry = segment_ptr; + rmap->vmaddr = address; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -553,7 +554,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) if (!(segment & _SEGMENT_ENTRY_RO)) /* Nothing mapped in the gmap address space. */ break; - rc = gmap_connect_pgtable(segment, segment_ptr, gmap); + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); if (rc) return rc; } @@ -619,6 +620,118 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @address: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. 
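These callbacks are driven from the pgste_ipte_notify() hooks added to the pte primitives at the top of this section. The three entry points are meant to be used together; a sketch with a hypothetical consumer, based only on the signatures visible in this hunk:

	static void my_gmap_cb(struct gmap *gmap, unsigned long gaddr)
	{
		/* hypothetical callback: react to the invalidated guest address */
	}

	static struct gmap_notifier my_nb = {
		.notifier_call	= my_gmap_cb,
	};

	gmap_register_ipte_notifier(&my_nb);
	if (gmap_ipte_notify(gmap, start, len))
		/* -EINVAL, -EFAULT or -ENOMEM: range not fully armed */;
	/* ... run the guest; my_gmap_cb fires on pte invalidation ... */
	gmap_unregister_ipte_notifier(&my_nb);
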
If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, + NULL, NULL) != 1) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= RCP_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 35837054f73..8b268fcc461 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -375,9 +375,8 @@ void __init vmem_map_init(void) ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; @@ -412,9 +411,6 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) - continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a58..c9230680902 100644 --- a/arch/um/include/shared/common-offsets.h +++ 
b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464..e9824d5dd7d 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c6..9bd4ecac72b 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f7..ab0ae1aa6d0 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,6 +12,9 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; +#endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_irq_work_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5..1da97efad08 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 95fd3527f63..d806b228d2c 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -24,10 +24,18 @@ #include <asm/io_apic.h> +struct IO_APIC_route_entry; +struct io_apic_irq_attr; +struct irq_chip; +struct msi_msg; +struct pci_dev; +struct irq_cfg; + #ifdef CONFIG_IRQ_REMAP extern void setup_irq_remapping_ops(void); extern int irq_remapping_supported(void); +extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); extern int irq_remapping_enable(void); extern void irq_remapping_disable(void); @@ -54,6 +62,7 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip); static inline void setup_irq_remapping_ops(void) { } static inline int irq_remapping_supported(void) { return 0; } +static inline void set_irq_remapping_broken(void) { } static inline int irq_remapping_prepare(void) { return -ENODEV; } static inline int irq_remapping_enable(void) { return -ENODEV; } static inline void irq_remapping_disable(void) { } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86..5702d7e3111 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define 
X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4979778cc7f..3741c653767 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,7 +31,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> -#define KVM_MAX_VCPUS 254 +#define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 #define KVM_USER_MEM_SLOTS 125 /* memory slots that are not exposed to userspace */ @@ -43,6 +43,8 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -94,9 +96,6 @@ #define ASYNC_PF_PER_VCPU 64 -extern raw_spinlock_t kvm_lock; -extern struct list_head vm_list; - struct kvm_vcpu; struct kvm; struct kvm_async_pf; @@ -230,6 +229,7 @@ struct kvm_mmu_page { #endif int write_flooding_count; + bool mmio_cached; }; struct kvm_pio_request { @@ -345,7 +345,6 @@ struct kvm_vcpu_arch { unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; - int sipi_vector; u64 ia32_misc_enable_msr; bool tpr_access_reporting; @@ -643,7 +642,7 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - int (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); @@ -696,14 +695,16 @@ struct kvm_x86_ops { int (*nmi_allowed)(struct kvm_vcpu *vcpu); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); - void (*enable_nmi_window)(struct kvm_vcpu *vcpu); - void (*enable_irq_window)(struct kvm_vcpu *vcpu); + int (*enable_nmi_window)(struct kvm_vcpu *vcpu); + int (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*vm_has_apicv)(struct kvm *kvm); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); @@ -730,6 +731,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); + void (*handle_external_intr)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); @@ -797,6 +800,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) +#define 
EMULTYPE_NO_REEXECUTE (1 << 4) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, } void kvm_enable_efer_bits(u64); +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -973,7 +979,6 @@ enum { * Trap the fault and ignore the instruction if that happens. */ asmlinkage void kvm_spurious_fault(void); -extern bool kvm_rebooting; #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ "666: " insn "\n\t" \ @@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); @@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e39..f3e01a2cbaa 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -65,11 +65,16 @@ #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -81,6 +86,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 @@ -89,9 +96,15 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 
0x00000804, @@ -126,6 +139,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -136,6 +151,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMWRITE_BITMAP = 0x00002028, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -209,6 +226,7 @@ enum vmcs_field { GUEST_INTERRUPTIBILITY_INFO = 0x00004824, GUEST_ACTIVITY_STATE = 0X00004826, GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, CR0_GUEST_HOST_MASK = 0x00006000, CR4_GUEST_HOST_MASK = 0x00006002, diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a65ec29e6ff..5d9a3033b3d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -29,7 +29,6 @@ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_DEVICE_ASSIGNMENT #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index b5757885d7a..b3a4866661c 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -528,6 +528,8 @@ #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee6..d651082c7cf 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -110,7 +111,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } - + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ffd6050a1de..f60d41ff9a9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -128,10 +128,15 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* + * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT + * siblings; disable these events because they can corrupt unrelated + * counters. 
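+	 * An all-zero counter mask in the constraints below leaves the
+	 * events defined but schedulable on no counter at all, which is
+	 * how they are effectively disabled here.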
+	 */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	EVENT_CONSTRAINT_END
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index da02e9cc375..d978353c939 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -310,7 +310,7 @@ void intel_pmu_lbr_read(void)
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
 	u64 br_type = event->attr.branch_sample_type;
 	int mask = 0;
@@ -318,8 +318,11 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	if (br_type & PERF_SAMPLE_BRANCH_USER)
 		mask |= X86_BR_USER;
 
-	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
+		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		mask |= X86_BR_KERNEL;
+	}
 
 	/* we ignore BRANCH_HV here */
 
@@ -339,6 +342,8 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	 * be used by fixup code for some CPU
 	 */
 	event->hw.branch_reg.reg = mask;
+
+	return 0;
 }
 
 /*
@@ -386,7 +391,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * setup SW LBR filter
 	 */
-	intel_pmu_setup_sw_lbr_filter(event);
+	ret = intel_pmu_setup_sw_lbr_filter(event);
+	if (ret)
+		return ret;
 
 	/*
 	 * setup HW LBR filter, if any
@@ -442,8 +449,18 @@ static int branch_type(unsigned long from, unsigned long to)
 			return X86_BR_NONE;
 
 		addr = buf;
-	} else
-		addr = (void *)from;
+	} else {
+		/*
+		 * The LBR logs any address in the IP, even if the IP just
+		 * faulted. This means userspace can control the from address.
+		 * Ensure we don't blindly read any address by validating it is
+		 * a known text address.
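+		 * kernel_text_address() accepts only core kernel and module
+		 * text, so the decode below only ever reads known-good
+		 * kernel text, never an unmapped or attacker-chosen pointer.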
+ */ + if (kernel_text_address(from)) + addr = (void *)from; + else + return X86_BR_NONE; + } /* * decoder needs to know the ABI especially diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index d0f9e5aa215..52441a2af53 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -3093,7 +3093,7 @@ static void __init uncore_types_exit(struct intel_uncore_type **types) static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; - struct attribute_group *events_group; + struct attribute_group *attr_group; struct attribute **attrs; int i, j; @@ -3120,19 +3120,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type) while (type->event_descs[i].attr.attr.name) i++; - events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + - sizeof(*events_group), GFP_KERNEL); - if (!events_group) + attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + + sizeof(*attr_group), GFP_KERNEL); + if (!attr_group) goto fail; - attrs = (struct attribute **)(events_group + 1); - events_group->name = "events"; - events_group->attrs = attrs; + attrs = (struct attribute **)(attr_group + 1); + attr_group->name = "events"; + attr_group->attrs = attrs; for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->events_group = events_group; + type->events_group = attr_group; } type->pmu_group = &uncore_pmu_attr_group; @@ -3545,11 +3545,12 @@ static int __init uncore_cpu_init(void) msr_uncores = nhm_msr_uncores; break; case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ if (snb_uncore_cbox.num_boxes > max_cores) snb_uncore_cbox.num_boxes = max_cores; msr_uncores = snb_msr_uncores; break; - case 45: /* Sandy Birdge-EP */ + case 45: /* Sandy Bridge-EP */ if (snbep_uncore_cbox.num_boxes > max_cores) snbep_uncore_cbox.num_boxes = max_cores; msr_uncores = snbep_msr_uncores; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3755ef49439..94ab6b90dd3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,6 +18,7 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/irq_remapping.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -192,6 +193,21 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif +static void __init intel_remapping_check(int num, int slot, int func) +{ + u8 revision; + + revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); + + /* + * Revision 0x13 of this chipset supports irq remapping + * but has an erratum that breaks its behavior, flag it as such + */ + if (revision == 0x13) + set_irq_remapping_broken(); + +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -221,6 +237,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, + { PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, + { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, {} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca79..72720894103 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,6 +1166,11 @@ 
apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f10591..ac0631d8996 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -165,10 +165,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif return sum; } @@ -228,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTERRUPT_VECTOR. + */ +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + inc_irq_stat(kvm_posted_intr_ipis); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b..a2a1fbc594f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0732f0089a3..d2c381280e3 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -160,8 +160,12 @@ int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high, ret; - struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; + struct pvclock_vcpu_time_info *src; + + if (!hv_clock) + return 0; + src = &hv_clock[cpu].pvti; low = (int)slow_virt_to_phys(src) | 1; high = ((u64)slow_virt_to_phys(src) >> 32); ret = native_write_msr_safe(msr_kvm_system_time, low, high); @@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void) struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; + if (!hv_clock) + return 0; + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); preempt_disable(); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 586f0005980..a47a3e54b96 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,14 +21,13 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM depends on HIGH_RES_TIMERS - # for device assignment: - depends on PCI # for TASKSTATS/TASK_DELAY_ACCT: depends on NET select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE select KVM_ASYNC_PF @@ -82,6 +81,17 @@ config KVM_MMU_AUDIT This option adds a R/W kVM module parameter 'mmu_audit', which allows audit KVM MMU at runtime. +config KVM_DEVICE_ASSIGNMENT + bool "KVM legacy PCI device assignment support" + depends on KVM && PCI && IOMMU_API + default y + ---help--- + Provide support for legacy PCI device assignment through KVM. 
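+	  Devices are attached with the KVM_ASSIGN_PCI_DEVICE ioctl on the
+	  VM file descriptor.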
The + kernel now also supports a full featured userspace device driver + framework through VFIO, which supersedes much of this support. + + If unsure, say Y. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 04d30401c5c..d609e1d8404 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I. kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o eventfd.o \ - assigned-dev.o) -kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) + irqchip.o) +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ + assigned-dev.o iommu.o) kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde7..8e517bba6a7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -132,8 +132,9 @@ #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ #define No64 (1<<28) #define PageTable (1 << 29) /* instruction used to write page table */ +#define NotImpl (1 << 30) /* instruction is not implemented */ /* Source 2 operand type */ -#define Src2Shift (30) +#define Src2Shift (31) #define Src2None (OpNone << Src2Shift) #define Src2CL (OpCL << Src2Shift) #define Src2ImmByte (OpImmByte << Src2Shift) @@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, memset(&seg_desc, 0, sizeof seg_desc); - if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) - || ctxt->mode == X86EMUL_MODE_REAL) { - /* set real mode segment descriptor */ + if (ctxt->mode == X86EMUL_MODE_REAL) { + /* set real mode segment descriptor (keep limit etc. 
for + * unreal mode) */ ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); set_desc_base(&seg_desc, selector << 4); goto load; + } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) { + /* VM86 needs a clean new segment descriptor */ + set_desc_base(&seg_desc, selector << 4); + set_desc_limit(&seg_desc, 0xffff); + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = 3; + goto load; } rpl = selector & 3; @@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ .check_perm = (_p) } -#define N D(0) +#define N D(NotImpl) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } @@ -3713,7 +3723,7 @@ static const struct opcode group5[] = { I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), I(SrcMem | Stack, em_grp45), I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), N, + I(SrcMem | Stack, em_grp45), D(Undefined), }; static const struct opcode group6[] = { @@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, break; case OpMem8: ctxt->memop.bytes = 1; + if (ctxt->memop.type == OP_REG) { + ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); + fetch_register_operand(&ctxt->memop); + } goto mem_common; case OpMem16: ctxt->memop.bytes = 2; @@ -4373,7 +4387,7 @@ done_prefixes: ctxt->intercept = opcode.intercept; /* Unrecognised? */ - if (ctxt->d == 0 || (ctxt->d & Undefined)) + if (ctxt->d == 0 || (ctxt->d & NotImpl)) return EMULATION_FAILED; if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) @@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.pos = 0; - if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c1d30b2fc9b..412a5aa0ef9 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work) } spin_unlock(&ps->inject_lock); if (inject) { - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); /* * Provides NMI watchdog support via Virtual Wire mode. 
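The two kvm_set_irq() calls in the i8254 hunk above illustrate the interface change threaded through this series: a trailing bool line_status argument, which later hunks feed into the I/O APIC's level-IRQ bookkeeping (see kvm_rtc_eoi_tracking_restore_one below). A minimal caller-side sketch; kvm, irq_source_id and the GSI value 0 are illustrative only:

	/* Pulse the line: assert, then deassert. line_status is passed as
	 * false here, matching the PIT's edge-style pulse above. */
	kvm_set_irq(kvm, irq_source_id, 0, 1, false);
	kvm_set_irq(kvm, irq_source_id, 0, 0, false);
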
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f77df1c5de6..e1adbb4aca7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + return apic_test_vector(vector, apic->regs + APIC_ISR) || + apic_test_vector(vector, apic->regs + APIC_IRR); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_exit_bitmap) -{ - struct kvm_lapic **dst; - struct kvm_apic_map *map; - unsigned long bitmap = 1; - int i; - - rcu_read_lock(); - map = rcu_dereference(vcpu->kvm->arch.apic_map); - - if (unlikely(!map)) { - __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); - goto out; - } - - if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - goto out; - } - dst = &map->phys_map[irq->dest_id & 0xff]; - } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); - - dst = map->logical_map[apic_cluster_id(map, mda)]; - - bitmap = apic_logical_id(map, mda); - } - - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (dst[i]->vcpu == vcpu) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - break; - } - } - -out: - rcu_read_unlock(); -} - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -256,7 +217,7 @@ out: if (old) kfree_rcu(old, rcu); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); } static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) @@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap) return count; } +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +{ + u32 i, pir_val; + struct kvm_lapic *apic = vcpu->arch.apic; + + for (i = 0; i <= 7; i++) { + pir_val = xchg(&pir[i], 0); + if (pir_val) + *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; + } +} +EXPORT_SYMBOL_GPL(kvm_apic_update_irr); + static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) { apic->irr_pending = true; @@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) } static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode); + int vector, int level, int trig_mode, + unsigned long *dest_map); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode); + irq->level, irq->trig_mode, dest_map); } static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) @@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } +void 
kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int i; + + for (i = 0; i < 8; i++) + apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); +} + static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, } bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r) + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) { struct kvm_apic_map *map; unsigned long bitmap = 1; @@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq); + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, continue; if (*r < 0) *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq); + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } ret = true; @@ -681,7 +667,8 @@ out: * Return 1 if successfully added and 0 if discarded. */ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode) + int vector, int level, int trig_mode, + unsigned long *dest_map) { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; @@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); + if (dest_map) + __set_bit(vcpu->vcpu_id, dest_map); - result = !apic_test_and_set_irr(vector, apic); - trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, !result); - if (!result) { - if (trig_mode) - apic_debug("level trig mode repeatedly for " - "vector %d", vector); - break; - } + if (kvm_x86_ops->deliver_posted_interrupt) { + result = 1; + kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); + } else { + result = !apic_test_and_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if (!result) { + if (trig_mode) + apic_debug("level trig mode repeatedly " + "for vector %d", vector); + goto out; + } + + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } +out: + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector, !result); break; case APIC_DM_REMRD: @@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + /* assumes that there are only KVM_APIC_INIT/SIPI */ + apic->pending_events = (1UL << KVM_APIC_INIT); + /* make sure pending_events is visible before sending + * the request */ + smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } else { @@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - result = 1; - vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } + result = 1; + apic->sipi_vector = vector; + /* make sure sipi_vector is visible for the 
receiver */ + smp_wmb(); + set_bit(KVM_APIC_SIPI, &apic->pending_events); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_EXTINT: @@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } } @@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.vector); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } static u32 apic_get_tmcct(struct kvm_lapic *apic) @@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode); + return __apic_accept_irq(apic, mode, vector, 1, trig_mode, + NULL); } return 0; } @@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, apic->highest_isr_cache = -1; kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) @@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) addr, sizeof(u8)); } +void kvm_apic_accept_events(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + unsigned int sipi_vector; + + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { + kvm_lapic_reset(vcpu); + kvm_vcpu_reset(vcpu); + if (kvm_vcpu_is_bsp(apic->vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + } + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, sipi_vector); + kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } +} + void kvm_lapic_init(void) { /* do not patch jump label more than once per second */ diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4..c730ac9fe80 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -5,6 +5,9 @@ #include <linux/kvm_host.h> +#define KVM_APIC_INIT 0 +#define KVM_APIC_SIPI 1 + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -32,6 +35,8 @@ struct kvm_lapic { void *regs; gpa_t vapic_addr; struct page *vapic_page; + unsigned long pending_events; + unsigned int sipi_vector; }; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); u64 
kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); +void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r); + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); @@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) return ldr & map->lid_mask; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_bitmap); +static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.apic->pending_events; +} + +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 956ca358108..004cc87b781 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + access &= ACC_WRITE_MASK | ACC_USER_MASK; + sp->mmio_cached = true; trace_mark_mmio_spte(sptep, gfn, access); mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); } @@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; + sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); if (!direct) @@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn)) {} else +#define for_each_gfn_sp(_kvm, _sp, _gfn) \ + hlist_for_each_entry(_sp, \ + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ + if ((_sp)->gfn != (_gfn)) {} else -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn) || (sp)->role.direct || \ - (sp)->role.invalid) {} else +#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ + for_each_gfn_sp(_kvm, _sp, _gfn) \ + if ((_sp)->role.direct || (_sp)->role.invalid) {} else /* @sp->gfn should be write-protected at the call site */ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list) { - struct kvm_mmu_page *sp; + struct kvm_mmu_page *sp, *nsp; if (list_empty(invalid_list)) return; @@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, */ kvm_flush_remote_tlbs(kvm); - do { - sp = 
list_first_entry(invalid_list, struct kvm_mmu_page, link); + list_for_each_entry_safe(sp, nsp, invalid_list, link) { WARN_ON(!sp->role.invalid || sp->root_count); kvm_mmu_free_page(sp); - } while (!list_empty(invalid_list)); + } +} + +static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp; + + if (list_empty(&kvm->arch.active_mmu_pages)) + return false; + + sp = list_entry(kvm->arch.active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); + + return true; } /* @@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) { LIST_HEAD(invalid_list); - /* - * If we set the number of mmu pages to be smaller be than the - * number of actived pages , we must to free some mmu pages before we - * change the value - */ spin_lock(&kvm->mmu_lock); if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { - while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && - !list_empty(&kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *page; + /* Need to free some mmu pages to achieve the goal. */ + while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) + if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + break; - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); - } kvm_mmu_commit_zap_page(kvm, &invalid_list); goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; } @@ -2794,6 +2802,7 @@ exit: static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, pfn_t *pfn, bool write, bool *writable); +static void make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, @@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL, NULL); ++sp->root_count; @@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL, @@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp->spt); @@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL, NULL); @@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t 
gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, gpa, write, map_writable, @@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +static void make_mmu_pages_available(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && - !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *sp; + if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) + return; + + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { + if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) + break; - sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_recycled; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -4185,17 +4194,22 @@ restart: spin_unlock(&kvm->mmu_lock); } -static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, - struct list_head *invalid_list) +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) { - struct kvm_mmu_page *page; + struct kvm_mmu_page *sp, *node; + LIST_HEAD(invalid_list); - if (list_empty(&kvm->arch.active_mmu_pages)) - return; + spin_lock(&kvm->mmu_lock); +restart: + list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { + if (!sp->mmio_cached) + continue; + if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) + goto restart; + } - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, invalid_list); + kvm_mmu_commit_zap_page(kvm, &invalid_list); + spin_unlock(&kvm->mmu_lock); } static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) @@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); + prepare_zap_oldest_mmu_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); spin_unlock(&kvm->mmu_lock); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 69871080e86..2adcbc2cac6 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { - return kvm->arch.n_max_mmu_pages - - kvm->arch.n_used_mmu_pages; -} + if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) + return kvm->arch.n_max_mmu_pages - + kvm->arch.n_used_mmu_pages; -static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) -{ - if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) - __kvm_mmu_free_some_pages(vcpu); + return 0; } static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 105dd5bd550..da20860b457 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - kvm_mmu_free_some_pages(vcpu); + 
make_mmu_pages_available(vcpu); if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf9..c53e797e736 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) return 1; } -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + u32 index = msr_info->index; + u64 data = msr_info->data; switch (index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } break; case MSR_CORE_PERF_GLOBAL_STATUS: + if (msr_info->host_initiated) { + pmu->global_status = data; + return 0; + } break; /* RO MSR */ case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { - pmu->global_status &= ~data; + if (!msr_info->host_initiated) + pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; return 0; } @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) default: if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || (pmc = get_fixed_pmc(pmu, index))) { - data = (s64)(s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; pmc->counter += data - read_pmc(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7d39d70647e..a14a6eaf871 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) init_seg(&save->gs); save->cs.selector = 0xf000; + save->cs.base = 0xffff0000; /* Executable/Readable Code Segment */ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - /* - * cs.base should really be 0xffff0000, but vmx can't handle that, so - * be consistent with it. - * - * Replace when we have real mode working for vmx. 
- */
-	save->cs.base = 0xf0000;
 	save->gdtr.limit = 0xffff;
 	save->idtr.limit = 0xffff;
@@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	enable_gif(svm);
 }
 
-static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 dummy;
@@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	init_vmcb(svm);
 
-	if (!kvm_vcpu_is_bsp(vcpu)) {
-		kvm_rip_write(vcpu, 0);
-		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
-		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
-	}
-
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
-
-	return 0;
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
 	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
 	    exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
-		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
+		printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
 		       "exit_code 0x%x\n", __func__,
 		       svm->vmcb->control.exit_int_info, exit_code);
@@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
 	return;
 }
 
+static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+{
+	return;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
+static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 	}
+	return 0;
 }
 
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
+static int enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
 	    == HF_NMI_MASK)
-		return; /* IRET will cause a vm exit */
+		return 0; /* IRET will cause a vm exit */
 
 	/*
 	 * Something prevents NMI from being injected. Single step over possible
Single step over possible @@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_bp_intercept(vcpu); + return 0; } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4247,6 +4240,11 @@ out: return ret; } +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, @@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 867b81037f9..25a791ed21c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid; +static bool __read_mostly enable_apicv = 1; +module_param(enable_apicv, bool, S_IRUGO); +static bool __read_mostly enable_shadow_vmcs = 1; +module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use * VMX and be a hypervisor for its own guests. If nested=0, guests may not @@ -298,7 +301,8 @@ struct __packed vmcs12 { u32 guest_activity_state; u32 guest_sysenter_cs; u32 host_ia32_sysenter_cs; - u32 padding32[8]; /* room for future expansion */ + u32 vmx_preemption_timer_value; + u32 padding32[7]; /* room for future expansion */ u16 virtual_processor_id; u16 guest_es_selector; u16 guest_cs_selector; @@ -351,6 +355,12 @@ struct nested_vmx { /* The host-usable pointer to the above */ struct page *current_vmcs12_page; struct vmcs12 *current_vmcs12; + struct vmcs *current_shadow_vmcs; + /* + * Indicates if the shadow vmcs must be updated with the + * data hold by vmcs12 + */ + bool sync_shadow_vmcs; /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; @@ -365,6 +375,31 @@ struct nested_vmx { struct page *apic_access_page; }; +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + u32 control; /* bit 0 of control is outstanding notification bit */ + u32 rsvd[7]; +} __aligned(64); + +static bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -377,6 +412,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -428,6 +464,9 @@ struct vcpu_vmx { bool rdtscp_enabled; + 
/* Posted interrupt descriptor */ + struct pi_desc pi_desc; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ [number##_HIGH] = VMCS12_OFFSET(name)+4 + +static const unsigned long shadow_read_only_fields[] = { + /* + * We do NOT shadow fields that are modified when L0 + * traps and emulates any vmx instruction (e.g. VMPTRLD, + * VMXON...) executed by L1. + * For example, VM_INSTRUCTION_ERROR is read + * by L1 if a vmx instruction fails (part of the error path). + * Note the code assumes this logic. If for some reason + * we start shadowing these fields then we need to + * force a shadow sync when L0 emulates vmx instructions + * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified + * by nested_vmx_failValid) + */ + VM_EXIT_REASON, + VM_EXIT_INTR_INFO, + VM_EXIT_INSTRUCTION_LEN, + IDT_VECTORING_INFO_FIELD, + IDT_VECTORING_ERROR_CODE, + VM_EXIT_INTR_ERROR_CODE, + EXIT_QUALIFICATION, + GUEST_LINEAR_ADDRESS, + GUEST_PHYSICAL_ADDRESS +}; +static const int max_shadow_read_only_fields = + ARRAY_SIZE(shadow_read_only_fields); + +static const unsigned long shadow_read_write_fields[] = { + GUEST_RIP, + GUEST_RSP, + GUEST_CR0, + GUEST_CR3, + GUEST_CR4, + GUEST_INTERRUPTIBILITY_INFO, + GUEST_RFLAGS, + GUEST_CS_SELECTOR, + GUEST_CS_AR_BYTES, + GUEST_CS_LIMIT, + GUEST_CS_BASE, + GUEST_ES_BASE, + CR0_GUEST_HOST_MASK, + CR0_READ_SHADOW, + CR4_READ_SHADOW, + TSC_OFFSET, + EXCEPTION_BITMAP, + CPU_BASED_VM_EXEC_CONTROL, + VM_ENTRY_EXCEPTION_ERROR_CODE, + VM_ENTRY_INTR_INFO_FIELD, + VM_ENTRY_INSTRUCTION_LEN, + VM_ENTRY_EXCEPTION_ERROR_CODE, + HOST_FS_BASE, + HOST_GS_BASE, + HOST_FS_SELECTOR, + HOST_GS_SELECTOR +}; +static const int max_shadow_read_write_fields = + ARRAY_SIZE(shadow_read_write_fields); + static const unsigned short vmcs_field_to_offset_table[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(GUEST_ES_SELECTOR, guest_es_selector), @@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), + FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), @@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); +static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; +static unsigned long *vmx_vmread_bitmap; +static unsigned long *vmx_vmwrite_bitmap; static bool cpu_has_load_ia32_efer; static bool cpu_has_load_perf_global_ctrl; @@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_posted_intr(void) +{ + return 
vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static inline bool cpu_has_vmx_apicv(void) +{ + return cpu_has_vmx_apic_register_virt() && + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void) SECONDARY_EXEC_WBINVD_EXITING; } +static inline bool cpu_has_vmx_shadow_vmcs(void) +{ + u64 vmx_msr; + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + /* check if the cpu supports writing r/o exit information fields */ + if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + return false; + + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_SHADOW_VMCS; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, u32 intr_info = nr | INTR_INFO_VALID_MASK; if (nr == PF_VECTOR && is_guest_mode(vcpu) && - nested_pf_handled(vcpu)) + !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) return; if (has_error_code) { @@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_misc_low, nested_vmx_misc_high; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ /* pin-based controls */ + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, + nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); /* * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. */ - nested_vmx_pinbased_ctls_low = 0x16 ; - nested_vmx_pinbased_ctls_high = 0x16 | - PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | + PIN_BASED_VMX_PREEMPTION_TIMER; + nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* + * Exit controls + * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and + * 17 must be 1. + */ + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. 
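The low/high word pairs being assembled in nested_vmx_setup_ctls_msrs() follow the VMX capability-MSR convention: the low word holds the allowed-0 bits (they must be 1 in the control), the high word the allowed-1 bits (they may be 1). The vmx_control_verify() helper that appears a little further down checks exactly that. A self-contained sketch of the predicate, with made-up example values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Valid iff the control sets every allowed-0 ("low") bit and
     * nothing outside the allowed-1 ("high") bits. */
    static bool control_ok(uint32_t control, uint32_t low, uint32_t high)
    {
        return ((control & high) | low) == control;
    }

    int main(void)
    {
        uint32_t low = 0x16, high = 0x16 | (1u << 3); /* bit 3 optional */

        printf("%d\n", control_ok(0x16, low, high));     /* 1: exact */
        printf("%d\n", control_ok(0x16 | 8, low, high)); /* 1: optional bit */
        printf("%d\n", control_ok(0x12, low, high));     /* 0: misses a must-be-1 bit */
        printf("%d\n", control_ok(0x36, low, high));     /* 0: sets a forbidden bit */
        return 0;
    }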
*/ + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, @@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | + CPU_BASED_PAUSE_EXITING | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_WBINVD_EXITING; + + /* miscellaneous data */ + rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); + nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | + VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_high = 0; } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) @@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_entry_ctls_high); break; case MSR_IA32_VMX_MISC: - *pdata = 0; + *pdata = vmx_control_msr(nested_vmx_misc_low, + nested_vmx_misc_high); break; /* * These MSRs specify bits which the guest must keep fixed (on or off) @@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_SHADOW_VMCS; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | + VM_EXIT_ACK_INTR_ON_EXIT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2762,6 +2916,8 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_vpid()) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels()) { @@ -2788,14 +2944,16 @@ static 
__init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt() || - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; - if (enable_apicv_reg_vid) + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; - else + else { kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } if (nested) nested_vmx_setup_ctls_msrs(); @@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->cpl = 0; } -static gva_t rmode_tss_base(struct kvm *kvm) -{ - if (!kvm->arch.tss_addr) { - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - gfn_t base_gfn; - - slots = kvm_memslots(kvm); - slot = id_to_memslot(slots, 0); - base_gfn = slot->base_gfn + slot->npages - 3; - - return base_gfn << PAGE_SHIFT; - } - return kvm->arch.tss_addr; -} - static void fix_rmode_seg(int seg, struct kvm_segment *save) { const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering - * vcpu. Call it here with phys address pointing 16M below 4G. + * vcpu. Warn the user that an update is overdue. */ - if (!vcpu->kvm->arch.tss_addr) { + if (!vcpu->kvm->arch.tss_addr) printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " "called before entering vcpu\n"); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - } vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); + vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); @@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) */ if (!nested_vmx_allowed(vcpu)) return 1; - } else if (to_vmx(vcpu)->nested.vmxon) + } + if (to_vmx(vcpu)->nested.vmxon && + ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) return 1; vcpu->arch.cr4 = cr4; @@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) return true; /* real mode guest state checks */ - if (!is_protmode(vcpu)) { + if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) return false; if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) @@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm) int r, idx, ret = 0; idx = srcu_read_lock(&kvm->srcu); - fn = rmode_tss_base(kvm) >> PAGE_SHIFT; + fn = kvm->arch.tss_addr >> PAGE_SHIFT; r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, 
MSR_TYPE_W); } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv && irqchip_in_kernel(kvm); +} + +/* + * Send interrupt to vcpu via posted interrupt way. + * 1. If target vcpu is running(non-root mode), send posted interrupt + * notification to vcpu and hardware will sync PIR to vIRR atomically. + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + return; + + r = pi_test_and_set_on(&vmx->pi_desc); + kvm_make_request(KVM_REQ_EVENT, vcpu); +#ifdef CONFIG_SMP + if (!r && (vcpu->mode == IN_GUEST_MODE)) + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + else +#endif + kvm_vcpu_kick(vcpu); +} + +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!pi_test_and_clear_on(&vmx->pi_desc)) + return; + + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); +} + +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) +{ + return; +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. * Note that host-state that does change is set elsewhere. E.g., host-state * that is set differently for each CPU is set in vmx_vcpu_load(), not here. */ -static void vmx_set_constant_host_state(void) +static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) { u32 low32, high32; unsigned long tmpl; @@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ + vmx->host_idt_base = dt.address; vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ @@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + return pin_based_exec_ctrl; +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static int vmx_vm_has_apicv(struct kvm *kvm) -{ - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); -} - static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD + (handle_vmptrld). 
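The vmx_deliver_posted_interrupt()/vmx_sync_pir_to_irr() pair above is a lock-free handoff: the sender sets the vector's bit in the PIR and sends a notification only if it was the one to flip the ON bit; the receiver clears ON and folds the PIR into the IRR. A compilable userspace model of that protocol, using C11 atomics; names are invented and no claim is made to match the hardware descriptor layout exactly:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy posted-interrupt descriptor: 256 request bits (8 x 32)
     * plus an "outstanding notification" (ON) flag, loosely
     * mirroring struct pi_desc. */
    struct toy_pi_desc {
        atomic_uint pir[8];
        atomic_uint control;   /* bit 0 = ON */
    };

    /* Sender side: returns 1 if a notification (IPI/kick) is needed. */
    static int toy_deliver(struct toy_pi_desc *pi, int vector)
    {
        unsigned int mask = 1u << (vector & 31);

        if (atomic_fetch_or(&pi->pir[vector >> 5], mask) & mask)
            return 0;              /* vector already pending */
        /* Only the first sender to turn ON on must notify. */
        return !(atomic_fetch_or(&pi->control, 1u) & 1u);
    }

    /* Receiver side, like vmx_sync_pir_to_irr: clear ON, merge PIR. */
    static void toy_sync(struct toy_pi_desc *pi, uint32_t irr[8])
    {
        if (!(atomic_fetch_and(&pi->control, ~1u) & 1u))
            return;                /* nothing outstanding */
        for (int i = 0; i < 8; i++)
            irr[i] |= atomic_exchange(&pi->pir[i], 0);
    }

    int main(void)
    {
        struct toy_pi_desc pi = {0};
        uint32_t irr[8] = {0};

        printf("notify: %d\n", toy_deliver(&pi, 0x31)); /* 1 */
        printf("notify: %d\n", toy_deliver(&pi, 0x32)); /* 0: ON already set */
        toy_sync(&pi, irr);
        printf("irr[1] = 0x%x\n", (unsigned)irr[1]);    /* 0x60000 */
        return 0;
    }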
+ We can NOT enable shadow_vmcs here because we don't have yet + a current VMCS12 + */ + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; return exec_control; } @@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); + if (enable_shadow_vmcs) { + vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + } if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } - if (enable_apicv_reg_vid) { + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); vmcs_write64(EOI_EXIT_BITMAP3, 0); vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } if (ple_gap) { @@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ @@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; vmx->rmode.vm86_active = 0; @@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - else { - vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); - vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); - } + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_write32(GUEST_CS_BASE, 0xffff0000); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_writel(GUEST_RFLAGS, 0x02); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - kvm_rip_write(vcpu, 0xfff0); - else - kvm_rip_write(vcpu, 0); + kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx_vm_has_apicv(vcpu->kvm)) + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); + if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); vpid_sync_context(vmx); - - ret = 0; - - return ret; } /* @@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu 
*vcpu) PIN_BASED_EXT_INTR_MASK; } -static void enable_irq_window(struct kvm_vcpu *vcpu) +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) +{ + return get_vmcs12(vcpu)->pin_based_vm_exec_control & + PIN_BASED_NMI_EXITING; +} + +static int enable_irq_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) /* * We get here if vmx_interrupt_allowed() said we can't - * inject to L1 now because L2 must run. Ask L2 to exit - * right after entry, so we can inject to L1 more promptly. + * inject to L1 now because L2 must run. The caller will have + * to make L2 exit right after entry, so we can inject to L1 + * more promptly. */ - kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); - return; - } + return -EBUSY; cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } -static void enable_nmi_window(struct kvm_vcpu *vcpu) +static int enable_nmi_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (!cpu_has_virtual_nmis()) { - enable_irq_window(vcpu); - return; - } + if (!cpu_has_virtual_nmis()) + return enable_irq_window(vcpu); + + if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) + return enable_irq_window(vcpu); - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -{ - if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - return 0; - - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -} - static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) @@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (to_vmx(vcpu)->nested.nested_run_pending) + return 0; + if (nested_exit_on_nmi(vcpu)) { + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; + vmcs12->vm_exit_intr_info = NMI_VECTOR | + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; + /* + * The NMI-triggered VM exit counts as injection: + * clear this one and block further NMIs. 
+ */ + vcpu->arch.nmi_pending = 0; + vmx_set_nmi_mask(vcpu, true); + return 0; + } + } + + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) + return 0; + + return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI + | GUEST_INTR_STATE_NMI)); +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + if (is_guest_mode(vcpu)) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending || - (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK)) + + if (to_vmx(vcpu)->nested.nested_run_pending) return 0; - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; - /* fall through to normal code, but now in L1, not L2 */ + if (nested_exit_on_intr(vcpu)) { + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = + EXIT_REASON_EXTERNAL_INTERRUPT; + vmcs12->vm_exit_intr_info = 0; + /* + * fall through to normal code, but now in L1, not L2 + */ + } } return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, false); + ret = kvm_set_memory_region(kvm, &tss_mem); if (ret) return ret; kvm->arch.tss_addr = addr; @@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)->nested.vmxon && - ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + /* * We get here when L2 changed cr0 in a way that did not change * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. + * but did change L0 shadowed bits. So we first calculate the + * effective cr0 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1's guest_cr0. 
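The recalculation described above, and implemented in the lines that follow, is a plain masked merge: bits set in cr0_guest_host_mask are owned by L1, everything else by L2. A tiny sketch of the merge plus the always-on check; the ALWAYSON value here is a loose stand-in (the real VMXON_CR0_ALWAYSON also includes NE):

    #include <stdint.h>
    #include <stdio.h>

    #define CR0_PE (1ull << 0)
    #define CR0_TS (1ull << 3)
    #define CR0_PG (1ull << 31)
    #define ALWAYSON (CR0_PE | CR0_PG)   /* illustrative subset */

    /* L2-owned bits from the new value, L1-owned bits from L1's
     * guest_cr0, exactly as handle_set_cr0 now computes it. */
    static uint64_t effective_cr0(uint64_t new_val, uint64_t guest_cr0,
                                  uint64_t guest_host_mask)
    {
        return (new_val & ~guest_host_mask) | (guest_cr0 & guest_host_mask);
    }

    int main(void)
    {
        /* L1 owns TS (lazy FPU); L2 writes CR0 with TS cleared. */
        uint64_t val = effective_cr0(CR0_PE | CR0_PG, /* L2's write */
                                     CR0_TS,          /* L1's guest_cr0 */
                                     CR0_TS);         /* mask: L1 owns TS */

        printf("effective cr0 = %#llx, alwayson ok = %d\n",
               (unsigned long long)val, (val & ALWAYSON) == ALWAYSON);
        return 0;
    }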
*/ - if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | - (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) + val = (val & ~vmcs12->cr0_guest_host_mask) | + (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); + + /* TODO: will have to take unrestricted guest mode into + * account */ + if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) return 1; - vmcs_writel(CR0_READ_SHADOW, val); + + if (kvm_set_cr0(vcpu, val)) + return 1; + vmcs_writel(CR0_READ_SHADOW, orig_val); return 0; - } else + } else { + if (to_vmx(vcpu)->nested.vmxon && + ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) + return 1; return kvm_set_cr0(vcpu, val); + } } static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) { if (is_guest_mode(vcpu)) { - if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | - (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + /* analogously to handle_set_cr0 */ + val = (val & ~vmcs12->cr4_guest_host_mask) | + (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); + if (kvm_set_cr4(vcpu, val)) return 1; - vmcs_writel(CR4_READ_SHADOW, val); + vmcs_writel(CR4_READ_SHADOW, orig_val); return 0; } else return kvm_set_cr4(vcpu, val); @@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, 0); + err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_DO_MMIO) { ret = 0; @@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) } /* Create a new VMCS */ - item = (struct vmcs02_list *) - kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); @@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) free_loaded_vmcs(&vmx->vmcs01); } +static void nested_vmx_failValid(struct kvm_vcpu *vcpu, + u32 vm_instruction_error); + /* * Emulate the VMXON instruction. 
* Currently, we just remember that VMX is active, and do not save or even @@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) { struct kvm_segment cs; struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs *shadow_vmcs; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) kvm_inject_gp(vcpu, 0); return 1; } + if (vmx->nested.vmxon) { + nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + skip_emulated_instruction(vcpu); + return 1; + } + if (enable_shadow_vmcs) { + shadow_vmcs = alloc_vmcs(); + if (!shadow_vmcs) + return -ENOMEM; + /* mark vmcs as shadow */ + shadow_vmcs->revision_id |= (1u << 31); + /* init shadow vmcs */ + vmcs_clear(shadow_vmcs); + vmx->nested.current_shadow_vmcs = shadow_vmcs; + } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num = 0; @@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } +static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) +{ + u32 exec_control; + if (enable_shadow_vmcs) { + if (vmx->nested.current_vmcs12 != NULL) { + /* copy to memory all shadowed fields in case + they were modified */ + copy_shadow_to_vmcs12(vmx); + vmx->nested.sync_shadow_vmcs = false; + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, -1ull); + } + } + kunmap(vmx->nested.current_vmcs12_page); + nested_release_page(vmx->nested.current_vmcs12_page); +} + /* * Free whatever needs to be freed from vmx->nested when L1 goes down, or * just stops using VMX. @@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx) return; vmx->nested.vmxon = false; if (vmx->nested.current_vmptr != -1ull) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; } + if (enable_shadow_vmcs) + free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); @@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, X86_EFLAGS_SF | X86_EFLAGS_OF)) | X86_EFLAGS_ZF); get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; + /* + * We don't need to force a shadow sync because + * VM_INSTRUCTION_ERROR is not shadowed + */ } /* Emulate the VMCLEAR instruction */ @@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) } if (vmptr == vmx->nested.current_vmptr) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; } @@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, } } + +static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, + unsigned long field, u64 field_value){ + short offset = vmcs_field_to_offset(field); + char *p = ((char *) get_vmcs12(vcpu)) + offset; + if (offset < 0) + return false; + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + *(u16 *)p = field_value; + return true; + case VMCS_FIELD_TYPE_U32: + *(u32 *)p = field_value; + return true; + case VMCS_FIELD_TYPE_U64: + *(u64 *)p = field_value; + return true; + 
case VMCS_FIELD_TYPE_NATURAL_WIDTH: + *(natural_width *)p = field_value; + return true; + default: + return false; /* can never happen. */ + } + +} + +static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) +{ + int i; + unsigned long field; + u64 field_value; + struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + unsigned long *fields = (unsigned long *)shadow_read_write_fields; + int num_fields = max_shadow_read_write_fields; + + vmcs_load(shadow_vmcs); + + for (i = 0; i < num_fields; i++) { + field = fields[i]; + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + field_value = vmcs_read16(field); + break; + case VMCS_FIELD_TYPE_U32: + field_value = vmcs_read32(field); + break; + case VMCS_FIELD_TYPE_U64: + field_value = vmcs_read64(field); + break; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + field_value = vmcs_readl(field); + break; + } + vmcs12_write_any(&vmx->vcpu, field, field_value); + } + + vmcs_clear(shadow_vmcs); + vmcs_load(vmx->loaded_vmcs->vmcs); +} + +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) +{ + unsigned long *fields[] = { + (unsigned long *)shadow_read_write_fields, + (unsigned long *)shadow_read_only_fields + }; + int num_lists = ARRAY_SIZE(fields); + int max_fields[] = { + max_shadow_read_write_fields, + max_shadow_read_only_fields + }; + int i, q; + unsigned long field; + u64 field_value = 0; + struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + + vmcs_load(shadow_vmcs); + + for (q = 0; q < num_lists; q++) { + for (i = 0; i < max_fields[q]; i++) { + field = fields[q][i]; + vmcs12_read_any(&vmx->vcpu, field, &field_value); + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + vmcs_write16(field, (u16)field_value); + break; + case VMCS_FIELD_TYPE_U32: + vmcs_write32(field, (u32)field_value); + break; + case VMCS_FIELD_TYPE_U64: + vmcs_write64(field, (u64)field_value); + break; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + vmcs_writel(field, (long)field_value); + break; + } + } + } + + vmcs_clear(shadow_vmcs); + vmcs_load(vmx->loaded_vmcs->vmcs); +} + /* * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was * used before) all generate the same failure when it is missing. @@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) gva_t gva; unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - char *p; - short offset; /* The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. 
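vmcs12_write_any() above is an offset-table store dispatched on field width, and copy_shadow_to_vmcs12()/copy_vmcs12_to_shadow() simply loop that dispatch over the shadowed field lists in the two directions. A reduced, compilable model of the dispatch with a two-field struct; all names are invented:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct toy_vmcs12 { uint32_t exit_reason; uint64_t guest_rip; };

    enum field { F_EXIT_REASON, F_GUEST_RIP, F_MAX };
    enum ftype { T_U32, T_U64 };

    /* Field -> (byte offset, width) lookup, like
     * vmcs_field_to_offset_table plus vmcs_field_type(). */
    static const struct { size_t off; enum ftype type; } table[F_MAX] = {
        [F_EXIT_REASON] = { offsetof(struct toy_vmcs12, exit_reason), T_U32 },
        [F_GUEST_RIP]   = { offsetof(struct toy_vmcs12, guest_rip),   T_U64 },
    };

    static int write_any(struct toy_vmcs12 *v, enum field f, uint64_t val)
    {
        char *p = (char *)v + table[f].off;
        uint32_t v32 = (uint32_t)val;

        switch (table[f].type) {
        case T_U32: memcpy(p, &v32, sizeof v32); return 1;
        case T_U64: memcpy(p, &val, sizeof val); return 1;
        }
        return 0;   /* unknown width: the "can never happen" default */
    }

    int main(void)
    {
        struct toy_vmcs12 v = {0};

        write_any(&v, F_GUEST_RIP, 0xfff0);
        write_any(&v, F_EXIT_REASON, 48);
        printf("rip=%#llx reason=%u\n",
               (unsigned long long)v.guest_rip, v.exit_reason);
        return 0;
    }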
The code below first zero-extends the value to 64 @@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; } - offset = vmcs_field_to_offset(field); - if (offset < 0) { - nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - skip_emulated_instruction(vcpu); - return 1; - } - p = ((char *) get_vmcs12(vcpu)) + offset; - - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - *(u16 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U32: - *(u32 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U64: - *(u64 *)p = field_value; - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - *(natural_width *)p = field_value; - break; - default: + if (!vmcs12_write_any(vcpu, field, field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); skip_emulated_instruction(vcpu); return 1; @@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) gva_t gva; gpa_t vmptr; struct x86_exception e; + u32 exec_control; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; } - if (vmx->nested.current_vmptr != -1ull) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); - } + if (vmx->nested.current_vmptr != -1ull) + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; + if (enable_shadow_vmcs) { + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control |= SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, + __pa(vmx->nested.current_shadow_vmcs)); + vmx->nested.sync_shadow_vmcs = true; + } } nested_vmx_succeed(vcpu); @@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + gpa_t bitmap, last_bitmap; + unsigned int port; + int size; + u8 b; + + if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) + return 1; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return 0; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + last_bitmap = (gpa_t)-1; + b = -1; + + while (size > 0) { + if (port < 0x8000) + bitmap = vmcs12->io_bitmap_a; + else if (port < 0x10000) + bitmap = vmcs12->io_bitmap_b; + else + return 1; + bitmap += (port & 0x7fff) / 8; + + if (last_bitmap != bitmap) + if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) + return 1; + if (b & (1 << (port & 7))) + return 1; + + port++; + size--; + last_bitmap = bitmap; + } + + return 0; +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access access, * rather than handle it ourselves in L0. 
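nested_vmx_exit_handled_io() above walks the two architectural I/O bitmaps: ports 0x0000-0x7fff live in bitmap A, 0x8000-0xffff in bitmap B, one bit per port, and a multi-byte access exits if any covered port's bit is set (the kernel version additionally caches the last bitmap page read from guest memory). A standalone sketch of the walk:

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t bitmap_a[0x1000], bitmap_b[0x1000]; /* 4 KiB each */

    static int io_access_exits(unsigned int port, int size)
    {
        while (size > 0) {
            uint8_t *bm;

            if (port < 0x8000)
                bm = bitmap_a;
            else if (port < 0x10000)
                bm = bitmap_b;
            else
                return 1;          /* wraps past 0xffff: always exit */
            if (bm[(port & 0x7fff) / 8] & (1 << (port & 7)))
                return 1;
            port++;
            size--;
        }
        return 0;
    }

    int main(void)
    {
        bitmap_a[0x70 / 8] |= 1 << (0x70 & 7);  /* trap port 0x70 (CMOS) */
        printf("outb 0x70 -> exit=%d\n", io_access_exits(0x70, 1)); /* 1 */
        printf("outw 0x6f -> exit=%d\n", io_access_exits(0x6f, 2)); /* 1: covers 0x70 */
        printf("outb 0x80 -> exit=%d\n", io_access_exits(0x80, 1)); /* 0 */
        return 0;
    }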
I.e., check whether L1 expressed @@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, /* Then read the msr_index'th bit from this bitmap: */ if (msr_index < 1024*8) { unsigned char b; - kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); + if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) + return 1; return 1 & (b >> (msr_index & 7)); } else return 1; /* let L1 handle the wrong parameter */ @@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) { - u32 exit_reason = vmcs_read32(VM_EXIT_REASON); u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_reason = vmx->exit_reason; if (vmx->nested.nested_run_pending) return 0; @@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TRIPLE_FAULT: return 1; case EXIT_REASON_PENDING_INTERRUPT: + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: - /* - * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit - * (aka Interrupt Window Exiting) only when L1 turned it on, - * so if we got a PENDING_INTERRUPT exit, this must be for L1. - * Same for NMI Window Exiting. - */ - return 1; + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: @@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_DR_ACCESS: return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); case EXIT_REASON_IO_INSTRUCTION: - /* TODO: support IO bitmaps */ - return 1; + return nested_vmx_exit_handled_io(vcpu, vmcs12); case EXIT_REASON_MSR_READ: case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); @@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_EPT_VIOLATION: case EXIT_REASON_EPT_MISCONFIG: return 0; + case EXIT_REASON_PREEMPTION_TIMER: + return vmcs12->pin_based_vm_exec_control & + PIN_BASED_VMX_PREEMPTION_TIMER; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { + if (!vmx_vm_has_apicv(vcpu->kvm)) + return; + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); @@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) } } +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * If external interrupt exists, IF bit is set in rflags/eflags on the + * interrupt stack frame, and interrupt will be enabled on a return + * from interrupt handler. 
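For the hunk below: on x86-64 an IDT entry splits the handler address across three fields, and gate_offset() reassembles them before the "call *%[entry]". A sketch of that reassembly, with the field layout as the architecture defines it and an invented sample address:

    #include <stdint.h>
    #include <stdio.h>

    /* Shape of a 64-bit IDT gate descriptor. */
    struct gate64 {
        uint16_t offset_low;
        uint16_t segment;
        uint16_t bits;           /* ist, type, dpl, p */
        uint16_t offset_middle;
        uint32_t offset_high;
        uint32_t reserved;
    };

    static uint64_t gate_offset(const struct gate64 *g)
    {
        return (uint64_t)g->offset_low |
               ((uint64_t)g->offset_middle << 16) |
               ((uint64_t)g->offset_high << 32);
    }

    int main(void)
    {
        struct gate64 g = {
            .offset_low = 0x1230, .offset_middle = 0x8100,
            .offset_high = 0xffffffff,
        };

        printf("entry = %#llx\n", (unsigned long long)gate_offset(&g));
        /* -> 0xffffffff81001230, a typical kernel text address */
        return 0;
    }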
+ */ + if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) + == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { + unsigned int vector; + unsigned long entry; + gate_desc *desc; + struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + unsigned long tmp; +#endif + + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)vmx->host_idt_base + vector; + entry = gate_offset(*desc); + asm volatile( +#ifdef CONFIG_X86_64 + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + "orl $0x200, (%%" _ASM_SP ")\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + "call *%[entry]\n\t" + : +#ifdef CONFIG_X86_64 + [sp]"=&r"(tmp) +#endif + : + [entry]"r"(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + } else + local_irq_enable(); +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); } -static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, u32 idt_vectoring_info, int instr_len_field, int error_code_field) @@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - vmx->vcpu.arch.nmi_injected = false; - kvm_clear_exception_queue(&vmx->vcpu); - kvm_clear_interrupt_queue(&vmx->vcpu); + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); if (!idtv_info_valid) return; - kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); + kvm_make_request(KVM_REQ_EVENT, vcpu); vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; switch (type) { case INTR_TYPE_NMI_INTR: - vmx->vcpu.arch.nmi_injected = true; + vcpu->arch.nmi_injected = true; /* * SDM 3: 27.7.1.2 (September 2008) * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. 
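The idt_vectoring_info word that __vmx_complete_interrupts() is unpacking here encodes the event per the VMX layout: vector in bits 7:0, type in bits 10:8, a deliver-error-code flag in bit 11, and a valid bit in bit 31. A small decoder over the same masks:

    #include <stdint.h>
    #include <stdio.h>

    #define VECTOR_MASK   0xffu
    #define TYPE_MASK     (7u << 8)
    #define TYPE_NMI      (2u << 8)
    #define TYPE_HARD_EXC (3u << 8)
    #define DELIVER_CODE  (1u << 11)
    #define VALID         (1u << 31)

    static void decode(uint32_t info)
    {
        if (!(info & VALID)) {
            printf("nothing to reinject\n");
            return;
        }
        printf("vector=%u type=%#x error_code=%s\n",
               info & VECTOR_MASK, (info & TYPE_MASK) >> 8,
               (info & DELIVER_CODE) ? "yes" : "no");
    }

    int main(void)
    {
        decode(VALID | TYPE_HARD_EXC | DELIVER_CODE | 14); /* #PF w/ code */
        decode(VALID | TYPE_NMI | 2);                      /* NMI */
        decode(0);
        return 0;
    }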
*/ - vmx_set_nmi_mask(&vmx->vcpu, false); + vmx_set_nmi_mask(vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); - kvm_queue_exception_e(&vmx->vcpu, vector, err); + kvm_queue_exception_e(vcpu, vector, err); } else - kvm_queue_exception(&vmx->vcpu, vector); + kvm_queue_exception(vcpu, vector); break; case INTR_TYPE_SOFT_INTR: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(&vmx->vcpu, vector, - type == INTR_TYPE_SOFT_INTR); + kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); break; default: break; @@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { - if (is_guest_mode(&vmx->vcpu)) - return; - __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, + __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, VM_EXIT_INSTRUCTION_LEN, IDT_VECTORING_ERROR_CODE); } static void vmx_cancel_injection(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) - return; - __vmx_complete_interrupts(to_vmx(vcpu), + __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), VM_ENTRY_INSTRUCTION_LEN, VM_ENTRY_EXCEPTION_ERROR_CODE); @@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long debugctlmsr; - if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK) { - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - vmcs12->idt_vectoring_info_field); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmcs12->vm_exit_instruction_len); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_DELIVER_CODE_MASK) - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, - vmcs12->idt_vectoring_error_code); - } - } - /* Record the guest's net vcpu time for enforced NMI injections. 
*/ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) vmx->entry_time = ktime_get(); @@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return; + if (vmx->nested.sync_shadow_vmcs) { + copy_vmcs12_to_shadow(vmx); + vmx->nested.sync_shadow_vmcs = false; + } + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) @@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; - if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { - vmcs12->idt_vectoring_error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - vmcs12->vm_exit_instruction_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - } - } - vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); @@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) put_cpu(); if (err) goto free_vmcs; - if (vm_need_virtualize_apic_accesses(kvm)) + if (vm_need_virtualize_apic_accesses(kvm)) { err = alloc_apic_access_page(kvm); if (err) goto free_vmcs; + } if (enable_ept) { if (!kvm->arch.ept_identity_map_addr) @@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->vm_entry_instruction_len); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); - vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs_config.pin_based_exec_ctrl | vmcs12->pin_based_vm_exec_control)); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + /* * Whether page-faults are trapped is determined by a combination of * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. @@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Other fields are different per CPU, and will be set later when * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. 
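The sync_shadow_vmcs test added to vmx_vcpu_run() above is an ordinary dirty flag: emulated VMWRITEs mark vmcs12 dirty and the flush into the shadow VMCS happens once, on the next entry, rather than on every write. Schematically, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    struct state { int vmcs12_val, shadow_val; bool sync_needed; };

    static void emulated_vmwrite(struct state *s, int v)
    {
        s->vmcs12_val = v;
        s->sync_needed = true;       /* defer the hardware update */
    }

    static void vcpu_enter(struct state *s)
    {
        if (s->sync_needed) {
            s->shadow_val = s->vmcs12_val;  /* copy_vmcs12_to_shadow() */
            s->sync_needed = false;
        }
        printf("entering L2 with shadow_val=%d\n", s->shadow_val);
    }

    int main(void)
    {
        struct state s = {0};

        emulated_vmwrite(&s, 1);
        emulated_vmwrite(&s, 2);     /* two writes, a single flush */
        vcpu_enter(&s);
        return 0;
    }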
*/ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before @@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->guest_ia32_efer; - if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) + else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) vcpu->arch.efer |= (EFER_LMA | EFER_LME); else vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); @@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; struct loaded_vmcs *vmcs02; + bool ia32e; if (!nested_vmx_check_permission(vcpu) || !nested_vmx_check_vmcs12(vcpu)) @@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) skip_emulated_instruction(vcpu); vmcs12 = get_vmcs12(vcpu); + if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + /* * The nested entry process starts with enforcing various prerequisites * on vmcs12 as required by the Intel SDM, and act appropriately when @@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + return 1; + } + if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { /*TODO: Also verify bits beyond physical address width are 0*/ @@ -7204,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } /* + * If the load IA32_EFER VM-entry control is 1, the following checks + * are performed on the field for the IA32_EFER MSR: + * - Bits reserved in the IA32_EFER MSR must be 0. + * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of + * the IA-32e mode guest VM-exit control. It must also be identical + * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to + * CR0.PG) is 1. + */ + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || + ((vmcs12->guest_cr0 & X86_CR0_PG) && + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + + /* + * If the load IA32_EFER VM-exit control is 1, bits reserved in the + * IA32_EFER MSR must be 0 in the field for that register. In addition, + * the values of the LMA and LME bits in the field must each be that of + * the host address-space size VM-exit control. + */ + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_exit_controls & + VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + + /* * We're finally done with prerequisite checking, and can start with * the nested entry. 
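The guest IA32_EFER consistency check spelled out above reduces to two equalities: LMA must match the IA-32e-mode entry control, and must also match LME whenever CR0.PG is set. A compilable restatement; the reserved-bit validation done by kvm_valid_efer() is left out:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LME (1ull << 8)
    #define EFER_LMA (1ull << 10)
    #define CR0_PG   (1ull << 31)

    static bool guest_efer_ok(uint64_t efer, bool ia32e_entry, uint64_t cr0)
    {
        if (ia32e_entry != !!(efer & EFER_LMA))
            return false;
        if ((cr0 & CR0_PG) && ia32e_entry != !!(efer & EFER_LME))
            return false;
        return true;
    }

    int main(void)
    {
        /* 64-bit guest: IA-32e entry, LMA+LME set, paging on -> ok */
        printf("%d\n", guest_efer_ok(EFER_LMA | EFER_LME, true, CR0_PG));
        /* Inconsistent: entry says IA-32e but LMA is clear -> reject */
        printf("%d\n", guest_efer_ok(0, true, CR0_PG));
        return 0;
    }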
*/ @@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + vmcs12->launch_state = 1; prepare_vmcs02(vcpu, vmcs12); @@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vcpu->arch.cr4_guest_owned_bits)); } +static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 idt_vectoring; + unsigned int nr; + + if (vcpu->arch.exception.pending) { + nr = vcpu->arch.exception.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (kvm_exception_is_soft(nr)) { + vmcs12->vm_exit_instruction_len = + vcpu->arch.event_exit_inst_len; + idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; + } else + idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; + + if (vcpu->arch.exception.has_error_code) { + idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; + vmcs12->idt_vectoring_error_code = + vcpu->arch.exception.error_code; + } + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } else if (vcpu->arch.nmi_pending) { + vmcs12->idt_vectoring_info_field = + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; + } else if (vcpu->arch.interrupt.pending) { + nr = vcpu->arch.interrupt.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (vcpu->arch.interrupt.soft) { + idt_vectoring |= INTR_TYPE_SOFT_INTR; + vmcs12->vm_entry_instruction_len = + vcpu->arch.event_exit_inst_len; + } else + idt_vectoring |= INTR_TYPE_EXT_INTR; + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } +} + /* * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), @@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * exit-information fields only. Other fields are modified by L1 with VMWRITE, * which already writes to vmcs12 directly. 
*/ -void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); @@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); - vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + vmcs12->vm_entry_controls = + (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | + (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); @@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* update exit information fields: */ - vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); + vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - vmcs12->idt_vectoring_info_field = - vmcs_read32(IDT_VECTORING_INFO_FIELD); - vmcs12->idt_vectoring_error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); + if ((vmcs12->vm_exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + vmcs12->idt_vectoring_info_field = 0; vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - /* clear vm-entry fields which are to be cleared on exit */ - if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { + /* vm_entry_intr_info_field is cleared on exit. Emulate this + * instead of reading the real value. */ vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; + + /* + * Transfer the event that L0 or L1 may wanted to inject into + * L2 to IDT_VECTORING_INFO_FIELD. + */ + vmcs12_save_pending_event(vcpu, vmcs12); + } + + /* + * Drop what we picked up for L2 via vmx_complete_interrupts. It is + * preserved above and would only end up incorrectly in L1. + */ + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); } /* @@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Failures During or After Loading Guest State"). * This function should be called when the active VMCS is L1's (vmcs01). 
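In the first hunk of load_vmcs12_host_state() just below, the second if becomes an else-if: when the exit loads a complete host EFER, the LMA/LME fixup keyed off the host address-space-size control must not patch it afterwards. The difference as a toy function; names and the sample inputs are invented for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LME (1ull << 8)
    #define EFER_LMA (1ull << 10)

    static uint64_t host_efer_after_exit(uint64_t cur, int load_efer,
                                         uint64_t host_efer, int host_64bit)
    {
        if (load_efer)
            return host_efer;        /* explicit load takes precedence */
        else if (host_64bit)
            return cur | (EFER_LMA | EFER_LME);
        else
            return cur & ~(EFER_LMA | EFER_LME);
    }

    int main(void)
    {
        /* Host EFER of 0 loaded explicitly: with the old pair of
         * independent ifs, LMA/LME would have been forced back on. */
        printf("%#llx\n", (unsigned long long)
               host_efer_after_exit(EFER_LMA | EFER_LME, 1, 0, 1));
        return 0;
    }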
*/ -void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; - if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) + else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) vcpu->arch.efer |= (EFER_LMA | EFER_LME); else vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); @@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); + vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because it depends on the current state of @@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl); + + kvm_set_dr(vcpu, 7, 0x400); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } /* @@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) int cpu; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + /* trying to cancel vmlaunch/vmresume is a bug */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + leave_guest_mode(vcpu); prepare_vmcs12(vcpu, vmcs12); @@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + /* if no vmcs02 cache requested, remove the one we used */ if (VMCS02_POOL_SIZE == 0) nested_free_vmcs02(vmx, vmx->nested.current_vmptr); @@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); } else nested_vmx_succeed(vcpu); + if (enable_shadow_vmcs) + vmx->nested.sync_shadow_vmcs = true; } /* @@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = qualification; nested_vmx_succeed(vcpu); + if (enable_shadow_vmcs) + to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } static int vmx_check_intercept(struct kvm_vcpu *vcpu, @@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, + .handle_external_intr = vmx_handle_external_intr, }; static int __init vmx_init(void) @@ -7656,6 +8221,24 @@ static int __init vmx_init(void) (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_msr_bitmap_longmode_x2apic) goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) + goto out5; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) + goto out6; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); + /* shadowed read/write fields */ + for (i = 0; i < max_shadow_read_write_fields; i++) { + clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); + 
clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); + } + /* shadowed read only fields */ + for (i = 0; i < max_shadow_read_only_fields; i++) + clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); /* * Allow direct access to the PC debug port (it is often used for I/O @@ -7674,7 +8257,7 @@ static int __init vmx_init(void) r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) - goto out3; + goto out7; #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, @@ -7692,7 +8275,7 @@ static int __init vmx_init(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); - if (enable_apicv_reg_vid) { + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); @@ -7722,6 +8305,12 @@ static int __init vmx_init(void) return 0; +out7: + free_page((unsigned long)vmx_vmwrite_bitmap); +out6: + free_page((unsigned long)vmx_vmread_bitmap); +out5: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); out3: @@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void) free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); + free_page((unsigned long)vmx_vmread_bitmap); #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1721324c27..05a8b1a2300 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); - static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); +asmlinkage void kvm_spurious_fault(void) +{ + /* Fault while not rebooting. We want the trace. 
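+ * (Context, our reading rather than part of the change: this is the
+ * landing point of the fault fixup wrapped around hardware
+ * virtualization instructions, reached when such an instruction
+ * faults while the machine is not rebooting, hence the BUG().)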
*/ + BUG(); +} +EXPORT_SYMBOL_GPL(kvm_spurious_fault); + #define EXCPT_BENIGN 0 #define EXCPT_CONTRIBUTORY 1 #define EXCPT_PF 2 @@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, }; -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - u64 old_efer = vcpu->arch.efer; - if (efer & efer_reserved_bits) - return 1; - - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + return false; if (efer & EFER_FFXSR) { struct kvm_cpuid_entry2 *feat; feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) - return 1; + return false; } if (efer & EFER_SVME) { @@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) - return 1; + return false; } + return true; +} +EXPORT_SYMBOL_GPL(kvm_valid_efer); + +static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + u64 old_efer = vcpu->arch.efer; + + if (!kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) + && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) u32 thresh_lo, thresh_hi; int use_scaling = 0; + /* tsc_khz can be zero if TSC calibration fails */ + if (this_tsc_khz == 0) + return; + /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, &vcpu->arch.virtual_tsc_shift, @@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; - /* n.b - signed multiplication and division required */ - usdiff = data - kvm->arch.last_tsc_write; + if (vcpu->arch.virtual_tsc_khz) { + /* n.b - signed multiplication and division required */ + usdiff = data - kvm->arch.last_tsc_write; #ifdef CONFIG_X86_64 - usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; + usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; #else - /* do_div() only does unsigned */ - asm("idivl %2; xor %%edx, %%edx" - : "=A"(usdiff) - : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); + /* do_div() only does unsigned */ + asm("idivl %2; xor %%edx, %%edx" + : "=A"(usdiff) + : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); #endif - do_div(elapsed, 1000); - usdiff -= elapsed; - if (usdiff < 0) - usdiff = -usdiff; + do_div(elapsed, 1000); + usdiff -= elapsed; + if (usdiff < 0) + usdiff = -usdiff; + } else + usdiff = USEC_PER_SEC; /* disable TSC match window below */ /* * Special case: TSC write with a small delta (1 second) of virtual @@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (pr || data != 0) vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " @@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); @@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext) 
case KVM_CAP_USER_NMI: case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_PIT2: @@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: case KVM_CAP_GET_TSC_KHZ: - case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: - case KVM_CAP_IRQFD_RESAMPLE: +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + case KVM_CAP_ASSIGN_DEV_IRQ: + case KVM_CAP_PCI_2_3: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_IOMMU: r = iommu_present(&pci_bus_type); break; +#endif case KVM_CAP_MCE: r = KVM_MAX_MCE_BANKS; break; @@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0; @@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) + if (irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); events->nmi.pad = 0; - events->sipi_vector = vcpu->arch.sipi_vector; + events->sipi_vector = 0; /* never valid when reporting to user space */ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && + kvm_vcpu_has_lapic(vcpu)) + vcpu->arch.apic->sipi_vector = events->sipi_vector; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -3478,13 +3503,15 @@ out: return r; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) { if (!irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level); + irq_event->irq, irq_event->level, + line_status); return 0; } @@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, - bool write_fault_to_shadow_pgtable) + bool write_fault_to_shadow_pgtable, + int emulation_type) { gpa_t gpa = cr2; pfn_t pfn; + if (emulation_type & EMULTYPE_NO_REEXECUTE) + return false; + if (!vcpu->arch.mmu.direct_map) { /* * Write permission should be allowed since only @@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2, - write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; if (emulation_type & 
EMULTYPE_SKIP) return EMULATE_FAIL; @@ -4930,7 +4961,8 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; return handle_emulation_failure(vcpu); @@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } -static void update_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; + u32 tmr[8]; + + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) + return; memset(eoi_exit_bitmap, 0, 32); + memset(tmr, 0, 32); - kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_apic_update_tmr(vcpu, tmr); } static int vcpu_enter_guest(struct kvm_vcpu *vcpu) @@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) int r; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; - bool req_immediate_exit = 0; + bool req_immediate_exit = false; if (vcpu->requests) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) @@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) record_steal_time(vcpu); if (kvm_check_request(KVM_REQ_NMI, vcpu)) process_nmi(vcpu); - req_immediate_exit = - kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); if (kvm_check_request(KVM_REQ_PMU, vcpu)) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_deliver_pmi(vcpu); - if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) - update_eoi_exitmap(vcpu); + if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) + vcpu_scan_ioapic(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + r = 1; + goto out; + } + inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_nmi_window(vcpu) != 0; else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_irq_window(vcpu) != 0; if (kvm_lapic_enabled(vcpu)) { /* @@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); + + /* Interrupt is enabled by handle_external_intr() */ + kvm_x86_ops->handle_external_intr(vcpu); ++vcpu->stat.exits; @@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) int r; struct kvm *kvm = vcpu->kvm; - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { - pr_debug("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->arch.sipi_vector); - kvm_lapic_reset(vcpu); - r = kvm_vcpu_reset(vcpu); - if (r) - return r; - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - } - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); r = vapic_enter(vcpu); if (r) { @@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) - { + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { + kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: vcpu->arch.mp_state = @@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) case 
KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; - case KVM_MP_STATE_SIPI_RECEIVED: + case KVM_MP_STATE_INIT_RECEIVED: + break; default: r = -EINTR; break; @@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); + kvm_apic_accept_events(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); r = -EAGAIN; goto out; @@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + kvm_apic_accept_events(vcpu); mp_state->mp_state = vcpu->arch.mp_state; return 0; } @@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu->arch.mp_state = mp_state->mp_state; + if (!kvm_vcpu_has_lapic(vcpu) && + mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + return -EINVAL; + + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); + } else + vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_vcpu_reset(vcpu); - if (r == 0) - r = kvm_mmu_setup(vcpu); + kvm_vcpu_reset(vcpu); + r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); return r; @@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - return kvm_x86_ops->vcpu_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu); +} + +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +{ + struct kvm_segment cs; + + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs.selector = vector << 8; + cs.base = vector << 12; + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_rip_write(vcpu, 0); } int kvm_arch_hardware_enable(void *garbage) @@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; - if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { + r = -ENOMEM; goto fail_free_mce_banks; + } r = fx_init(vcpu); if (r) @@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + if (current->mm == kvm->mm) { + /* + * Free memory regions allocated on behalf of userspace, + * unless the memory map has changed due to process exit + * or fd copying.
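+ *
+ * (Mechanism, inferred from the code below: each private slot is
+ * deleted by registering a zeroed region for it, i.e. one whose
+ * memory_size is 0.)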
+ */ + struct kvm_userspace_memory_region mem; + memset(&mem, 0, sizeof(mem)); + mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + + mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + + mem.slot = TSS_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + } kvm_iommu_unmap_guest(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -6903,24 +6979,21 @@ out_free: int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { - int npages = memslot->npages; - /* * Only private memory slots need to be mapped here since * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { unsigned long userspace_addr; /* * MAP_SHARED to prevent internal slot pages from being moved * by fork()/COW. */ - userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); @@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { - int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; + int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { int ret; - ret = vm_munmap(old.userspace_addr, - old.npages * PAGE_SIZE); + ret = vm_munmap(old->userspace_addr, + old->npages * PAGE_SIZE); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " @@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage mappings are destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); /* * If memory slot is created, or moved, we need to clear all * mmio sptes. 
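 * (Reasoning, for context: an mmio spte caches the fact that a gfn had
 * no memory slot; after a create or move that gfn may now be backed by
 * memory, so the stale entries must go, which is what the
 * KVM_MR_CREATE/KVM_MR_MOVE test below implements.)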
*/ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { - kvm_mmu_zap_all(kvm); + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + kvm_mmu_zap_mmio_sptes(kvm); kvm_reload_remote_mmus(kvm); } } @@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || kvm_apic_has_events(vcpu) || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b7b7a88d9f6..c2ca1818f33 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1,3 +1,4 @@ + /* rbd.c -- Export ceph rados objects as a Linux block device @@ -32,12 +33,14 @@ #include <linux/ceph/mon_client.h> #include <linux/ceph/decode.h> #include <linux/parser.h> +#include <linux/bsearch.h> #include <linux/kernel.h> #include <linux/device.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/blkdev.h> +#include <linux/slab.h> #include "rbd_types.h" @@ -52,13 +55,6 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have these defined elsewhere */ - -#define U8_MAX ((u8) (~0U)) -#define U16_MAX ((u16) (~0U)) -#define U32_MAX ((u32) (~0U)) -#define U64_MAX ((u64) (~0ULL)) - #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -72,6 +68,8 @@ #define RBD_SNAP_HEAD_NAME "-" +#define BAD_SNAP_INDEX U32_MAX /* invalid index into snap array */ + /* This allows a single page to hold an image name sent by OSD */ #define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) #define RBD_IMAGE_ID_LEN_MAX 64 @@ -80,11 +78,14 @@ /* Feature bits */ -#define RBD_FEATURE_LAYERING 1 +#define RBD_FEATURE_LAYERING (1<<0) +#define RBD_FEATURE_STRIPINGV2 (1<<1) +#define RBD_FEATURES_ALL \ + (RBD_FEATURE_LAYERING | RBD_FEATURE_STRIPINGV2) /* Features supported by this (client software) implementation. */ -#define RBD_FEATURES_ALL (0) +#define RBD_FEATURES_SUPPORTED (RBD_FEATURES_ALL) /* * An RBD device name will be "rbd#", where the "rbd" comes from @@ -112,7 +113,8 @@ struct rbd_image_header { char *snap_names; u64 *snap_sizes; - u64 obj_version; + u64 stripe_unit; + u64 stripe_count; }; /* @@ -142,13 +144,13 @@ struct rbd_image_header { */ struct rbd_spec { u64 pool_id; - char *pool_name; + const char *pool_name; - char *image_id; - char *image_name; + const char *image_id; + const char *image_name; u64 snap_id; - char *snap_name; + const char *snap_name; struct kref kref; }; @@ -174,13 +176,44 @@ enum obj_request_type { OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; +enum obj_req_flags { + OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */ + OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */ + OBJ_REQ_KNOWN, /* EXISTS flag valid: no = 0, yes = 1 */ + OBJ_REQ_EXISTS, /* target exists: no = 0, yes = 1 */ +}; + struct rbd_obj_request { const char *object_name; u64 offset; /* object start byte */ u64 length; /* bytes from offset */ + unsigned long flags; - struct rbd_img_request *img_request; - struct list_head links; /* img_request->obj_requests */ + /* + * An object request associated with an image will have its + * img_data flag set; a standalone object request will not. + * + * A standalone object request will have which == BAD_WHICH + * and a null obj_request pointer. 
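+ * (This is why obj_request and the img_request fields share the
+ * union below: at most one of the two pointers is valid for any
+ * given request.)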
+ * + * An object request initiated in support of a layered image + * object (to check for its existence before a write) will + * have which == BAD_WHICH and a non-null obj_request pointer. + * + * Finally, an object request for rbd image data will have + * which != BAD_WHICH, and will have a non-null img_request + * pointer. The value of which will be in the range + * 0..(img_request->obj_request_count-1). + */ + union { + struct rbd_obj_request *obj_request; /* STAT op */ + struct { + struct rbd_img_request *img_request; + u64 img_offset; + /* links for img_request->obj_requests list */ + struct list_head links; + }; + }; u32 which; /* posn image request list */ enum obj_request_type type; @@ -191,13 +224,12 @@ struct rbd_obj_request { u32 page_count; }; }; + struct page **copyup_pages; struct ceph_osd_request *osd_req; u64 xferred; /* bytes transferred */ - u64 version; int result; - atomic_t done; rbd_obj_callback_t callback; struct completion completion; @@ -205,19 +237,31 @@ struct rbd_obj_request { struct kref kref; }; +enum img_req_flags { + IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ + IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ + IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ +}; + struct rbd_img_request { - struct request *rq; struct rbd_device *rbd_dev; u64 offset; /* starting image byte offset */ u64 length; /* byte count from offset */ - bool write_request; /* false for read */ + unsigned long flags; union { + u64 snap_id; /* for reads */ struct ceph_snap_context *snapc; /* for writes */ - u64 snap_id; /* for reads */ }; + union { + struct request *rq; /* block request */ + struct rbd_obj_request *obj_request; /* obj req initiator */ + }; + struct page **copyup_pages; spinlock_t completion_lock;/* protects next_completion */ u32 next_completion; rbd_img_callback_t callback; + u64 xferred;/* aggregate bytes transferred */ + int result; /* first nonzero obj_request result */ u32 obj_request_count; struct list_head obj_requests; /* rbd_obj_request structs */ @@ -232,15 +276,6 @@ struct rbd_img_request { #define for_each_obj_request_safe(ireq, oreq, n) \ list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) -struct rbd_snap { - struct device dev; - const char *name; - u64 size; - struct list_head node; - u64 id; - u64 features; -}; - struct rbd_mapping { u64 size; u64 features; @@ -276,6 +311,7 @@ struct rbd_device { struct rbd_spec *parent_spec; u64 parent_overlap; + struct rbd_device *parent; /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -284,9 +320,6 @@ struct rbd_device { struct list_head node; - /* list of snapshots */ - struct list_head snaps; - /* sysfs related */ struct device dev; unsigned long open_count; /* protected by lock */ @@ -312,16 +345,21 @@ static DEFINE_SPINLOCK(rbd_dev_list_lock); static LIST_HEAD(rbd_client_list); /* clients */ static DEFINE_SPINLOCK(rbd_client_list_lock); -static int rbd_dev_snaps_update(struct rbd_device *rbd_dev); -static int rbd_dev_snaps_register(struct rbd_device *rbd_dev); +/* Slab caches for frequently-allocated structures */ + +static struct kmem_cache *rbd_img_request_cache; +static struct kmem_cache *rbd_obj_request_cache; +static struct kmem_cache *rbd_segment_name_cache; -static void rbd_dev_release(struct device *dev); -static void rbd_remove_snap_dev(struct rbd_snap *snap); +static int rbd_img_request_submit(struct rbd_img_request *img_request); + +static void rbd_dev_device_release(struct device *dev); static ssize_t rbd_add(struct 
bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove(struct bus_type *bus, const char *buf, size_t count); +static int rbd_dev_image_probe(struct rbd_device *rbd_dev); static struct bus_attribute rbd_bus_attrs[] = { __ATTR(add, S_IWUSR, NULL, rbd_add), @@ -383,8 +421,19 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ -static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); -static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); +static void rbd_img_parent_read(struct rbd_obj_request *obj_request); +static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); + +static int rbd_dev_refresh(struct rbd_device *rbd_dev); +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev); +static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, + u64 snap_id); +static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, + u8 *order, u64 *snap_size); +static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, + u64 *snap_features); +static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -484,6 +533,13 @@ out_opt: return ERR_PTR(ret); } +static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc) +{ + kref_get(&rbdc->kref); + + return rbdc; +} + /* * Find a ceph client with specific addr and configuration. If * found, bump its reference count. @@ -499,7 +555,8 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) spin_lock(&rbd_client_list_lock); list_for_each_entry(client_node, &rbd_client_list, node) { if (!ceph_compare_options(ceph_opts, client_node->client)) { - kref_get(&client_node->kref); + __rbd_get_client(client_node); + found = true; break; } @@ -722,7 +779,6 @@ static int rbd_header_from_disk(struct rbd_image_header *header, header->snap_sizes[i] = le64_to_cpu(ondisk->snaps[i].image_size); } else { - WARN_ON(ondisk->snap_names_len); header->snap_names = NULL; header->snap_sizes = NULL; } @@ -735,18 +791,13 @@ static int rbd_header_from_disk(struct rbd_image_header *header, /* Allocate and fill in the snapshot context */ header->image_size = le64_to_cpu(ondisk->image_size); - size = sizeof (struct ceph_snap_context); - size += snap_count * sizeof (header->snapc->snaps[0]); - header->snapc = kzalloc(size, GFP_KERNEL); + + header->snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); if (!header->snapc) goto out_err; - - atomic_set(&header->snapc->nref, 1); header->snapc->seq = le64_to_cpu(ondisk->snap_seq); - header->snapc->num_snaps = snap_count; for (i = 0; i < snap_count; i++) - header->snapc->snaps[i] = - le64_to_cpu(ondisk->snaps[i].id); + header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); return 0; @@ -761,70 +812,174 @@ out_err: return -ENOMEM; } -static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) +static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) { - struct rbd_snap *snap; + const char *snap_name; + rbd_assert(which < rbd_dev->header.snapc->num_snaps); + + /* Skip over names until we find the one we are looking for */ + + snap_name = rbd_dev->header.snap_names; + while (which--) + snap_name += strlen(snap_name) + 1; + + return kstrdup(snap_name, GFP_KERNEL); +} + +/* + * Snapshot id comparison function for use with qsort()/bsearch(). 
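+ * (Worked example, given the descending layout noted below: with ids
+ * stored as {5, 3, 1}, looking up 1 compares it against the middle
+ * element 3, gets a positive result, and correctly continues in the
+ * upper half of the array, where the smaller ids live.)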
+ * Note that result is for snapshots in *descending* order. + */ +static int snapid_compare_reverse(const void *s1, const void *s2) +{ + u64 snap_id1 = *(u64 *)s1; + u64 snap_id2 = *(u64 *)s2; + + if (snap_id1 < snap_id2) + return 1; + return snap_id1 == snap_id2 ? 0 : -1; +} + +/* + * Search a snapshot context to see if the given snapshot id is + * present. + * + * Returns the position of the snapshot id in the array if it's found, + * or BAD_SNAP_INDEX otherwise. + * + * Note: The snapshot array is kept sorted (by the osd) in + * reverse order, highest snapshot id first. + */ +static u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id) +{ + struct ceph_snap_context *snapc = rbd_dev->header.snapc; + u64 *found; + + found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps, + sizeof (snap_id), snapid_compare_reverse); + + return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX; +} + +static const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, + u64 snap_id) +{ + u32 which; + + which = rbd_dev_snap_index(rbd_dev, snap_id); + if (which == BAD_SNAP_INDEX) + return NULL; + + return _rbd_dev_v1_snap_name(rbd_dev, which); +} + +static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) +{ if (snap_id == CEPH_NOSNAP) return RBD_SNAP_HEAD_NAME; - list_for_each_entry(snap, &rbd_dev->snaps, node) - if (snap_id == snap->id) - return snap->name; + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + if (rbd_dev->image_format == 1) + return rbd_dev_v1_snap_name(rbd_dev, snap_id); - return NULL; + return rbd_dev_v2_snap_name(rbd_dev, snap_id); } -static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name) +static int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id, + u64 *snap_size) { + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + if (snap_id == CEPH_NOSNAP) { + *snap_size = rbd_dev->header.image_size; + } else if (rbd_dev->image_format == 1) { + u32 which; - struct rbd_snap *snap; + which = rbd_dev_snap_index(rbd_dev, snap_id); + if (which == BAD_SNAP_INDEX) + return -ENOENT; - list_for_each_entry(snap, &rbd_dev->snaps, node) { - if (!strcmp(snap_name, snap->name)) { - rbd_dev->spec->snap_id = snap->id; - rbd_dev->mapping.size = snap->size; - rbd_dev->mapping.features = snap->features; + *snap_size = rbd_dev->header.snap_sizes[which]; + } else { + u64 size = 0; + int ret; - return 0; - } + ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size); + if (ret) + return ret; + + *snap_size = size; } + return 0; +} + +static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id, + u64 *snap_features) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + if (snap_id == CEPH_NOSNAP) { + *snap_features = rbd_dev->header.features; + } else if (rbd_dev->image_format == 1) { + *snap_features = 0; /* No features for format 1 */ + } else { + u64 features = 0; + int ret; + + ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, &features); + if (ret) + return ret; - return -ENOENT; + *snap_features = features; + } + return 0; } -static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) +static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) { + const char *snap_name = rbd_dev->spec->snap_name; + u64 snap_id; + u64 size = 0; + u64 features = 0; int ret; - if (!memcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME, - sizeof (RBD_SNAP_HEAD_NAME))) { - rbd_dev->spec->snap_id = CEPH_NOSNAP; - rbd_dev->mapping.size = rbd_dev->header.image_size; - rbd_dev->mapping.features = rbd_dev->header.features; - ret = 0; +
if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) { + snap_id = rbd_snap_id_by_name(rbd_dev, snap_name); + if (snap_id == CEPH_NOSNAP) + return -ENOENT; } else { - ret = snap_by_name(rbd_dev, rbd_dev->spec->snap_name); - if (ret < 0) - goto done; - rbd_dev->mapping.read_only = true; + snap_id = CEPH_NOSNAP; } - set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); -done: - return ret; + ret = rbd_snap_size(rbd_dev, snap_id, &size); + if (ret) + return ret; + ret = rbd_snap_features(rbd_dev, snap_id, &features); + if (ret) + return ret; + + rbd_dev->mapping.size = size; + rbd_dev->mapping.features = features; + + /* If we are mapping a snapshot it must be marked read-only */ + + if (snap_id != CEPH_NOSNAP) + rbd_dev->mapping.read_only = true; + + return 0; } -static void rbd_header_free(struct rbd_image_header *header) +static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) { - kfree(header->object_prefix); - header->object_prefix = NULL; - kfree(header->snap_sizes); - header->snap_sizes = NULL; - kfree(header->snap_names); - header->snap_names = NULL; - ceph_put_snap_context(header->snapc); - header->snapc = NULL; + rbd_dev->mapping.size = 0; + rbd_dev->mapping.features = 0; + rbd_dev->mapping.read_only = true; +} + +static void rbd_dev_clear_mapping(struct rbd_device *rbd_dev) +{ + rbd_dev->mapping.size = 0; + rbd_dev->mapping.features = 0; + rbd_dev->mapping.read_only = true; } static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) @@ -833,7 +988,7 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) u64 segment; int ret; - name = kmalloc(MAX_OBJ_NAME_SIZE + 1, GFP_NOIO); + name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); if (!name) return NULL; segment = offset >> rbd_dev->header.obj_order; @@ -849,6 +1004,13 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) return name; } +static void rbd_segment_name_free(const char *name) +{ + /* The explicit cast here is needed to drop the const qualifier */ + + kmem_cache_free(rbd_segment_name_cache, (void *)name); +} + static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) { u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; @@ -921,6 +1083,37 @@ static void zero_bio_chain(struct bio *chain, int start_ofs) } /* + * similar to zero_bio_chain(), zeros data defined by a page array, + * starting at the given byte offset from the start of the array and + * continuing up to the given end offset. The pages array is + * assumed to be big enough to hold all bytes up to the end. + */ +static void zero_pages(struct page **pages, u64 offset, u64 end) +{ + struct page **page = &pages[offset >> PAGE_SHIFT]; + + rbd_assert(end > offset); + rbd_assert(end - offset <= (u64)SIZE_MAX); + while (offset < end) { + size_t page_offset; + size_t length; + unsigned long flags; + void *kaddr; + + page_offset = (size_t)(offset & ~PAGE_MASK); + length = min(PAGE_SIZE - page_offset, (size_t)(end - offset)); + local_irq_save(flags); + kaddr = kmap_atomic(*page); + memset(kaddr + page_offset, 0, length); + kunmap_atomic(kaddr); + local_irq_restore(flags); + + offset += length; + page++; + } +} + +/* * Clone a portion of a bio, starting at the given byte offset * and continuing for the number of bytes indicated. */ @@ -1064,6 +1257,77 @@ out_err: return NULL; } +/* + * The default/initial value for all object request flags is 0. For + * each flag, once its value is set to 1 it is never reset to 0 + * again. 
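+ *
+ * (The helpers below lean on this one-way property: setters use
+ * test_and_set_bit() so a double set is detected and warned about,
+ * and readers pair smp_mb() with a plain test_bit(), which is safe
+ * only because a set bit is never cleared again.)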
+ */ +static void obj_request_img_data_set(struct rbd_obj_request *obj_request) +{ + if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) { + struct rbd_device *rbd_dev; + + rbd_dev = obj_request->img_request->rbd_dev; + rbd_warn(rbd_dev, "obj_request %p already marked img_data\n", + obj_request); + } +} + +static bool obj_request_img_data_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0; +} + +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) { + struct rbd_device *rbd_dev = NULL; + + if (obj_request_img_data_test(obj_request)) + rbd_dev = obj_request->img_request->rbd_dev; + rbd_warn(rbd_dev, "obj_request %p already marked done\n", + obj_request); + } +} + +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0; +} + +/* + * This sets the KNOWN flag after (possibly) setting the EXISTS + * flag. The latter is set based on the "exists" value provided. + * + * Note that for our purposes once an object exists it never goes + * away again. It's possible that the responses from two existence + * checks are separated by the creation of the target object, and + * the first ("doesn't exist") response arrives *after* the second + * ("does exist"). In that case we ignore the second one. + */ +static void obj_request_existence_set(struct rbd_obj_request *obj_request, + bool exists) +{ + if (exists) + set_bit(OBJ_REQ_EXISTS, &obj_request->flags); + set_bit(OBJ_REQ_KNOWN, &obj_request->flags); + smp_mb(); +} + +static bool obj_request_known_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0; +} + +static bool obj_request_exists_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0; +} + static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { dout("%s: obj %p (was %d)\n", __func__, obj_request, @@ -1101,9 +1365,11 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, { rbd_assert(obj_request->img_request == NULL); - rbd_obj_request_get(obj_request); + /* Image request now owns object's original reference */ obj_request->img_request = img_request; obj_request->which = img_request->obj_request_count; + rbd_assert(!obj_request_img_data_test(obj_request)); + obj_request_img_data_set(obj_request); rbd_assert(obj_request->which != BAD_WHICH); img_request->obj_request_count++; list_add_tail(&obj_request->links, &img_request->obj_requests); @@ -1123,6 +1389,7 @@ static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, img_request->obj_request_count--; rbd_assert(obj_request->which == img_request->obj_request_count); obj_request->which = BAD_WHICH; + rbd_assert(obj_request_img_data_test(obj_request)); rbd_assert(obj_request->img_request == img_request); obj_request->img_request = NULL; obj_request->callback = NULL; @@ -1141,76 +1408,6 @@ static bool obj_request_type_valid(enum obj_request_type type) { } } -static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...)
-{ - struct ceph_osd_req_op *op; - va_list args; - size_t size; - - op = kzalloc(sizeof (*op), GFP_NOIO); - if (!op) - return NULL; - op->op = opcode; - va_start(args, opcode); - switch (opcode) { - case CEPH_OSD_OP_READ: - case CEPH_OSD_OP_WRITE: - /* rbd_osd_req_op_create(READ, offset, length) */ - /* rbd_osd_req_op_create(WRITE, offset, length) */ - op->extent.offset = va_arg(args, u64); - op->extent.length = va_arg(args, u64); - if (opcode == CEPH_OSD_OP_WRITE) - op->payload_len = op->extent.length; - break; - case CEPH_OSD_OP_STAT: - break; - case CEPH_OSD_OP_CALL: - /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ - op->cls.class_name = va_arg(args, char *); - size = strlen(op->cls.class_name); - rbd_assert(size <= (size_t) U8_MAX); - op->cls.class_len = size; - op->payload_len = size; - - op->cls.method_name = va_arg(args, char *); - size = strlen(op->cls.method_name); - rbd_assert(size <= (size_t) U8_MAX); - op->cls.method_len = size; - op->payload_len += size; - - op->cls.argc = 0; - op->cls.indata = va_arg(args, void *); - size = va_arg(args, size_t); - rbd_assert(size <= (size_t) U32_MAX); - op->cls.indata_len = (u32) size; - op->payload_len += size; - break; - case CEPH_OSD_OP_NOTIFY_ACK: - case CEPH_OSD_OP_WATCH: - /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ - /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ - op->watch.cookie = va_arg(args, u64); - op->watch.ver = va_arg(args, u64); - op->watch.ver = cpu_to_le64(op->watch.ver); - if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) - op->watch.flag = (u8) 1; - break; - default: - rbd_warn(NULL, "unsupported opcode %hu\n", opcode); - kfree(op); - op = NULL; - break; - } - va_end(args); - - return op; -} - -static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) -{ - kfree(op); -} - static int rbd_obj_request_submit(struct ceph_osd_client *osdc, struct rbd_obj_request *obj_request) { @@ -1221,7 +1418,24 @@ static int rbd_obj_request_submit(struct ceph_osd_client *osdc, static void rbd_img_request_complete(struct rbd_img_request *img_request) { + dout("%s: img %p\n", __func__, img_request); + + /* + * If no error occurred, compute the aggregate transfer + * count for the image request. We could instead use + * atomic64_cmpxchg() to update it as each object request + * completes; not clear which way is better offhand. + */ + if (!img_request->result) { + struct rbd_obj_request *obj_request; + u64 xferred = 0; + + for_each_obj_request(img_request, obj_request) + xferred += obj_request->xferred; + img_request->xferred = xferred; + } + if (img_request->callback) img_request->callback(img_request); else @@ -1237,39 +1451,56 @@ static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) return wait_for_completion_interruptible(&obj_request->completion); } -static void obj_request_done_init(struct rbd_obj_request *obj_request) +/* + * The default/initial value for all image request flags is 0. Each + * is conditionally set to 1 at image request initialization time + * and currently never changes thereafter.
+ */ +static void img_request_write_set(struct rbd_img_request *img_request) { - atomic_set(&obj_request->done, 0); - smp_wmb(); + set_bit(IMG_REQ_WRITE, &img_request->flags); + smp_mb(); } -static void obj_request_done_set(struct rbd_obj_request *obj_request) +static bool img_request_write_test(struct rbd_img_request *img_request) { - int done; + smp_mb(); + return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; +} - done = atomic_inc_return(&obj_request->done); - if (done > 1) { - struct rbd_img_request *img_request = obj_request->img_request; - struct rbd_device *rbd_dev; +static void img_request_child_set(struct rbd_img_request *img_request) +{ + set_bit(IMG_REQ_CHILD, &img_request->flags); + smp_mb(); +} - rbd_dev = img_request ? img_request->rbd_dev : NULL; - rbd_warn(rbd_dev, "obj_request %p was already done\n", - obj_request); - } +static bool img_request_child_test(struct rbd_img_request *img_request) +{ + smp_mb(); + return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0; } -static bool obj_request_done_test(struct rbd_obj_request *obj_request) +static void img_request_layered_set(struct rbd_img_request *img_request) +{ + set_bit(IMG_REQ_LAYERED, &img_request->flags); + smp_mb(); +} + +static bool img_request_layered_test(struct rbd_img_request *img_request) { smp_mb(); - return atomic_read(&obj_request->done) != 0; + return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; } static void rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) { + u64 xferred = obj_request->xferred; + u64 length = obj_request->length; + dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, obj_request, obj_request->img_request, obj_request->result, - obj_request->xferred, obj_request->length); + xferred, length); /* * ENOENT means a hole in the image. We zero-fill the * entire length of the request. A short read also implies * zero-fill to the end of the request. Either way we * update the xferred count to indicate the whole request * was satisfied.
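 * (Worked example: a 1 MiB read that comes back 256 KiB short is
 * completed by zeroing the final 256 KiB and reporting xferred ==
 * length, so the block layer sees the request fully satisfied.)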
*/ - BUG_ON(obj_request->type != OBJ_REQUEST_BIO); + rbd_assert(obj_request->type != OBJ_REQUEST_NODATA); if (obj_request->result == -ENOENT) { - zero_bio_chain(obj_request->bio_list, 0); + if (obj_request->type == OBJ_REQUEST_BIO) + zero_bio_chain(obj_request->bio_list, 0); + else + zero_pages(obj_request->pages, 0, length); obj_request->result = 0; - obj_request->xferred = obj_request->length; - } else if (obj_request->xferred < obj_request->length && - !obj_request->result) { - zero_bio_chain(obj_request->bio_list, obj_request->xferred); - obj_request->xferred = obj_request->length; + obj_request->xferred = length; + } else if (xferred < length && !obj_request->result) { + if (obj_request->type == OBJ_REQUEST_BIO) + zero_bio_chain(obj_request->bio_list, xferred); + else + zero_pages(obj_request->pages, xferred, length); + obj_request->xferred = length; } obj_request_done_set(obj_request); } @@ -1308,9 +1544,23 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) { - dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, - obj_request->result, obj_request->xferred, obj_request->length); - if (obj_request->img_request) + struct rbd_img_request *img_request = NULL; + struct rbd_device *rbd_dev = NULL; + bool layered = false; + + if (obj_request_img_data_test(obj_request)) { + img_request = obj_request->img_request; + layered = img_request && img_request_layered_test(img_request); + rbd_dev = img_request->rbd_dev; + } + + dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, + obj_request, img_request, obj_request->result, + obj_request->xferred, obj_request->length); + if (layered && obj_request->result == -ENOENT && + obj_request->img_offset < rbd_dev->parent_overlap) + rbd_img_parent_read(obj_request); + else if (img_request) rbd_img_obj_request_read_callback(obj_request); else obj_request_done_set(obj_request); @@ -1321,9 +1571,8 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) dout("%s: obj %p result %d %llu\n", __func__, obj_request, obj_request->result, obj_request->length); /* - * There is no such thing as a successful short write. - * Our xferred value is the number of bytes transferred - * back. Set it to our originally-requested length. + * There is no such thing as a successful short write. Set + * it to our originally-requested length. */ obj_request->xferred = obj_request->length; obj_request_done_set(obj_request); @@ -1347,22 +1596,25 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); rbd_assert(osd_req == obj_request->osd_req); - rbd_assert(!!obj_request->img_request ^ - (obj_request->which == BAD_WHICH)); + if (obj_request_img_data_test(obj_request)) { + rbd_assert(obj_request->img_request); + rbd_assert(obj_request->which != BAD_WHICH); + } else { + rbd_assert(obj_request->which == BAD_WHICH); + } if (osd_req->r_result < 0) obj_request->result = osd_req->r_result; - obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); - WARN_ON(osd_req->r_num_ops != 1); /* For now */ + BUG_ON(osd_req->r_num_ops > 2); /* * We support a 64-bit length, but ultimately it has to be * passed to blk_end_request(), which takes an unsigned int. 
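 * Hence the rbd_assert() on xferred just below, before the value is
 * handed back to the block layer.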
*/ obj_request->xferred = osd_req->r_reply_op_len[0]; - rbd_assert(obj_request->xferred < (u64) UINT_MAX); - opcode = osd_req->r_request_ops[0].op; + rbd_assert(obj_request->xferred < (u64)UINT_MAX); + opcode = osd_req->r_ops[0].op; switch (opcode) { case CEPH_OSD_OP_READ: rbd_osd_read_callback(obj_request); @@ -1388,28 +1640,49 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_obj_request_complete(obj_request); } +static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_osd_request *osd_req = obj_request->osd_req; + u64 snap_id; + + rbd_assert(osd_req != NULL); + + snap_id = img_request ? img_request->snap_id : CEPH_NOSNAP; + ceph_osdc_build_request(osd_req, obj_request->offset, + NULL, snap_id, NULL); +} + +static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_osd_request *osd_req = obj_request->osd_req; + struct ceph_snap_context *snapc; + struct timespec mtime = CURRENT_TIME; + + rbd_assert(osd_req != NULL); + + snapc = img_request ? img_request->snapc : NULL; + ceph_osdc_build_request(osd_req, obj_request->offset, + snapc, CEPH_NOSNAP, &mtime); +} + static struct ceph_osd_request *rbd_osd_req_create( struct rbd_device *rbd_dev, bool write_request, - struct rbd_obj_request *obj_request, - struct ceph_osd_req_op *op) + struct rbd_obj_request *obj_request) { - struct rbd_img_request *img_request = obj_request->img_request; struct ceph_snap_context *snapc = NULL; struct ceph_osd_client *osdc; struct ceph_osd_request *osd_req; - struct timespec now; - struct timespec *mtime; - u64 snap_id = CEPH_NOSNAP; - u64 offset = obj_request->offset; - u64 length = obj_request->length; - if (img_request) { - rbd_assert(img_request->write_request == write_request); - if (img_request->write_request) + if (obj_request_img_data_test(obj_request)) { + struct rbd_img_request *img_request = obj_request->img_request; + + rbd_assert(write_request == + img_request_write_test(img_request)); + if (write_request) snapc = img_request->snapc; - else - snap_id = img_request->snap_id; } /* Allocate and initialize the request, for the single op */ @@ -1419,31 +1692,10 @@ static struct ceph_osd_request *rbd_osd_req_create( if (!osd_req) return NULL; /* ENOMEM */ - rbd_assert(obj_request_type_valid(obj_request->type)); - switch (obj_request->type) { - case OBJ_REQUEST_NODATA: - break; /* Nothing to do */ - case OBJ_REQUEST_BIO: - rbd_assert(obj_request->bio_list != NULL); - osd_req->r_bio = obj_request->bio_list; - break; - case OBJ_REQUEST_PAGES: - osd_req->r_pages = obj_request->pages; - osd_req->r_num_pages = obj_request->page_count; - osd_req->r_page_alignment = offset & ~PAGE_MASK; - break; - } - - if (write_request) { + if (write_request) osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; - now = CURRENT_TIME; - mtime = &now; - } else { + else osd_req->r_flags = CEPH_OSD_FLAG_READ; - mtime = NULL; /* not needed for reads */ - offset = 0; /* These are not used... 
*/ - length = 0; /* ...for osd read requests */ - } osd_req->r_callback = rbd_osd_req_callback; osd_req->r_priv = obj_request; @@ -1454,14 +1706,51 @@ static struct ceph_osd_request *rbd_osd_req_create( osd_req->r_file_layout = rbd_dev->layout; /* struct */ - /* osd_req will get its own reference to snapc (if non-null) */ + return osd_req; +} - ceph_osdc_build_request(osd_req, offset, length, 1, op, - snapc, snap_id, mtime); +/* + * Create a copyup osd request based on the information in the + * object request supplied. A copyup request has two osd ops, + * a copyup method call, and a "normal" write request. + */ +static struct ceph_osd_request * +rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc; + struct rbd_device *rbd_dev; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + + rbd_assert(obj_request_img_data_test(obj_request)); + img_request = obj_request->img_request; + rbd_assert(img_request); + rbd_assert(img_request_write_test(img_request)); + + /* Allocate and initialize the request, for the two ops */ + + snapc = img_request->snapc; + rbd_dev = img_request->rbd_dev; + osdc = &rbd_dev->rbd_client->client->osdc; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 2, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; + + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); + + osd_req->r_file_layout = rbd_dev->layout; /* struct */ return osd_req; } + static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) { ceph_osdc_put_request(osd_req); @@ -1480,18 +1769,23 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, rbd_assert(obj_request_type_valid(type)); size = strlen(object_name) + 1; - obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); - if (!obj_request) + name = kmalloc(size, GFP_KERNEL); + if (!name) return NULL; - name = (char *)(obj_request + 1); + obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); + if (!obj_request) { + kfree(name); + return NULL; + } + obj_request->object_name = memcpy(name, object_name, size); obj_request->offset = offset; obj_request->length = length; + obj_request->flags = 0; obj_request->which = BAD_WHICH; obj_request->type = type; INIT_LIST_HEAD(&obj_request->links); - obj_request_done_init(obj_request); init_completion(&obj_request->completion); kref_init(&obj_request->kref); @@ -1530,7 +1824,9 @@ static void rbd_obj_request_destroy(struct kref *kref) break; } - kfree(obj_request); + kfree(obj_request->object_name); + obj_request->object_name = NULL; + kmem_cache_free(rbd_obj_request_cache, obj_request); } /* @@ -1541,37 +1837,40 @@ static void rbd_obj_request_destroy(struct kref *kref) static struct rbd_img_request *rbd_img_request_create( struct rbd_device *rbd_dev, u64 offset, u64 length, - bool write_request) + bool write_request, + bool child_request) { struct rbd_img_request *img_request; - struct ceph_snap_context *snapc = NULL; - img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC); if (!img_request) return NULL; if (write_request) { down_read(&rbd_dev->header_rwsem); - snapc = 
ceph_get_snap_context(rbd_dev->header.snapc); + ceph_get_snap_context(rbd_dev->header.snapc); up_read(&rbd_dev->header_rwsem); - if (WARN_ON(!snapc)) { - kfree(img_request); - return NULL; /* Shouldn't happen */ - } } img_request->rq = NULL; img_request->rbd_dev = rbd_dev; img_request->offset = offset; img_request->length = length; - img_request->write_request = write_request; - if (write_request) - img_request->snapc = snapc; - else + img_request->flags = 0; + if (write_request) { + img_request_write_set(img_request); + img_request->snapc = rbd_dev->header.snapc; + } else { img_request->snap_id = rbd_dev->spec->snap_id; + } + if (child_request) + img_request_child_set(img_request); + if (rbd_dev->parent_spec) + img_request_layered_set(img_request); spin_lock_init(&img_request->completion_lock); img_request->next_completion = 0; img_request->callback = NULL; + img_request->result = 0; img_request->obj_request_count = 0; INIT_LIST_HEAD(&img_request->obj_requests); kref_init(&img_request->kref); @@ -1600,78 +1899,204 @@ static void rbd_img_request_destroy(struct kref *kref) rbd_img_obj_request_del(img_request, obj_request); rbd_assert(img_request->obj_request_count == 0); - if (img_request->write_request) + if (img_request_write_test(img_request)) ceph_put_snap_context(img_request->snapc); - kfree(img_request); + if (img_request_child_test(img_request)) + rbd_obj_request_put(img_request->obj_request); + + kmem_cache_free(rbd_img_request_cache, img_request); +} + +static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + unsigned int xferred; + int result; + bool more; + + rbd_assert(obj_request_img_data_test(obj_request)); + img_request = obj_request->img_request; + + rbd_assert(obj_request->xferred <= (u64)UINT_MAX); + xferred = (unsigned int)obj_request->xferred; + result = obj_request->result; + if (result) { + struct rbd_device *rbd_dev = img_request->rbd_dev; + + rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n", + img_request_write_test(img_request) ? 
"write" : "read", + obj_request->length, obj_request->img_offset, + obj_request->offset); + rbd_warn(rbd_dev, " result %d xferred %x\n", + result, xferred); + if (!img_request->result) + img_request->result = result; + } + + /* Image object requests don't own their page array */ + + if (obj_request->type == OBJ_REQUEST_PAGES) { + obj_request->pages = NULL; + obj_request->page_count = 0; + } + + if (img_request_child_test(img_request)) { + rbd_assert(img_request->obj_request != NULL); + more = obj_request->which < img_request->obj_request_count - 1; + } else { + rbd_assert(img_request->rq != NULL); + more = blk_end_request(img_request->rq, result, xferred); + } + + return more; } -static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, - struct bio *bio_list) +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + rbd_assert(obj_request_img_data_test(obj_request)); + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + more = rbd_img_obj_end_request(obj_request); + which++; + } + + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); + + if (!more) + rbd_img_request_complete(img_request); +} + +/* + * Split up an image request into one or more object requests, each + * to a different object. The "type" parameter indicates whether + * "data_desc" is the pointer to the head of a list of bio + * structures, or the base of a page array. In either case this + * function assumes data_desc describes memory sufficient to hold + * all data described by the image request. + */ +static int rbd_img_request_fill(struct rbd_img_request *img_request, + enum obj_request_type type, + void *data_desc) { struct rbd_device *rbd_dev = img_request->rbd_dev; struct rbd_obj_request *obj_request = NULL; struct rbd_obj_request *next_obj_request; - unsigned int bio_offset; - u64 image_offset; + bool write_request = img_request_write_test(img_request); + struct bio *bio_list; + unsigned int bio_offset = 0; + struct page **pages; + u64 img_offset; u64 resid; u16 opcode; - dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + dout("%s: img %p type %d data_desc %p\n", __func__, img_request, + (int)type, data_desc); - opcode = img_request->write_request ? CEPH_OSD_OP_WRITE - : CEPH_OSD_OP_READ; - bio_offset = 0; - image_offset = img_request->offset; - rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + opcode = write_request ? 
CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ; + img_offset = img_request->offset; resid = img_request->length; rbd_assert(resid > 0); + + if (type == OBJ_REQUEST_BIO) { + bio_list = data_desc; + rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT); + } else { + rbd_assert(type == OBJ_REQUEST_PAGES); + pages = data_desc; + } + while (resid) { + struct ceph_osd_request *osd_req; const char *object_name; - unsigned int clone_size; - struct ceph_osd_req_op *op; u64 offset; u64 length; - object_name = rbd_segment_name(rbd_dev, image_offset); + object_name = rbd_segment_name(rbd_dev, img_offset); if (!object_name) goto out_unwind; - offset = rbd_segment_offset(rbd_dev, image_offset); - length = rbd_segment_length(rbd_dev, image_offset, resid); + offset = rbd_segment_offset(rbd_dev, img_offset); + length = rbd_segment_length(rbd_dev, img_offset, resid); obj_request = rbd_obj_request_create(object_name, - offset, length, - OBJ_REQUEST_BIO); - kfree(object_name); /* object request has its own copy */ + offset, length, type); + /* object request has its own copy of the object name */ + rbd_segment_name_free(object_name); if (!obj_request) goto out_unwind; - rbd_assert(length <= (u64) UINT_MAX); - clone_size = (unsigned int) length; - obj_request->bio_list = bio_chain_clone_range(&bio_list, - &bio_offset, clone_size, - GFP_ATOMIC); - if (!obj_request->bio_list) - goto out_partial; + if (type == OBJ_REQUEST_BIO) { + unsigned int clone_size; + + rbd_assert(length <= (u64)UINT_MAX); + clone_size = (unsigned int)length; + obj_request->bio_list = + bio_chain_clone_range(&bio_list, + &bio_offset, + clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; + } else { + unsigned int page_count; + + obj_request->pages = pages; + page_count = (u32)calc_pages_for(offset, length); + obj_request->page_count = page_count; + if ((offset + length) & ~PAGE_MASK) + page_count--; /* more on last page */ + pages += page_count; + } - /* - * Build up the op to use in building the osd - * request. Note that the contents of the op are - * copied by rbd_osd_req_create(). 
- */ - op = rbd_osd_req_op_create(opcode, offset, length); - if (!op) - goto out_partial; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, - img_request->write_request, - obj_request, op); - rbd_osd_req_op_destroy(op); - if (!obj_request->osd_req) + osd_req = rbd_osd_req_create(rbd_dev, write_request, + obj_request); + if (!osd_req) goto out_partial; - /* status and version are initially zero-filled */ + obj_request->osd_req = osd_req; + obj_request->callback = rbd_img_obj_callback; + osd_req_op_extent_init(osd_req, 0, opcode, offset, length, + 0, 0); + if (type == OBJ_REQUEST_BIO) + osd_req_op_extent_osd_data_bio(osd_req, 0, + obj_request->bio_list, length); + else + osd_req_op_extent_osd_data_pages(osd_req, 0, + obj_request->pages, length, + offset & ~PAGE_MASK, false, false); + + if (write_request) + rbd_osd_req_format_write(obj_request); + else + rbd_osd_req_format_read(obj_request); + + obj_request->img_offset = img_offset; rbd_img_obj_request_add(img_request, obj_request); - image_offset += length; + img_offset += length; resid -= length; } @@ -1686,61 +2111,389 @@ out_unwind: return -ENOMEM; } -static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +static void +rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; - u32 which = obj_request->which; - bool more = true; + struct rbd_device *rbd_dev; + u64 length; + u32 page_count; + rbd_assert(obj_request->type == OBJ_REQUEST_BIO); + rbd_assert(obj_request_img_data_test(obj_request)); img_request = obj_request->img_request; + rbd_assert(img_request); - dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_dev = img_request->rbd_dev; + rbd_assert(rbd_dev); + length = (u64)1 << rbd_dev->header.obj_order; + page_count = (u32)calc_pages_for(0, length); + + rbd_assert(obj_request->copyup_pages); + ceph_release_page_vector(obj_request->copyup_pages, page_count); + obj_request->copyup_pages = NULL; + + /* + * We want the transfer count to reflect the size of the + * original write request. There is no such thing as a + * successful short write, so if the request was successful + * we can just set it to the originally-requested length. 
+ */ + if (!obj_request->result) + obj_request->xferred = obj_request->length; + + /* Finish up with the normal image object callback */ + + rbd_img_obj_callback(obj_request); +} + +static void +rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) +{ + struct rbd_obj_request *orig_request; + struct ceph_osd_request *osd_req; + struct ceph_osd_client *osdc; + struct rbd_device *rbd_dev; + struct page **pages; + int result; + u64 obj_size; + u64 xferred; + + rbd_assert(img_request_child_test(img_request)); + + /* First get what we need from the image request */ + + pages = img_request->copyup_pages; + rbd_assert(pages != NULL); + img_request->copyup_pages = NULL; + + orig_request = img_request->obj_request; + rbd_assert(orig_request != NULL); + rbd_assert(orig_request->type == OBJ_REQUEST_BIO); + result = img_request->result; + obj_size = img_request->length; + xferred = img_request->xferred; + + rbd_dev = img_request->rbd_dev; + rbd_assert(rbd_dev); + rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order); + + rbd_img_request_put(img_request); + + if (result) + goto out_err; + + /* Allocate the new copyup osd request for the original request */ + + result = -ENOMEM; + rbd_assert(!orig_request->osd_req); + osd_req = rbd_osd_req_create_copyup(orig_request); + if (!osd_req) + goto out_err; + orig_request->osd_req = osd_req; + orig_request->copyup_pages = pages; + + /* Initialize the copyup op */ + + osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); + osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0, + false, false); + + /* Then the original write request op */ + + osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, + orig_request->offset, + orig_request->length, 0, 0); + osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list, + orig_request->length); + + rbd_osd_req_format_write(orig_request); + + /* All set, send it off. */ + + orig_request->callback = rbd_img_obj_copyup_callback; + osdc = &rbd_dev->rbd_client->client->osdc; + result = rbd_obj_request_submit(osdc, orig_request); + if (!result) + return; +out_err: + /* Record the error code and complete the request */ + + orig_request->result = result; + orig_request->xferred = 0; + obj_request_done_set(orig_request); + rbd_obj_request_complete(orig_request); +} + +/* + * Read from the parent image the range of data that covers the + * entire target of the given object request. This is used for + * satisfying a layered image write request when the target of an + * object request from the image request does not exist. + * + * A page array big enough to hold the returned data is allocated + * and supplied to rbd_img_request_fill() as the "data descriptor." + * When the read completes, this page array will be transferred to + * the original object request for the copyup operation. + * + * If an error occurs, record it as the result of the original + * object request and mark it done so it gets completed. 
+ */ +static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request = NULL; + struct rbd_img_request *parent_request = NULL; + struct rbd_device *rbd_dev; + u64 img_offset; + u64 length; + struct page **pages = NULL; + u32 page_count; + int result; + + rbd_assert(obj_request_img_data_test(obj_request)); + rbd_assert(obj_request->type == OBJ_REQUEST_BIO); + + img_request = obj_request->img_request; rbd_assert(img_request != NULL); - rbd_assert(img_request->rq != NULL); - rbd_assert(img_request->obj_request_count > 0); - rbd_assert(which != BAD_WHICH); - rbd_assert(which < img_request->obj_request_count); - rbd_assert(which >= img_request->next_completion); + rbd_dev = img_request->rbd_dev; + rbd_assert(rbd_dev->parent != NULL); - spin_lock_irq(&img_request->completion_lock); - if (which != img_request->next_completion) - goto out; + /* + * First things first. The original osd request is of no + * use to us any more; we'll need a new one that can hold + * the two ops in a copyup request. We'll get that later, + * but for now we can release the old one. + */ + rbd_osd_req_destroy(obj_request->osd_req); + obj_request->osd_req = NULL; - for_each_obj_request_from(img_request, obj_request) { - unsigned int xferred; - int result; + /* + * Determine the byte range covered by the object in the + * child image to which the original request was to be sent. + */ + img_offset = obj_request->img_offset - obj_request->offset; + length = (u64)1 << rbd_dev->header.obj_order; - rbd_assert(more); - rbd_assert(which < img_request->obj_request_count); + /* + * There is no defined parent data beyond the parent + * overlap, so limit what we read at that boundary if + * necessary. + */ + if (img_offset + length > rbd_dev->parent_overlap) { + rbd_assert(img_offset < rbd_dev->parent_overlap); + length = rbd_dev->parent_overlap - img_offset; + } - if (!obj_request_done_test(obj_request)) - break; + /* + * Allocate a page array big enough to receive the data read + * from the parent. + */ + page_count = (u32)calc_pages_for(0, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) { + result = PTR_ERR(pages); + pages = NULL; + goto out_err; + } - rbd_assert(obj_request->xferred <= (u64) UINT_MAX); - xferred = (unsigned int) obj_request->xferred; - result = (int) obj_request->result; - if (result) - rbd_warn(NULL, "obj_request %s result %d xferred %u\n", - img_request->write_request ? 
"write" : "read", - result, xferred); + result = -ENOMEM; + parent_request = rbd_img_request_create(rbd_dev->parent, + img_offset, length, + false, true); + if (!parent_request) + goto out_err; + rbd_obj_request_get(obj_request); + parent_request->obj_request = obj_request; - more = blk_end_request(img_request->rq, result, xferred); - which++; + result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); + if (result) + goto out_err; + parent_request->copyup_pages = pages; + + parent_request->callback = rbd_img_obj_parent_read_full_callback; + result = rbd_img_request_submit(parent_request); + if (!result) + return 0; + + parent_request->copyup_pages = NULL; + parent_request->obj_request = NULL; + rbd_obj_request_put(obj_request); +out_err: + if (pages) + ceph_release_page_vector(pages, page_count); + if (parent_request) + rbd_img_request_put(parent_request); + obj_request->result = result; + obj_request->xferred = 0; + obj_request_done_set(obj_request); + + return result; +} + +static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_obj_request *orig_request; + int result; + + rbd_assert(!obj_request_img_data_test(obj_request)); + + /* + * All we need from the object request is the original + * request and the result of the STAT op. Grab those, then + * we're done with the request. + */ + orig_request = obj_request->obj_request; + obj_request->obj_request = NULL; + rbd_assert(orig_request); + rbd_assert(orig_request->img_request); + + result = obj_request->result; + obj_request->result = 0; + + dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__, + obj_request, orig_request, result, + obj_request->xferred, obj_request->length); + rbd_obj_request_put(obj_request); + + rbd_assert(orig_request); + rbd_assert(orig_request->img_request); + + /* + * Our only purpose here is to determine whether the object + * exists, and we don't want to treat the non-existence as + * an error. If something else comes back, transfer the + * error to the original request and complete it now. + */ + if (!result) { + obj_request_existence_set(orig_request, true); + } else if (result == -ENOENT) { + obj_request_existence_set(orig_request, false); + } else if (result) { + orig_request->result = result; + goto out; } - rbd_assert(more ^ (which == img_request->obj_request_count)); - img_request->next_completion = which; + /* + * Resubmit the original request now that we have recorded + * whether the target object exists. 
+ */ + orig_request->result = rbd_img_obj_request_submit(orig_request); out: - spin_unlock_irq(&img_request->completion_lock); + if (orig_request->result) + rbd_obj_request_complete(orig_request); + rbd_obj_request_put(orig_request); +} - if (!more) - rbd_img_request_complete(img_request); +static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) +{ + struct rbd_obj_request *stat_request; + struct rbd_device *rbd_dev; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + /* + * The response data for a STAT call consists of: + * le64 length; + * struct { + * le32 tv_sec; + * le32 tv_nsec; + * } mtime; + */ + size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32); + page_count = (u32)calc_pages_for(0, size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + ret = -ENOMEM; + stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!stat_request) + goto out; + + rbd_obj_request_get(obj_request); + stat_request->obj_request = obj_request; + stat_request->pages = pages; + stat_request->page_count = page_count; + + rbd_assert(obj_request->img_request); + rbd_dev = obj_request->img_request->rbd_dev; + stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, + stat_request); + if (!stat_request->osd_req) + goto out; + stat_request->callback = rbd_img_obj_exists_callback; + + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); + osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, + false, false); + rbd_osd_req_format_read(stat_request); + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, stat_request); +out: + if (ret) + rbd_obj_request_put(obj_request); + + return ret; +} + +static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + struct rbd_device *rbd_dev; + bool known; + + rbd_assert(obj_request_img_data_test(obj_request)); + + img_request = obj_request->img_request; + rbd_assert(img_request); + rbd_dev = img_request->rbd_dev; + + /* + * Only writes to layered images need special handling. + * Reads and non-layered writes are simple object requests. + * Layered writes that start beyond the end of the overlap + * with the parent have no parent data, so they too are + * simple object requests. Finally, if the target object is + * known to already exist, its parent data has already been + * copied, so a write to the object can also be handled as a + * simple object request. + */ + if (!img_request_write_test(img_request) || + !img_request_layered_test(img_request) || + rbd_dev->parent_overlap <= obj_request->img_offset || + ((known = obj_request_known_test(obj_request)) && + obj_request_exists_test(obj_request))) { + + struct rbd_device *rbd_dev; + struct ceph_osd_client *osdc; + + rbd_dev = obj_request->img_request->rbd_dev; + osdc = &rbd_dev->rbd_client->client->osdc; + + return rbd_obj_request_submit(osdc, obj_request); + } + + /* + * It's a layered write. The target object might exist but + * we may not know that yet. If we know it doesn't exist, + * start by reading the data for the full target object from + * the parent so we can use it for a copyup to the target. + */ + if (known) + return rbd_img_obj_parent_read_full(obj_request); + + /* We don't know whether the target exists. Go find out. 
*/ + + return rbd_img_obj_exists_submit(obj_request); } static int rbd_img_request_submit(struct rbd_img_request *img_request) { - struct rbd_device *rbd_dev = img_request->rbd_dev; - struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; struct rbd_obj_request *next_obj_request; @@ -1748,27 +2501,105 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) for_each_obj_request_safe(img_request, obj_request, next_obj_request) { int ret; - obj_request->callback = rbd_img_obj_callback; - ret = rbd_obj_request_submit(osdc, obj_request); + ret = rbd_img_obj_request_submit(obj_request); if (ret) return ret; - /* - * The image request has its own reference to each - * of its object requests, so we can safely drop the - * initial one here. - */ - rbd_obj_request_put(obj_request); } return 0; } -static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, - u64 ver, u64 notify_id) +static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) { struct rbd_obj_request *obj_request; - struct ceph_osd_req_op *op; - struct ceph_osd_client *osdc; + struct rbd_device *rbd_dev; + u64 obj_end; + + rbd_assert(img_request_child_test(img_request)); + + obj_request = img_request->obj_request; + rbd_assert(obj_request); + rbd_assert(obj_request->img_request); + + obj_request->result = img_request->result; + if (obj_request->result) + goto out; + + /* + * We need to zero anything beyond the parent overlap + * boundary. Since rbd_img_obj_request_read_callback() + * will zero anything beyond the end of a short read, an + * easy way to do this is to pretend the data from the + * parent came up short--ending at the overlap boundary. + */ + rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); + obj_end = obj_request->img_offset + obj_request->length; + rbd_dev = obj_request->img_request->rbd_dev; + if (obj_end > rbd_dev->parent_overlap) { + u64 xferred = 0; + + if (obj_request->img_offset < rbd_dev->parent_overlap) + xferred = rbd_dev->parent_overlap - + obj_request->img_offset; + + obj_request->xferred = min(img_request->xferred, xferred); + } else { + obj_request->xferred = img_request->xferred; + } +out: + rbd_img_request_put(img_request); + rbd_img_obj_request_read_callback(obj_request); + rbd_obj_request_complete(obj_request); +} + +static void rbd_img_parent_read(struct rbd_obj_request *obj_request) +{ + struct rbd_device *rbd_dev; + struct rbd_img_request *img_request; + int result; + + rbd_assert(obj_request_img_data_test(obj_request)); + rbd_assert(obj_request->img_request != NULL); + rbd_assert(obj_request->result == (s32) -ENOENT); + rbd_assert(obj_request->type == OBJ_REQUEST_BIO); + + rbd_dev = obj_request->img_request->rbd_dev; + rbd_assert(rbd_dev->parent != NULL); + /* rbd_read_finish(obj_request, obj_request->length); */ + img_request = rbd_img_request_create(rbd_dev->parent, + obj_request->img_offset, + obj_request->length, + false, true); + result = -ENOMEM; + if (!img_request) + goto out_err; + + rbd_obj_request_get(obj_request); + img_request->obj_request = obj_request; + + result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, + obj_request->bio_list); + if (result) + goto out_err; + + img_request->callback = rbd_img_parent_read_callback; + result = rbd_img_request_submit(img_request); + if (result) + goto out_err; + + return; +out_err: + if (img_request) + rbd_img_request_put(img_request); + obj_request->result = result; + obj_request->xferred = 0; + obj_request_done_set(obj_request); +} + 
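The three-way routing just added in rbd_img_obj_request_submit() is the crux of layered-image handling, so it is worth seeing in isolation. The following standalone sketch (userspace C, every name in it hypothetical; it is not part of this patch) models the same decision under the stated assumptions:

    /*
     * Illustrative model of the object-request routing described
     * above; "struct obj_req" and "route" are invented for this
     * sketch and do not exist in the driver.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct obj_req {
            bool write_request;                /* image request is a write */
            bool layered;                      /* image has a parent */
            bool exists_known;                 /* target object existence known */
            bool exists;                       /* target object known to exist */
            unsigned long long img_offset;     /* object's offset in the image */
            unsigned long long parent_overlap; /* bytes still backed by parent */
    };

    static const char *route(const struct obj_req *r)
    {
            /* Reads, non-layered writes, writes beyond the parent
             * overlap, and writes to objects known to exist are all
             * plain object requests. */
            if (!r->write_request || !r->layered ||
                r->parent_overlap <= r->img_offset ||
                (r->exists_known && r->exists))
                    return "plain object request";

            /* Layered write, target known not to exist: read the
             * covering range from the parent, then copyup + write. */
            if (r->exists_known)
                    return "parent read-full, then copyup write";

            /* Existence unknown: STAT the object, then resubmit. */
            return "STAT existence check, then resubmit";
    }

    int main(void)
    {
            struct obj_req r = {
                    .write_request = true, .layered = true,
                    .exists_known = false, .exists = false,
                    .img_offset = 0, .parent_overlap = 1ULL << 22,
            };

            printf("%s\n", route(&r)); /* STAT existence check, then resubmit */
            return 0;
    }

Once a copyup (or a STAT resubmission that finds the object) records the target as existing, later writes to that object take the plain-request branch, so each object should pay the parent-read cost at most once.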
+static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) +{ + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; int ret; obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, @@ -1777,17 +2608,15 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, return -ENOMEM; ret = -ENOMEM; - op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); - if (!op) - goto out; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, - obj_request, op); - rbd_osd_req_op_destroy(op); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); if (!obj_request->osd_req) goto out; - - osdc = &rbd_dev->rbd_client->client->osdc; obj_request->callback = rbd_obj_request_put; + + osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, + notify_id, 0, 0); + rbd_osd_req_format_read(obj_request); + ret = rbd_obj_request_submit(osdc, obj_request); out: if (ret) @@ -1799,21 +2628,16 @@ out: static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) { struct rbd_device *rbd_dev = (struct rbd_device *)data; - u64 hver; - int rc; if (!rbd_dev) return; dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, - rbd_dev->header_name, (unsigned long long) notify_id, - (unsigned int) opcode); - rc = rbd_dev_refresh(rbd_dev, &hver); - if (rc) - rbd_warn(rbd_dev, "got notification but failed to " - " update snaps: %d\n", rc); + rbd_dev->header_name, (unsigned long long)notify_id, + (unsigned int)opcode); + (void)rbd_dev_refresh(rbd_dev); - rbd_obj_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, notify_id); } /* @@ -1824,7 +2648,6 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; - struct ceph_osd_req_op *op; int ret; rbd_assert(start ^ !!rbd_dev->watch_event); @@ -1844,14 +2667,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) if (!obj_request) goto out_cancel; - op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, - rbd_dev->watch_event->cookie, - rbd_dev->header.obj_version, start); - if (!op) - goto out_cancel; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, - obj_request, op); - rbd_osd_req_op_destroy(op); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, obj_request); if (!obj_request->osd_req) goto out_cancel; @@ -1860,6 +2676,11 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) else ceph_osdc_unregister_linger_request(osdc, rbd_dev->watch_request->osd_req); + + osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, 0, start); + rbd_osd_req_format_write(obj_request); + ret = rbd_obj_request_submit(osdc, obj_request); if (ret) goto out_cancel; @@ -1899,40 +2720,38 @@ out_cancel: } /* - * Synchronous osd object method call + * Synchronous osd object method call. Returns the number of bytes + * returned in the outbound buffer, or a negative error code. 
*/ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, - const char *outbound, + const void *outbound, size_t outbound_size, - char *inbound, - size_t inbound_size, - u64 *version) + void *inbound, + size_t inbound_size) { + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; - struct ceph_osd_client *osdc; - struct ceph_osd_req_op *op; struct page **pages; u32 page_count; int ret; /* - * Method calls are ultimately read operations but they - * don't involve object data (so no offset or length). - * The result should placed into the inbound buffer - * provided. They also supply outbound data--parameters for - * the object method. Currently if this is present it will - * be a snapshot id. + * Method calls are ultimately read operations. The result + * should be placed into the inbound buffer provided. They + * also supply outbound data--parameters for the object + * method. Currently if this is present it will be a + * snapshot id. */ - page_count = (u32) calc_pages_for(0, inbound_size); + page_count = (u32)calc_pages_for(0, inbound_size); pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); if (IS_ERR(pages)) return PTR_ERR(pages); ret = -ENOMEM; - obj_request = rbd_obj_request_create(object_name, 0, 0, + obj_request = rbd_obj_request_create(object_name, 0, inbound_size, OBJ_REQUEST_PAGES); if (!obj_request) goto out; @@ -1940,17 +2759,29 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, obj_request->pages = pages; obj_request->page_count = page_count; - op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, - method_name, outbound, outbound_size); - if (!op) - goto out; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, - obj_request, op); - rbd_osd_req_op_destroy(op); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); if (!obj_request->osd_req) goto out; - osdc = &rbd_dev->rbd_client->client->osdc; + osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL, + class_name, method_name); + if (outbound_size) { + struct ceph_pagelist *pagelist; + + pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); + if (!pagelist) + goto out; + + ceph_pagelist_init(pagelist); + ceph_pagelist_append(pagelist, outbound, outbound_size); + osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0, + pagelist); + } + osd_req_op_cls_response_data_pages(obj_request->osd_req, 0, + obj_request->pages, inbound_size, + 0, false, false); + rbd_osd_req_format_read(obj_request); + ret = rbd_obj_request_submit(osdc, obj_request); if (ret) goto out; @@ -1961,10 +2792,10 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, ret = obj_request->result; if (ret < 0) goto out; - ret = 0; + + rbd_assert(obj_request->xferred < (u64)INT_MAX); + ret = (int)obj_request->xferred; ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); - if (version) - *version = obj_request->version; out: if (obj_request) rbd_obj_request_put(obj_request); @@ -2034,18 +2865,22 @@ static void rbd_request_fn(struct request_queue *q) } result = -EINVAL; - if (WARN_ON(offset && length > U64_MAX - offset + 1)) + if (offset && length > U64_MAX - offset + 1) { + rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n", + offset, length); goto end_request; /* Shouldn't happen */ + } result = -ENOMEM; img_request = rbd_img_request_create(rbd_dev, offset, length, - write_request); + write_request, false); if (!img_request) goto end_request; img_request->rq = rq; 
- result = rbd_img_request_fill_bio(img_request, rq->bio); + result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, + rq->bio); if (!result) result = rbd_img_request_submit(img_request); if (result) @@ -2053,8 +2888,10 @@ static void rbd_request_fn(struct request_queue *q) end_request: spin_lock_irq(q->queue_lock); if (result < 0) { - rbd_warn(rbd_dev, "obj_request %s result %d\n", - write_request ? "write" : "read", result); + rbd_warn(rbd_dev, "%s %llx at %llx result %d\n", + write_request ? "write" : "read", + length, offset, result); + __blk_end_request_all(rq, result); } } @@ -2113,22 +2950,22 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) if (!disk) return; - if (disk->flags & GENHD_FL_UP) + rbd_dev->disk = NULL; + if (disk->flags & GENHD_FL_UP) { del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); + if (disk->queue) + blk_cleanup_queue(disk->queue); + } put_disk(disk); } static int rbd_obj_read_sync(struct rbd_device *rbd_dev, const char *object_name, - u64 offset, u64 length, - char *buf, u64 *version) + u64 offset, u64 length, void *buf) { - struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; - struct ceph_osd_client *osdc; struct page **pages = NULL; u32 page_count; size_t size; @@ -2148,16 +2985,19 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, obj_request->pages = pages; obj_request->page_count = page_count; - op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); - if (!op) - goto out; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, - obj_request, op); - rbd_osd_req_op_destroy(op); + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); if (!obj_request->osd_req) goto out; - osdc = &rbd_dev->rbd_client->client->osdc; + osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ, + offset, length, 0, 0); + osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0, + obj_request->pages, + obj_request->length, + obj_request->offset & ~PAGE_MASK, + false, false); + rbd_osd_req_format_read(obj_request); + ret = rbd_obj_request_submit(osdc, obj_request); if (ret) goto out; @@ -2172,10 +3012,8 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); size = (size_t) obj_request->xferred; ceph_copy_from_page_vector(pages, buf, 0, size); - rbd_assert(size <= (size_t) INT_MAX); - ret = (int) size; - if (version) - *version = obj_request->version; + rbd_assert(size <= (size_t)INT_MAX); + ret = (int)size; out: if (obj_request) rbd_obj_request_put(obj_request); @@ -2196,7 +3034,7 @@ out: * Returns a pointer-coded errno if a failure occurs. 
*/ static struct rbd_image_header_ondisk * -rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) +rbd_dev_v1_header_read(struct rbd_device *rbd_dev) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -2224,11 +3062,10 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) return ERR_PTR(-ENOMEM); ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, - 0, size, - (char *) ondisk, version); + 0, size, ondisk); if (ret < 0) goto out_err; - if (WARN_ON((size_t) ret < size)) { + if ((size_t)ret < size) { ret = -ENXIO; rbd_warn(rbd_dev, "short header read (want %zd got %d)", size, ret); @@ -2260,46 +3097,36 @@ static int rbd_read_header(struct rbd_device *rbd_dev, struct rbd_image_header *header) { struct rbd_image_header_ondisk *ondisk; - u64 ver = 0; int ret; - ondisk = rbd_dev_v1_header_read(rbd_dev, &ver); + ondisk = rbd_dev_v1_header_read(rbd_dev); if (IS_ERR(ondisk)) return PTR_ERR(ondisk); ret = rbd_header_from_disk(header, ondisk); - if (ret >= 0) - header->obj_version = ver; kfree(ondisk); return ret; } -static void rbd_remove_all_snaps(struct rbd_device *rbd_dev) -{ - struct rbd_snap *snap; - struct rbd_snap *next; - - list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) - rbd_remove_snap_dev(snap); -} - static void rbd_update_mapping_size(struct rbd_device *rbd_dev) { - sector_t size; - if (rbd_dev->spec->snap_id != CEPH_NOSNAP) return; - size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; - dout("setting size to %llu sectors", (unsigned long long) size); - rbd_dev->mapping.size = (u64) size; - set_capacity(rbd_dev->disk, size); + if (rbd_dev->mapping.size != rbd_dev->header.image_size) { + sector_t size; + + rbd_dev->mapping.size = rbd_dev->header.image_size; + size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long)size); + set_capacity(rbd_dev->disk, size); + } } /* * only read the first part of the ondisk header, without the snaps info */ -static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev) { int ret; struct rbd_image_header h; @@ -2320,37 +3147,61 @@ static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) /* osd requests may still refer to snapc */ ceph_put_snap_context(rbd_dev->header.snapc); - if (hver) - *hver = h.obj_version; - rbd_dev->header.obj_version = h.obj_version; rbd_dev->header.image_size = h.image_size; rbd_dev->header.snapc = h.snapc; rbd_dev->header.snap_names = h.snap_names; rbd_dev->header.snap_sizes = h.snap_sizes; /* Free the extra copy of the object prefix */ - WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); + if (strcmp(rbd_dev->header.object_prefix, h.object_prefix)) + rbd_warn(rbd_dev, "object prefix changed (ignoring)"); kfree(h.object_prefix); - ret = rbd_dev_snaps_update(rbd_dev); - if (!ret) - ret = rbd_dev_snaps_register(rbd_dev); - up_write(&rbd_dev->header_rwsem); return ret; } -static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) +/* + * Clear the rbd device's EXISTS flag if the snapshot it's mapped to + * has disappeared from the (just updated) snapshot context. 
+ */ +static void rbd_exists_validate(struct rbd_device *rbd_dev) +{ + u64 snap_id; + + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) + return; + + snap_id = rbd_dev->spec->snap_id; + if (snap_id == CEPH_NOSNAP) + return; + + if (rbd_dev_snap_index(rbd_dev, snap_id) == BAD_SNAP_INDEX) + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); +} + +static int rbd_dev_refresh(struct rbd_device *rbd_dev) { + u64 image_size; int ret; rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + image_size = rbd_dev->header.image_size; mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); if (rbd_dev->image_format == 1) - ret = rbd_dev_v1_refresh(rbd_dev, hver); + ret = rbd_dev_v1_refresh(rbd_dev); else - ret = rbd_dev_v2_refresh(rbd_dev, hver); + ret = rbd_dev_v2_refresh(rbd_dev); + + /* If it's a mapped snapshot, validate its EXISTS flag */ + + rbd_exists_validate(rbd_dev); mutex_unlock(&ctl_mutex); + if (ret) + rbd_warn(rbd_dev, "got notification but failed to " + "update snaps: %d\n", ret); + if (image_size != rbd_dev->header.image_size) + revalidate_disk(rbd_dev->disk); return ret; } @@ -2394,8 +3245,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->disk = disk; - set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); - return 0; out_disk: put_disk(disk); @@ -2416,13 +3265,9 @@ static ssize_t rbd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - sector_t size; - down_read(&rbd_dev->header_rwsem); - size = get_capacity(rbd_dev->disk); - up_read(&rbd_dev->header_rwsem); - - return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); + return sprintf(buf, "%llu\n", + (unsigned long long)rbd_dev->mapping.size); } /* @@ -2435,7 +3280,7 @@ static ssize_t rbd_features_show(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "0x%016llx\n", - (unsigned long long) rbd_dev->mapping.features); + (unsigned long long)rbd_dev->mapping.features); } static ssize_t rbd_major_show(struct device *dev, @@ -2443,7 +3288,11 @@ { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - return sprintf(buf, "%d\n", rbd_dev->major); + if (rbd_dev->major) + return sprintf(buf, "%d\n", rbd_dev->major); + + return sprintf(buf, "(none)\n"); + } static ssize_t rbd_client_id_show(struct device *dev, @@ -2469,7 +3318,7 @@ static ssize_t rbd_pool_id_show(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); return sprintf(buf, "%llu\n", - (unsigned long long) rbd_dev->spec->pool_id); + (unsigned long long) rbd_dev->spec->pool_id); } static ssize_t rbd_name_show(struct device *dev, @@ -2555,7 +3404,7 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; - ret = rbd_dev_refresh(rbd_dev, NULL); + ret = rbd_dev_refresh(rbd_dev); return ret < 0 ? 
ret : size; } @@ -2606,71 +3455,6 @@ static struct device_type rbd_device_type = { .release = rbd_sysfs_dev_release, }; - -/* - sysfs - snapshots -*/ - -static ssize_t rbd_snap_size_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - - return sprintf(buf, "%llu\n", (unsigned long long)snap->size); -} - -static ssize_t rbd_snap_id_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - - return sprintf(buf, "%llu\n", (unsigned long long)snap->id); -} - -static ssize_t rbd_snap_features_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - - return sprintf(buf, "0x%016llx\n", - (unsigned long long) snap->features); -} - -static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); -static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); -static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL); - -static struct attribute *rbd_snap_attrs[] = { - &dev_attr_snap_size.attr, - &dev_attr_snap_id.attr, - &dev_attr_snap_features.attr, - NULL, -}; - -static struct attribute_group rbd_snap_attr_group = { - .attrs = rbd_snap_attrs, -}; - -static void rbd_snap_dev_release(struct device *dev) -{ - struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev); - kfree(snap->name); - kfree(snap); -} - -static const struct attribute_group *rbd_snap_attr_groups[] = { - &rbd_snap_attr_group, - NULL -}; - -static struct device_type rbd_snap_device_type = { - .groups = rbd_snap_attr_groups, - .release = rbd_snap_dev_release, -}; - static struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) { kref_get(&spec->kref); @@ -2694,8 +3478,6 @@ static struct rbd_spec *rbd_spec_alloc(void) return NULL; kref_init(&spec->kref); - rbd_spec_put(rbd_spec_get(spec)); /* TEMPORARY */ - return spec; } @@ -2722,7 +3504,6 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, spin_lock_init(&rbd_dev->lock); rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); - INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); rbd_dev->spec = spec; @@ -2740,96 +3521,11 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, static void rbd_dev_destroy(struct rbd_device *rbd_dev) { - rbd_spec_put(rbd_dev->parent_spec); - kfree(rbd_dev->header_name); rbd_put_client(rbd_dev->rbd_client); rbd_spec_put(rbd_dev->spec); kfree(rbd_dev); } -static bool rbd_snap_registered(struct rbd_snap *snap) -{ - bool ret = snap->dev.type == &rbd_snap_device_type; - bool reg = device_is_registered(&snap->dev); - - rbd_assert(!ret ^ reg); - - return ret; -} - -static void rbd_remove_snap_dev(struct rbd_snap *snap) -{ - list_del(&snap->node); - if (device_is_registered(&snap->dev)) - device_unregister(&snap->dev); -} - -static int rbd_register_snap_dev(struct rbd_snap *snap, - struct device *parent) -{ - struct device *dev = &snap->dev; - int ret; - - dev->type = &rbd_snap_device_type; - dev->parent = parent; - dev->release = rbd_snap_dev_release; - dev_set_name(dev, "%s%s", RBD_SNAP_DEV_NAME_PREFIX, snap->name); - dout("%s: registering device for snapshot %s\n", __func__, snap->name); - - ret = device_register(dev); - - return ret; -} - -static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, - const char *snap_name, - u64 snap_id, u64 snap_size, - u64 snap_features) -{ - struct rbd_snap *snap; - int ret; - - snap = 
kzalloc(sizeof (*snap), GFP_KERNEL); - if (!snap) - return ERR_PTR(-ENOMEM); - - ret = -ENOMEM; - snap->name = kstrdup(snap_name, GFP_KERNEL); - if (!snap->name) - goto err; - - snap->id = snap_id; - snap->size = snap_size; - snap->features = snap_features; - - return snap; - -err: - kfree(snap->name); - kfree(snap); - - return ERR_PTR(ret); -} - -static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which, - u64 *snap_size, u64 *snap_features) -{ - char *snap_name; - - rbd_assert(which < rbd_dev->header.snapc->num_snaps); - - *snap_size = rbd_dev->header.snap_sizes[which]; - *snap_features = 0; /* No features for v1 */ - - /* Skip over names until we find the one we are looking for */ - - snap_name = rbd_dev->header.snap_names; - while (which--) - snap_name += strlen(snap_name) + 1; - - return snap_name; -} - /* * Get the size and object order for an image snapshot, or if * snap_id is CEPH_NOSNAP, gets this information for the base @@ -2847,18 +3543,21 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", - (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), NULL); + &snapid, sizeof (snapid), + &size_buf, sizeof (size_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; + if (ret < sizeof (size_buf)) + return -ERANGE; - *order = size_buf.order; + if (order) + *order = size_buf.order; *snap_size = le64_to_cpu(size_buf.size); dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n", - (unsigned long long) snap_id, (unsigned int) *order, - (unsigned long long) *snap_size); + (unsigned long long)snap_id, (unsigned int)*order, + (unsigned long long)*snap_size); return 0; } @@ -2881,17 +3580,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) return -ENOMEM; ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, - "rbd", "get_object_prefix", - NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + "rbd", "get_object_prefix", NULL, 0, + reply_buf, RBD_OBJ_PREFIX_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + RBD_OBJ_PREFIX_LEN_MAX, - NULL, GFP_NOIO); + p + ret, NULL, GFP_NOIO); + ret = 0; if (IS_ERR(rbd_dev->header.object_prefix)) { ret = PTR_ERR(rbd_dev->header.object_prefix); @@ -2899,7 +3597,6 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) } else { dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); } - out: kfree(reply_buf); @@ -2913,29 +3610,30 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, struct { __le64 features; __le64 incompat; - } features_buf = { 0 }; + } __attribute__ ((packed)) features_buf = { 0 }; u64 incompat; int ret; ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", - (char *) &snapid, sizeof (snapid), - (char *) &features_buf, sizeof (features_buf), - NULL); + &snapid, sizeof (snapid), + &features_buf, sizeof (features_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; + if (ret < sizeof (features_buf)) + return -ERANGE; incompat = le64_to_cpu(features_buf.incompat); - if (incompat & ~RBD_FEATURES_ALL) + if (incompat & ~RBD_FEATURES_SUPPORTED) return -ENXIO; *snap_features = le64_to_cpu(features_buf.features); dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", - (unsigned long long) snap_id, - 
(unsigned long long) *snap_features, - (unsigned long long) le64_to_cpu(features_buf.incompat)); + (unsigned long long)snap_id, + (unsigned long long)*snap_features, + (unsigned long long)le64_to_cpu(features_buf.incompat)); return 0; } @@ -2975,15 +3673,15 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) snapid = cpu_to_le64(CEPH_NOSNAP); ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", - (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, NULL); + &snapid, sizeof (snapid), + reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; - ret = -ERANGE; p = reply_buf; - end = (char *) reply_buf + size; + end = reply_buf + ret; + ret = -ERANGE; ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ @@ -2991,8 +3689,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) /* The ceph file layout needs to fit pool id in 32 bits */ ret = -EIO; - if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) - goto out; + if (parent_spec->pool_id > (u64)U32_MAX) { + rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", + (unsigned long long)parent_spec->pool_id, U32_MAX); + goto out_err; + } image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { @@ -3015,6 +3716,56 @@ out_err: return ret; } +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +{ + struct { + __le64 stripe_unit; + __le64 stripe_count; + } __attribute__ ((packed)) striping_info_buf = { 0 }; + size_t size = sizeof (striping_info_buf); + void *p; + u64 obj_size; + u64 stripe_unit; + u64 stripe_count; + int ret; + + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, + "rbd", "get_stripe_unit_count", NULL, 0, + (char *)&striping_info_buf, size); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); + if (ret < 0) + return ret; + if (ret < size) + return -ERANGE; + + /* + * We don't actually support the "fancy striping" feature + * (STRIPINGV2) yet, but if the striping sizes are the + * defaults the behavior is the same as before. So find + * out, and only fail if the image has non-default values. 
+ */ + ret = -EINVAL; + obj_size = (u64)1 << rbd_dev->header.obj_order; + p = &striping_info_buf; + stripe_unit = ceph_decode_64(&p); + if (stripe_unit != obj_size) { + rbd_warn(rbd_dev, "unsupported stripe unit " + "(got %llu want %llu)", + stripe_unit, obj_size); + return -EINVAL; + } + stripe_count = ceph_decode_64(&p); + if (stripe_count != 1) { + rbd_warn(rbd_dev, "unsupported stripe count " + "(got %llu want 1)", stripe_count); + return -EINVAL; + } + rbd_dev->header.stripe_unit = stripe_unit; + rbd_dev->header.stripe_count = stripe_count; + + return 0; +} + static char *rbd_dev_image_name(struct rbd_device *rbd_dev) { size_t image_id_size; @@ -3036,8 +3787,8 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) return NULL; p = image_id; - end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); + end = image_id + image_id_size; + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); @@ -3047,11 +3798,12 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, NULL); + reply_buf, size); if (ret < 0) goto out; p = reply_buf; - end = (char *) reply_buf + size; + end = reply_buf + ret; + image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); if (IS_ERR(image_name)) image_name = NULL; @@ -3064,69 +3816,134 @@ out: return image_name; } +static u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) +{ + struct ceph_snap_context *snapc = rbd_dev->header.snapc; + const char *snap_name; + u32 which = 0; + + /* Skip over names until we find the one we are looking for */ + + snap_name = rbd_dev->header.snap_names; + while (which < snapc->num_snaps) { + if (!strcmp(name, snap_name)) + return snapc->snaps[which]; + snap_name += strlen(snap_name) + 1; + which++; + } + return CEPH_NOSNAP; +} + +static u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) +{ + struct ceph_snap_context *snapc = rbd_dev->header.snapc; + u32 which; + bool found = false; + u64 snap_id; + + for (which = 0; !found && which < snapc->num_snaps; which++) { + const char *snap_name; + + snap_id = snapc->snaps[which]; + snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); + if (IS_ERR(snap_name)) + break; + found = !strcmp(name, snap_name); + kfree(snap_name); + } + return found ? snap_id : CEPH_NOSNAP; +} + /* - * When a parent image gets probed, we only have the pool, image, - * and snapshot ids but not the names of any of them. This call - * is made later to fill in those names. It has to be done after - * rbd_dev_snaps_update() has completed because some of the - * information (in particular, snapshot name) is not available - * until then. + * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if + * no snapshot by that name is found, or if an error occurs. */ -static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) +static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) { - struct ceph_osd_client *osdc; - const char *name; - void *reply_buf = NULL; + if (rbd_dev->image_format == 1) + return rbd_v1_snap_id_by_name(rbd_dev, name); + + return rbd_v2_snap_id_by_name(rbd_dev, name); +} + +/* + * When an rbd image has a parent image, it is identified by the + * pool, image, and snapshot ids (not names). 
This function fills + * in the names for those ids. (It's OK if we can't figure out the + * name for an image id, but the pool and snapshot ids should always + * exist and have names.) All names in an rbd spec are dynamically + * allocated. + * + * When an image being mapped (not a parent) is probed, we have the + * pool name and pool id, image name and image id, and the snapshot + * name. The only thing we're missing is the snapshot id. + */ +static int rbd_dev_spec_update(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_spec *spec = rbd_dev->spec; + const char *pool_name; + const char *image_name; + const char *snap_name; int ret; - if (rbd_dev->spec->pool_name) - return 0; /* Already have the names */ + /* + * An image being mapped will have the pool name (etc.), but + * we need to look up the snapshot id. + */ + if (spec->pool_name) { + if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { + u64 snap_id; + + snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); + if (snap_id == CEPH_NOSNAP) + return -ENOENT; + spec->snap_id = snap_id; + } else { + spec->snap_id = CEPH_NOSNAP; + } - /* Look up the pool name */ + return 0; + } - osdc = &rbd_dev->rbd_client->client->osdc; - name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) { - rbd_warn(rbd_dev, "there is no pool with id %llu", - rbd_dev->spec->pool_id); /* Really a BUG() */ + /* Get the pool name; we have to make our own copy of this */ + + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id); + if (!pool_name) { + rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id); return -EIO; } - - rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); - if (!rbd_dev->spec->pool_name) + pool_name = kstrdup(pool_name, GFP_KERNEL); + if (!pool_name) return -ENOMEM; /* Fetch the image name; tolerate failure here */ - name = rbd_dev_image_name(rbd_dev); - if (name) - rbd_dev->spec->image_name = (char *) name; - else + image_name = rbd_dev_image_name(rbd_dev); + if (!image_name) rbd_warn(rbd_dev, "unable to get image name"); - /* Look up the snapshot name. 
*/ + /* Look up the snapshot name, and make a copy */ - name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); - if (!name) { - rbd_warn(rbd_dev, "no snapshot with id %llu", - rbd_dev->spec->snap_id); /* Really a BUG() */ - ret = -EIO; + snap_name = rbd_snap_name(rbd_dev, spec->snap_id); + if (!snap_name) { + ret = -ENOMEM; goto out_err; } - rbd_dev->spec->snap_name = kstrdup(name, GFP_KERNEL); - if(!rbd_dev->spec->snap_name) - goto out_err; + + spec->pool_name = pool_name; + spec->image_name = image_name; + spec->snap_name = snap_name; return 0; out_err: - kfree(reply_buf); - kfree(rbd_dev->spec->pool_name); - rbd_dev->spec->pool_name = NULL; + kfree(image_name); + kfree(pool_name); return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) { size_t size; int ret; @@ -3151,16 +3968,15 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) return -ENOMEM; ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, - "rbd", "get_snapcontext", - NULL, 0, - reply_buf, size, ver); + "rbd", "get_snapcontext", NULL, 0, + reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = -ERANGE; p = reply_buf; - end = (char *) reply_buf + size; + end = reply_buf + ret; + ret = -ERANGE; ceph_decode_64_safe(&p, end, seq, out); ceph_decode_32_safe(&p, end, snap_count, out); @@ -3177,37 +3993,33 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) } if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) goto out; + ret = 0; - size = sizeof (struct ceph_snap_context) + - snap_count * sizeof (snapc->snaps[0]); - snapc = kmalloc(size, GFP_KERNEL); + snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); if (!snapc) { ret = -ENOMEM; goto out; } - - atomic_set(&snapc->nref, 1); snapc->seq = seq; - snapc->num_snaps = snap_count; for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); rbd_dev->header.snapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", - (unsigned long long) seq, (unsigned int) snap_count); - + (unsigned long long)seq, (unsigned int)snap_count); out: kfree(reply_buf); - return 0; + return ret; } -static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) +static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, + u64 snap_id) { size_t size; void *reply_buf; - __le64 snap_id; + __le64 snapid; int ret; void *p; void *end; @@ -3218,236 +4030,52 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) if (!reply_buf) return ERR_PTR(-ENOMEM); - snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); + snapid = cpu_to_le64(snap_id); ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", - (char *) &snap_id, sizeof (snap_id), - reply_buf, size, NULL); + &snapid, sizeof (snapid), + reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); - if (ret < 0) + if (ret < 0) { + snap_name = ERR_PTR(ret); goto out; + } p = reply_buf; - end = (char *) reply_buf + size; + end = reply_buf + ret; snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); - if (IS_ERR(snap_name)) { - ret = PTR_ERR(snap_name); + if (IS_ERR(snap_name)) goto out; - } else { - dout(" snap_id 0x%016llx snap_name = %s\n", - (unsigned long long) le64_to_cpu(snap_id), snap_name); - } - kfree(reply_buf); - return snap_name; + dout(" snap_id 0x%016llx snap_name = %s\n", + (unsigned long long)snap_id, snap_name); out: 
kfree(reply_buf); - return ERR_PTR(ret); -} - -static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, - u64 *snap_size, u64 *snap_features) -{ - u64 snap_id; - u8 order; - int ret; - - snap_id = rbd_dev->header.snapc->snaps[which]; - ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size); - if (ret) - return ERR_PTR(ret); - ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features); - if (ret) - return ERR_PTR(ret); - - return rbd_dev_v2_snap_name(rbd_dev, which); -} - -static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, - u64 *snap_size, u64 *snap_features) -{ - if (rbd_dev->image_format == 1) - return rbd_dev_v1_snap_info(rbd_dev, which, - snap_size, snap_features); - if (rbd_dev->image_format == 2) - return rbd_dev_v2_snap_info(rbd_dev, which, - snap_size, snap_features); - return ERR_PTR(-EINVAL); + return snap_name; } -static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev) { int ret; - __u8 obj_order; down_write(&rbd_dev->header_rwsem); - /* Grab old order first, to see if it changes */ - - obj_order = rbd_dev->header.obj_order, ret = rbd_dev_v2_image_size(rbd_dev); if (ret) goto out; - if (rbd_dev->header.obj_order != obj_order) { - ret = -EIO; - goto out; - } rbd_update_mapping_size(rbd_dev); - ret = rbd_dev_v2_snap_context(rbd_dev, hver); + ret = rbd_dev_v2_snap_context(rbd_dev); dout("rbd_dev_v2_snap_context returned %d\n", ret); if (ret) goto out; - ret = rbd_dev_snaps_update(rbd_dev); - dout("rbd_dev_snaps_update returned %d\n", ret); - if (ret) - goto out; - ret = rbd_dev_snaps_register(rbd_dev); - dout("rbd_dev_snaps_register returned %d\n", ret); out: up_write(&rbd_dev->header_rwsem); return ret; } -/* - * Scan the rbd device's current snapshot list and compare it to the - * newly-received snapshot context. Remove any existing snapshots - * not present in the new snapshot context. Add a new snapshot for - * any snaphots in the snapshot context not in the current list. - * And verify there are no changes to snapshots we already know - * about. - * - * Assumes the snapshots in the snapshot context are sorted by - * snapshot id, highest id first. (Snapshots in the rbd_dev's list - * are also maintained in that order.) - */ -static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) -{ - struct ceph_snap_context *snapc = rbd_dev->header.snapc; - const u32 snap_count = snapc->num_snaps; - struct list_head *head = &rbd_dev->snaps; - struct list_head *links = head->next; - u32 index = 0; - - dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count); - while (index < snap_count || links != head) { - u64 snap_id; - struct rbd_snap *snap; - char *snap_name; - u64 snap_size = 0; - u64 snap_features = 0; - - snap_id = index < snap_count ? snapc->snaps[index] - : CEPH_NOSNAP; - snap = links != head ? list_entry(links, struct rbd_snap, node) - : NULL; - rbd_assert(!snap || snap->id != CEPH_NOSNAP); - - if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { - struct list_head *next = links->next; - - /* - * A previously-existing snapshot is not in - * the new snap context. - * - * If the now missing snapshot is the one the - * image is mapped to, clear its exists flag - * so we can avoid sending any more requests - * to it. - */ - if (rbd_dev->spec->snap_id == snap->id) - clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); - rbd_remove_snap_dev(snap); - dout("%ssnap id %llu has been removed\n", - rbd_dev->spec->snap_id == snap->id ? 
- "mapped " : "", - (unsigned long long) snap->id); - - /* Done with this list entry; advance */ - - links = next; - continue; - } - - snap_name = rbd_dev_snap_info(rbd_dev, index, - &snap_size, &snap_features); - if (IS_ERR(snap_name)) - return PTR_ERR(snap_name); - - dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count, - (unsigned long long) snap_id); - if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) { - struct rbd_snap *new_snap; - - /* We haven't seen this snapshot before */ - - new_snap = __rbd_add_snap_dev(rbd_dev, snap_name, - snap_id, snap_size, snap_features); - if (IS_ERR(new_snap)) { - int err = PTR_ERR(new_snap); - - dout(" failed to add dev, error %d\n", err); - - return err; - } - - /* New goes before existing, or at end of list */ - - dout(" added dev%s\n", snap ? "" : " at end\n"); - if (snap) - list_add_tail(&new_snap->node, &snap->node); - else - list_add_tail(&new_snap->node, head); - } else { - /* Already have this one */ - - dout(" already present\n"); - - rbd_assert(snap->size == snap_size); - rbd_assert(!strcmp(snap->name, snap_name)); - rbd_assert(snap->features == snap_features); - - /* Done with this list entry; advance */ - - links = links->next; - } - - /* Advance to the next entry in the snapshot context */ - - index++; - } - dout("%s: done\n", __func__); - - return 0; -} - -/* - * Scan the list of snapshots and register the devices for any that - * have not already been registered. - */ -static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) -{ - struct rbd_snap *snap; - int ret = 0; - - dout("%s:\n", __func__); - if (WARN_ON(!device_is_registered(&rbd_dev->dev))) - return -EIO; - - list_for_each_entry(snap, &rbd_dev->snaps, node) { - if (!rbd_snap_registered(snap)) { - ret = rbd_register_snap_dev(snap, &rbd_dev->dev); - if (ret < 0) - break; - } - } - dout("%s: returning %d\n", __func__, ret); - - return ret; -} - static int rbd_bus_add_dev(struct rbd_device *rbd_dev) { struct device *dev; @@ -3459,7 +4087,7 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) dev->bus = &rbd_bus_type; dev->type = &rbd_device_type; dev->parent = &rbd_root_dev; - dev->release = rbd_dev_release; + dev->release = rbd_dev_device_release; dev_set_name(dev, "%d", rbd_dev->dev_id); ret = device_register(dev); @@ -3673,6 +4301,7 @@ static int rbd_add_parse_args(const char *buf, size_t len; char *options; const char *mon_addrs; + char *snap_name; size_t mon_addrs_size; struct rbd_spec *spec = NULL; struct rbd_options *rbd_opts = NULL; @@ -3731,10 +4360,11 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); - if (!spec->snap_name) + snap_name = kmemdup(buf, len + 1, GFP_KERNEL); + if (!snap_name) goto out_mem; - *(spec->snap_name + len) = '\0'; + *(snap_name + len) = '\0'; + spec->snap_name = snap_name; /* Initialize all rbd options to the defaults */ @@ -3788,15 +4418,19 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) size_t size; char *object_name; void *response; - void *p; + char *image_id; /* * When probing a parent image, the image id is already * known (and the image name likely is not). There's no - * need to fetch the image id again in this case. + * need to fetch the image id again in this case. We + * do still need to set the image format though. */ - if (rbd_dev->spec->image_id) + if (rbd_dev->spec->image_id) { + rbd_dev->image_format = *rbd_dev->spec->image_id ? 
2 : 1; + return 0; + } /* * First, see if the format 2 image id file exists, and if @@ -3818,23 +4452,32 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } + /* If it doesn't exist we'll assume it's a format 1 image */ + ret = rbd_obj_method_sync(rbd_dev, object_name, - "rbd", "get_id", - NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, NULL); + "rbd", "get_id", NULL, 0, + response, RBD_IMAGE_ID_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); - if (ret < 0) - goto out; - - p = response; - rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, - p + RBD_IMAGE_ID_LEN_MAX, + if (ret == -ENOENT) { + image_id = kstrdup("", GFP_KERNEL); + ret = image_id ? 0 : -ENOMEM; + if (!ret) + rbd_dev->image_format = 1; + } else if (ret > sizeof (__le32)) { + void *p = response; + + image_id = ceph_extract_encoded_string(&p, p + ret, NULL, GFP_NOIO); - if (IS_ERR(rbd_dev->spec->image_id)) { - ret = PTR_ERR(rbd_dev->spec->image_id); - rbd_dev->spec->image_id = NULL; + ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0; + if (!ret) + rbd_dev->image_format = 2; } else { - dout("image_id is %s\n", rbd_dev->spec->image_id); + ret = -EINVAL; + } + + if (!ret) { + rbd_dev->spec->image_id = image_id; + dout("image_id is %s\n", image_id); } out: kfree(response); @@ -3843,27 +4486,30 @@ out: return ret; } -static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) +/* Undo whatever state changes are made by v1 or v2 image probe */ + +static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - int ret; - size_t size; + struct rbd_image_header *header; - /* Version 1 images have no id; empty string is used */ + rbd_dev_remove_parent(rbd_dev); + rbd_spec_put(rbd_dev->parent_spec); + rbd_dev->parent_spec = NULL; + rbd_dev->parent_overlap = 0; - rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); - if (!rbd_dev->spec->image_id) - return -ENOMEM; + /* Free dynamic fields from the header, then zero it out */ - /* Record the header object name for this rbd image. */ + header = &rbd_dev->header; + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + kfree(header->object_prefix); + memset(header, 0, sizeof (*header)); +} - size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); - rbd_dev->header_name = kmalloc(size, GFP_KERNEL); - if (!rbd_dev->header_name) { - ret = -ENOMEM; - goto out_err; - } - sprintf(rbd_dev->header_name, "%s%s", - rbd_dev->spec->image_name, RBD_SUFFIX); +static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) +{ + int ret; /* Populate rbd image metadata */ @@ -3876,8 +4522,6 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->parent_spec = NULL; rbd_dev->parent_overlap = 0; - rbd_dev->image_format = 1; - dout("discovered version 1 image, header name is %s\n", rbd_dev->header_name); @@ -3894,43 +4538,45 @@ out_err: static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) { - size_t size; int ret; - u64 ver = 0; - - /* - * Image id was filled in by the caller. Record the header - * object name for this rbd image. - */ - size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); - rbd_dev->header_name = kmalloc(size, GFP_KERNEL); - if (!rbd_dev->header_name) - return -ENOMEM; - sprintf(rbd_dev->header_name, "%s%s", - RBD_HEADER_PREFIX, rbd_dev->spec->image_id); - - /* Get the size and object order for the image */ ret = rbd_dev_v2_image_size(rbd_dev); - if (ret < 0) + if (ret) goto out_err; /* Get the object prefix (a.k.a. 
block_name) for the image */ ret = rbd_dev_v2_object_prefix(rbd_dev); - if (ret < 0) + if (ret) goto out_err; /* Get and check the features for the image */ ret = rbd_dev_v2_features(rbd_dev); - if (ret < 0) + if (ret) goto out_err; /* If the image supports layering, get the parent info */ if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out_err; + + /* + * Don't print a warning for parent images. We can + * tell at this point because we won't know its pool + * name yet (just its pool id). + */ + if (rbd_dev->spec->pool_name) + rbd_warn(rbd_dev, "WARNING: kernel layering " + "is EXPERIMENTAL!"); + } + + /* If the image supports fancy striping, get its parameters */ + + if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev); if (ret < 0) goto out_err; } @@ -3942,12 +4588,9 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) /* Get the snapshot context, plus the header version */ - ret = rbd_dev_v2_snap_context(rbd_dev, &ver); + ret = rbd_dev_v2_snap_context(rbd_dev); if (ret) goto out_err; - rbd_dev->header.obj_version = ver; - - rbd_dev->image_format = 2; dout("discovered version 2 image, header name is %s\n", rbd_dev->header_name); @@ -3965,22 +4608,54 @@ out_err: return ret; } -static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) { + struct rbd_device *parent = NULL; + struct rbd_spec *parent_spec; + struct rbd_client *rbdc; int ret; - /* no need to lock here, as rbd_dev is not registered yet */ - ret = rbd_dev_snaps_update(rbd_dev); - if (ret) - return ret; + if (!rbd_dev->parent_spec) return 0; + /* + * We need to pass a reference to the client and the parent + * spec when creating the parent rbd_dev. Images related by + * parent/child relationships always share both. + */ + parent_spec = rbd_spec_get(rbd_dev->parent_spec); + rbdc = __rbd_get_client(rbd_dev->rbd_client); - ret = rbd_dev_probe_update_spec(rbd_dev); - if (ret) - goto err_out_snaps; + ret = -ENOMEM; + parent = rbd_dev_create(rbdc, parent_spec); + if (!parent) + goto out_err; + + ret = rbd_dev_image_probe(parent); + if (ret < 0) + goto out_err; + rbd_dev->parent = parent; + + return 0; +out_err: + if (parent) { + rbd_spec_put(rbd_dev->parent_spec); + kfree(rbd_dev->header_name); + rbd_dev_destroy(parent); + } else { + rbd_put_client(rbdc); + rbd_spec_put(parent_spec); + } + + return ret; +} + +static int rbd_dev_device_setup(struct rbd_device *rbd_dev) +{ + int ret; - ret = rbd_dev_set_mapping(rbd_dev); + ret = rbd_dev_mapping_set(rbd_dev); if (ret) - goto err_out_snaps; + return ret; /* generate unique id: find highest unique id, add one */ rbd_dev_id_get(rbd_dev); @@ -4007,54 +4682,81 @@ if (ret) goto err_out_disk; - /* - * At this point cleanup in the event of an error is the job - * of the sysfs code (initiated by rbd_bus_del_dev()). - */ - down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_snaps_register(rbd_dev); - up_write(&rbd_dev->header_rwsem); - if (ret) - goto err_out_bus; - - ret = rbd_dev_header_watch_sync(rbd_dev, 1); - if (ret) - goto err_out_bus; - /* Everything's ready. Announce the disk to the world. 
*/ + set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); add_disk(rbd_dev->disk); pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name, (unsigned long long) rbd_dev->mapping.size); return ret; -err_out_bus: - /* this will also clean up rest of rbd_dev stuff */ - rbd_bus_del_dev(rbd_dev); - - return ret; err_out_disk: rbd_free_disk(rbd_dev); err_out_blkdev: unregister_blkdev(rbd_dev->major, rbd_dev->name); err_out_id: rbd_dev_id_put(rbd_dev); -err_out_snaps: - rbd_remove_all_snaps(rbd_dev); + rbd_dev_mapping_clear(rbd_dev); return ret; } +static int rbd_dev_header_name(struct rbd_device *rbd_dev) +{ + struct rbd_spec *spec = rbd_dev->spec; + size_t size; + + /* Record the header object name for this rbd image. */ + + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + + if (rbd_dev->image_format == 1) + size = strlen(spec->image_name) + sizeof (RBD_SUFFIX); + else + size = sizeof (RBD_HEADER_PREFIX) + strlen(spec->image_id); + + rbd_dev->header_name = kmalloc(size, GFP_KERNEL); + if (!rbd_dev->header_name) + return -ENOMEM; + + if (rbd_dev->image_format == 1) + sprintf(rbd_dev->header_name, "%s%s", + spec->image_name, RBD_SUFFIX); + else + sprintf(rbd_dev->header_name, "%s%s", + RBD_HEADER_PREFIX, spec->image_id); + return 0; +} + +static void rbd_dev_image_release(struct rbd_device *rbd_dev) +{ + int ret; + + rbd_dev_unprobe(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 0); + if (ret) + rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); + kfree(rbd_dev->header_name); + rbd_dev->header_name = NULL; + rbd_dev->image_format = 0; + kfree(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; + + rbd_dev_destroy(rbd_dev); +} + /* * Probe for the existence of the header object for the given rbd * device. For format 2 images this includes determining the image * id. */ -static int rbd_dev_probe(struct rbd_device *rbd_dev) +static int rbd_dev_image_probe(struct rbd_device *rbd_dev) { int ret; + int tmp; /* * Get the id from the image id object. 
If it's not a @@ -4063,18 +4765,48 @@ static int rbd_dev_probe(struct rbd_device *rbd_dev) */ ret = rbd_dev_image_id(rbd_dev); if (ret) + return ret; + rbd_assert(rbd_dev->spec->image_id); + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + + ret = rbd_dev_header_name(rbd_dev); + if (ret) + goto err_out_format; + + ret = rbd_dev_header_watch_sync(rbd_dev, 1); + if (ret) + goto out_header_name; + + if (rbd_dev->image_format == 1) ret = rbd_dev_v1_probe(rbd_dev); else ret = rbd_dev_v2_probe(rbd_dev); - if (ret) { - dout("probe failed, returning %d\n", ret); - - return ret; - } + if (ret) + goto err_out_watch; - ret = rbd_dev_probe_finish(rbd_dev); + ret = rbd_dev_spec_update(rbd_dev); if (ret) - rbd_header_free(&rbd_dev->header); + goto err_out_probe; + + ret = rbd_dev_probe_parent(rbd_dev); + if (!ret) + return 0; + +err_out_probe: + rbd_dev_unprobe(rbd_dev); +err_out_watch: + tmp = rbd_dev_header_watch_sync(rbd_dev, 0); + if (tmp) + rbd_warn(rbd_dev, "unable to tear down watch request\n"); +out_header_name: + kfree(rbd_dev->header_name); + rbd_dev->header_name = NULL; +err_out_format: + rbd_dev->image_format = 0; + kfree(rbd_dev->spec->image_id); + rbd_dev->spec->image_id = NULL; + + dout("probe failed, returning %d\n", ret); return ret; } @@ -4111,11 +4843,13 @@ static ssize_t rbd_add(struct bus_type *bus, rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); if (rc < 0) goto err_out_client; - spec->pool_id = (u64) rc; + spec->pool_id = (u64)rc; /* The ceph file layout needs to fit pool id in 32 bits */ - if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + if (spec->pool_id > (u64)U32_MAX) { + rbd_warn(NULL, "pool id too large (%llu > %u)\n", + (unsigned long long)spec->pool_id, U32_MAX); rc = -EIO; goto err_out_client; } @@ -4130,11 +4864,15 @@ static ssize_t rbd_add(struct bus_type *bus, kfree(rbd_opts); rbd_opts = NULL; /* done with this */ - rc = rbd_dev_probe(rbd_dev); + rc = rbd_dev_image_probe(rbd_dev); if (rc < 0) goto err_out_rbd_dev; - return count; + rc = rbd_dev_device_setup(rbd_dev); + if (!rc) + return count; + + rbd_dev_image_release(rbd_dev); err_out_rbd_dev: rbd_dev_destroy(rbd_dev); err_out_client: @@ -4149,7 +4887,7 @@ err_out_module: dout("Error adding device %s\n", buf); - return (ssize_t) rc; + return (ssize_t)rc; } static struct rbd_device *__rbd_get_dev(unsigned long dev_id) @@ -4169,27 +4907,43 @@ static struct rbd_device *__rbd_get_dev(unsigned long dev_id) return NULL; } -static void rbd_dev_release(struct device *dev) +static void rbd_dev_device_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_event) - rbd_dev_header_watch_sync(rbd_dev, 0); - - /* clean up and free blkdev */ rbd_free_disk(rbd_dev); + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + rbd_dev_clear_mapping(rbd_dev); unregister_blkdev(rbd_dev->major, rbd_dev->name); - - /* release allocated disk header fields */ - rbd_header_free(&rbd_dev->header); - - /* done with the id, and with the rbd_dev */ + rbd_dev->major = 0; rbd_dev_id_put(rbd_dev); - rbd_assert(rbd_dev->rbd_client != NULL); - rbd_dev_destroy(rbd_dev); + rbd_dev_mapping_clear(rbd_dev); +} - /* release module ref */ - module_put(THIS_MODULE); +static void rbd_dev_remove_parent(struct rbd_device *rbd_dev) +{ + while (rbd_dev->parent) { + struct rbd_device *first = rbd_dev; + struct rbd_device *second = first->parent; + struct rbd_device *third; + + /* + * Follow to the parent with no grandparent and + * remove it. 
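+ * Each pass releases the deepest remaining ancestor first, so + * the chain is torn down bottom-up and no image is released + * while a descendant still refers to it.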
+ */ + while (second && (third = second->parent)) { + first = second; + second = third; + } + rbd_assert(second); + rbd_dev_image_release(second); + first->parent = NULL; + first->parent_overlap = 0; + + rbd_assert(first->parent_spec); + rbd_spec_put(first->parent_spec); + first->parent_spec = NULL; + } } static ssize_t rbd_remove(struct bus_type *bus, @@ -4197,13 +4951,13 @@ static ssize_t rbd_remove(struct bus_type *bus, size_t count) { struct rbd_device *rbd_dev = NULL; - int target_id, rc; + int target_id; unsigned long ul; - int ret = count; + int ret; - rc = strict_strtoul(buf, 10, &ul); - if (rc) - return rc; + ret = strict_strtoul(buf, 10, &ul); + if (ret) + return ret; /* convert to int; abort if we lost anything in the conversion */ target_id = (int) ul; @@ -4226,10 +4980,10 @@ static ssize_t rbd_remove(struct bus_type *bus, spin_unlock_irq(&rbd_dev->lock); if (ret < 0) goto done; - - rbd_remove_all_snaps(rbd_dev); + ret = count; rbd_bus_del_dev(rbd_dev); - + rbd_dev_image_release(rbd_dev); + module_put(THIS_MODULE); done: mutex_unlock(&ctl_mutex); @@ -4261,6 +5015,56 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } +static int rbd_slab_init(void) +{ + rbd_assert(!rbd_img_request_cache); + rbd_img_request_cache = kmem_cache_create("rbd_img_request", + sizeof (struct rbd_img_request), + __alignof__(struct rbd_img_request), + 0, NULL); + if (!rbd_img_request_cache) + return -ENOMEM; + + rbd_assert(!rbd_obj_request_cache); + rbd_obj_request_cache = kmem_cache_create("rbd_obj_request", + sizeof (struct rbd_obj_request), + __alignof__(struct rbd_obj_request), + 0, NULL); + if (!rbd_obj_request_cache) + goto out_err; + + rbd_assert(!rbd_segment_name_cache); + rbd_segment_name_cache = kmem_cache_create("rbd_segment_name", + MAX_OBJ_NAME_SIZE + 1, 1, 0, NULL); + if (rbd_segment_name_cache) + return 0; +out_err: + if (rbd_obj_request_cache) { + kmem_cache_destroy(rbd_obj_request_cache); + rbd_obj_request_cache = NULL; + } + + kmem_cache_destroy(rbd_img_request_cache); + rbd_img_request_cache = NULL; + + return -ENOMEM; +} + +static void rbd_slab_exit(void) +{ + rbd_assert(rbd_segment_name_cache); + kmem_cache_destroy(rbd_segment_name_cache); + rbd_segment_name_cache = NULL; + + rbd_assert(rbd_obj_request_cache); + kmem_cache_destroy(rbd_obj_request_cache); + rbd_obj_request_cache = NULL; + + rbd_assert(rbd_img_request_cache); + kmem_cache_destroy(rbd_img_request_cache); + rbd_img_request_cache = NULL; +} + static int __init rbd_init(void) { int rc; @@ -4270,16 +5074,22 @@ static int __init rbd_init(void) return -EINVAL; } - rc = rbd_sysfs_init(); + rc = rbd_slab_init(); if (rc) return rc; - pr_info("loaded " RBD_DRV_NAME_LONG "\n"); - return 0; + rc = rbd_sysfs_init(); + if (rc) + rbd_slab_exit(); + else + pr_info("loaded " RBD_DRV_NAME_LONG "\n"); + + return rc; } static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); + rbd_slab_exit(); } module_init(rbd_init); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b166e30b3bc..ff7f0c87745 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -297,12 +297,21 @@ config GPIO_GE_FPGA config GPIO_LYNXPOINT bool "Intel Lynxpoint GPIO support" - depends on ACPI + depends on ACPI && X86 select IRQ_DOMAIN help driver for GPIO functionality on Intel Lynxpoint PCH chipset Requires ACPI device enumeration code to set up a platform device. 
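Before the GPIO changes continue below, a note on the rbd initialization rework above: rbd_init() now creates the driver's slab caches before registering sysfs, and rbd_slab_init() itself unwinds in reverse order when a later cache fails to come up. A minimal standalone sketch of that create/unwind pattern follows; all demo_* names are hypothetical and not part of the patch:

/*
 * Illustrative only: create a cache at module init, roll it back if a
 * later setup step fails, and destroy it again on module exit.
 */
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>

struct demo_req {
	u64 id;				/* illustrative payload */
};

static struct kmem_cache *demo_cache;

static int demo_register_sysfs(void)	/* stand-in for rbd_sysfs_init() */
{
	return 0;
}

static int __init demo_init(void)
{
	int rc;

	demo_cache = kmem_cache_create("demo_req", sizeof(struct demo_req),
				       __alignof__(struct demo_req), 0, NULL);
	if (!demo_cache)
		return -ENOMEM;

	rc = demo_register_sysfs();
	if (rc) {			/* unwind in reverse order */
		kmem_cache_destroy(demo_cache);
		demo_cache = NULL;
	}
	return rc;
}

static void __exit demo_exit(void)
{
	kmem_cache_destroy(demo_cache);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The same reverse-order unwind appears inside rbd_slab_init() itself, where a failed cache creation releases the caches created before it.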
+config GPIO_GRGPIO + tristate "Aeroflex Gaisler GRGPIO support" + depends on OF + select GPIO_GENERIC + select IRQ_DOMAIN + help + Select this to support Aeroflex Gaisler GRGPIO cores from the GRLIB + VHDL IP core library. + comment "I2C GPIO expanders:" config GPIO_ARIZONA diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index a274d7df3c8..6aab73d577d 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_ARCH_DAVINCI) += gpio-davinci.o obj-$(CONFIG_GPIO_EM) += gpio-em.o obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o +obj-$(CONFIG_GPIO_GRGPIO) += gpio-grgpio.o obj-$(CONFIG_GPIO_ICH) += gpio-ich.o obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 464be961f60..721607904d0 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -137,7 +137,7 @@ static int gen_74x164_probe(struct spi_device *spi) mutex_init(&chip->lock); - dev_set_drvdata(&spi->dev, chip); + spi_set_drvdata(spi, chip); chip->spi = spi; @@ -176,7 +176,7 @@ static int gen_74x164_probe(struct spi_device *spi) return ret; exit_destroy: - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); mutex_destroy(&chip->lock); return ret; } @@ -186,11 +186,11 @@ static int gen_74x164_remove(struct spi_device *spi) struct gen_74x164_chip *chip; int ret; - chip = dev_get_drvdata(&spi->dev); + chip = spi_get_drvdata(spi); if (chip == NULL) return -ENODEV; - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); ret = gpiochip_remove(&chip->gpio_chip); if (!ret) diff --git a/drivers/gpio/gpio-adp5520.c b/drivers/gpio/gpio-adp5520.c index 8afa95f831b..f33f78dcada 100644 --- a/drivers/gpio/gpio-adp5520.c +++ b/drivers/gpio/gpio-adp5520.c @@ -105,7 +105,7 @@ static int adp5520_gpio_probe(struct platform_device *pdev) return -ENODEV; } - dev = kzalloc(sizeof(*dev), GFP_KERNEL); + dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (dev == NULL) { dev_err(&pdev->dev, "failed to alloc memory\n"); return -ENOMEM; @@ -163,7 +163,6 @@ static int adp5520_gpio_probe(struct platform_device *pdev) return 0; err: - kfree(dev); return ret; } @@ -180,7 +179,6 @@ static int adp5520_gpio_remove(struct platform_device *pdev) return ret; } - kfree(dev); return 0; } diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index deca78f9931..5cba855638b 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -231,10 +231,12 @@ static int em_gio_irq_domain_map(struct irq_domain *h, unsigned int virq, static struct irq_domain_ops em_gio_irq_domain_ops = { .map = em_gio_irq_domain_map, + .xlate = irq_domain_xlate_twocell, }; static int em_gio_probe(struct platform_device *pdev) { + struct gpio_em_config pdata_dt; struct gpio_em_config *pdata = pdev->dev.platform_data; struct em_gio_priv *p; struct resource *io[2], *irq[2]; @@ -243,7 +245,7 @@ static int em_gio_probe(struct platform_device *pdev) const char *name = dev_name(&pdev->dev); int ret; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); if (!p) { dev_err(&pdev->dev, "failed to allocate driver data\n"); ret = -ENOMEM; @@ -259,24 +261,45 @@ static int em_gio_probe(struct platform_device *pdev) irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0); irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1); - if (!io[0] || !io[1] || !irq[0] || !irq[1] || !pdata) { - dev_err(&pdev->dev, "missing IRQ, IOMEM or 
configuration\n"); + if (!io[0] || !io[1] || !irq[0] || !irq[1]) { + dev_err(&pdev->dev, "missing IRQ or IOMEM\n"); ret = -EINVAL; - goto err1; + goto err0; } - p->base0 = ioremap_nocache(io[0]->start, resource_size(io[0])); + p->base0 = devm_ioremap_nocache(&pdev->dev, io[0]->start, + resource_size(io[0])); if (!p->base0) { dev_err(&pdev->dev, "failed to remap low I/O memory\n"); ret = -ENXIO; - goto err1; + goto err0; } - p->base1 = ioremap_nocache(io[1]->start, resource_size(io[1])); + p->base1 = devm_ioremap_nocache(&pdev->dev, io[1]->start, + resource_size(io[1])); if (!p->base1) { dev_err(&pdev->dev, "failed to remap high I/O memory\n"); ret = -ENXIO; - goto err2; + goto err0; + } + + if (!pdata) { + memset(&pdata_dt, 0, sizeof(pdata_dt)); + pdata = &pdata_dt; + + if (of_property_read_u32(pdev->dev.of_node, "ngpios", + &pdata->number_of_pins)) { + dev_err(&pdev->dev, "Missing ngpios OF property\n"); + ret = -EINVAL; + goto err0; + } + + ret = of_alias_get_id(pdev->dev.of_node, "gpio"); + if (ret < 0) { + dev_err(&pdev->dev, "Couldn't get OF id\n"); + goto err0; + } + pdata->gpio_base = ret * 32; /* 32 GPIOs per instance */ } gpio_chip = &p->gpio_chip; @@ -306,40 +329,32 @@ static int em_gio_probe(struct platform_device *pdev) if (!p->irq_domain) { ret = -ENXIO; dev_err(&pdev->dev, "cannot initialize irq domain\n"); - goto err3; + goto err0; } - if (request_irq(irq[0]->start, em_gio_irq_handler, 0, name, p)) { + if (devm_request_irq(&pdev->dev, irq[0]->start, + em_gio_irq_handler, 0, name, p)) { dev_err(&pdev->dev, "failed to request low IRQ\n"); ret = -ENOENT; - goto err4; + goto err1; } - if (request_irq(irq[1]->start, em_gio_irq_handler, 0, name, p)) { + if (devm_request_irq(&pdev->dev, irq[1]->start, + em_gio_irq_handler, 0, name, p)) { dev_err(&pdev->dev, "failed to request high IRQ\n"); ret = -ENOENT; - goto err5; + goto err1; } ret = gpiochip_add(gpio_chip); if (ret) { dev_err(&pdev->dev, "failed to add GPIO controller\n"); - goto err6; + goto err1; } return 0; -err6: - free_irq(irq[1]->start, pdev); -err5: - free_irq(irq[0]->start, pdev); -err4: - irq_domain_remove(p->irq_domain); -err3: - iounmap(p->base1); -err2: - iounmap(p->base0); err1: - kfree(p); + irq_domain_remove(p->irq_domain); err0: return ret; } @@ -347,34 +362,43 @@ err0: static int em_gio_remove(struct platform_device *pdev) { struct em_gio_priv *p = platform_get_drvdata(pdev); - struct resource *irq[2]; int ret; ret = gpiochip_remove(&p->gpio_chip); if (ret) return ret; - irq[0] = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - irq[1] = platform_get_resource(pdev, IORESOURCE_IRQ, 1); - - free_irq(irq[1]->start, pdev); - free_irq(irq[0]->start, pdev); irq_domain_remove(p->irq_domain); - iounmap(p->base1); - iounmap(p->base0); - kfree(p); return 0; } +static const struct of_device_id em_gio_dt_ids[] = { + { .compatible = "renesas,em-gio", }, + {}, +}; +MODULE_DEVICE_TABLE(of, em_gio_dt_ids); + static struct platform_driver em_gio_device_driver = { .probe = em_gio_probe, .remove = em_gio_remove, .driver = { .name = "em_gio", + .of_match_table = em_gio_dt_ids, + .owner = THIS_MODULE, } }; -module_platform_driver(em_gio_device_driver); +static int __init em_gio_init(void) +{ + return platform_driver_register(&em_gio_device_driver); +} +postcore_initcall(em_gio_init); + +static void __exit em_gio_exit(void) +{ + platform_driver_unregister(&em_gio_device_driver); +} +module_exit(em_gio_exit); MODULE_AUTHOR("Magnus Damm"); MODULE_DESCRIPTION("Renesas Emma Mobile GIO Driver"); diff --git a/drivers/gpio/gpio-generic.c 
b/drivers/gpio/gpio-generic.c index 05fcc0f247c..d2196bf7384 100644 --- a/drivers/gpio/gpio-generic.c +++ b/drivers/gpio/gpio-generic.c @@ -104,6 +104,26 @@ static unsigned long bgpio_read64(void __iomem *reg) } #endif /* BITS_PER_LONG >= 64 */ +static void bgpio_write16be(void __iomem *reg, unsigned long data) +{ + iowrite16be(data, reg); +} + +static unsigned long bgpio_read16be(void __iomem *reg) +{ + return ioread16be(reg); +} + +static void bgpio_write32be(void __iomem *reg, unsigned long data) +{ + iowrite32be(data, reg); +} + +static unsigned long bgpio_read32be(void __iomem *reg) +{ + return ioread32be(reg); +} + static unsigned long bgpio_pin2mask(struct bgpio_chip *bgc, unsigned int pin) { return 1 << pin; @@ -249,7 +269,8 @@ static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val) static int bgpio_setup_accessors(struct device *dev, struct bgpio_chip *bgc, - bool be) + bool bit_be, + bool byte_be) { switch (bgc->bits) { @@ -258,17 +279,33 @@ static int bgpio_setup_accessors(struct device *dev, bgc->write_reg = bgpio_write8; break; case 16: - bgc->read_reg = bgpio_read16; - bgc->write_reg = bgpio_write16; + if (byte_be) { + bgc->read_reg = bgpio_read16be; + bgc->write_reg = bgpio_write16be; + } else { + bgc->read_reg = bgpio_read16; + bgc->write_reg = bgpio_write16; + } break; case 32: - bgc->read_reg = bgpio_read32; - bgc->write_reg = bgpio_write32; + if (byte_be) { + bgc->read_reg = bgpio_read32be; + bgc->write_reg = bgpio_write32be; + } else { + bgc->read_reg = bgpio_read32; + bgc->write_reg = bgpio_write32; + } break; #if BITS_PER_LONG >= 64 case 64: - bgc->read_reg = bgpio_read64; - bgc->write_reg = bgpio_write64; + if (byte_be) { + dev_err(dev, + "64 bit big endian byte order unsupported\n"); + return -EINVAL; + } else { + bgc->read_reg = bgpio_read64; + bgc->write_reg = bgpio_write64; + } break; #endif /* BITS_PER_LONG >= 64 */ default: @@ -276,7 +313,7 @@ static int bgpio_setup_accessors(struct device *dev, return -EINVAL; } - bgc->pin2mask = be ? bgpio_pin2mask_be : bgpio_pin2mask; + bgc->pin2mask = bit_be ? bgpio_pin2mask_be : bgpio_pin2mask; return 0; } @@ -353,11 +390,7 @@ static int bgpio_setup_direction(struct bgpio_chip *bgc, int bgpio_remove(struct bgpio_chip *bgc) { - int err = gpiochip_remove(&bgc->gc); - - kfree(bgc); - - return err; + return gpiochip_remove(&bgc->gc); } EXPORT_SYMBOL_GPL(bgpio_remove); @@ -385,7 +418,8 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev, if (ret) return ret; - ret = bgpio_setup_accessors(dev, bgc, flags & BGPIOF_BIG_ENDIAN); + ret = bgpio_setup_accessors(dev, bgc, flags & BGPIOF_BIG_ENDIAN, + flags & BGPIOF_BIG_ENDIAN_BYTE_ORDER); if (ret) return ret; diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c new file mode 100644 index 00000000000..8e08b864765 --- /dev/null +++ b/drivers/gpio/gpio-grgpio.c @@ -0,0 +1,505 @@ +/* + * Driver for Aeroflex Gaisler GRGPIO General Purpose I/O cores. + * + * 2013 (c) Aeroflex Gaisler AB + * + * This driver supports the GRGPIO GPIO core available in the GRLIB VHDL + * IP core library. + * + * Full documentation of the GRGPIO core can be found here: + * http://www.gaisler.com/products/grlib/grip.pdf + * + * See "Documentation/devicetree/bindings/gpio/gpio-grgpio.txt" for + * information on open firmware properties. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Contributors: Andreas Larsson <andreas@gaisler.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_gpio.h> +#include <linux/of_platform.h> +#include <linux/gpio.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/basic_mmio_gpio.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> + +#define GRGPIO_MAX_NGPIO 32 + +#define GRGPIO_DATA 0x00 +#define GRGPIO_OUTPUT 0x04 +#define GRGPIO_DIR 0x08 +#define GRGPIO_IMASK 0x0c +#define GRGPIO_IPOL 0x10 +#define GRGPIO_IEDGE 0x14 +#define GRGPIO_BYPASS 0x18 +#define GRGPIO_IMAP_BASE 0x20 + +/* Structure for an irq of the core - called an underlying irq */ +struct grgpio_uirq { + u8 refcnt; /* Reference counter to manage requesting/freeing of uirq */ + u8 uirq; /* Underlying irq of the gpio driver */ +}; + +/* + * Structure for an irq of a gpio line handed out by this driver. The index is + * used to map to the corresponding underlying irq. + */ +struct grgpio_lirq { + s8 index; /* Index into struct grgpio_priv's uirqs, or -1 */ + u8 irq; /* irq for the gpio line */ +}; + +struct grgpio_priv { + struct bgpio_chip bgc; + void __iomem *regs; + struct device *dev; + + u32 imask; /* irq mask shadow register */ + + /* + * The grgpio core can have multiple "underlying" irqs. The gpio lines + * can be mapped to any one or none of these underlying irqs + * independently of each other. This driver sets up an irq domain and + * hands out separate irqs to each gpio line. + */ + struct irq_domain *domain; + + /* + * This array contains information on each underlying irq, i.e. each + * irq of the grgpio core itself. + */ + struct grgpio_uirq uirqs[GRGPIO_MAX_NGPIO]; + + /* + * This array contains information for each gpio line on the irqs + * obtained from this driver. An index value of -1 for a certain gpio + * line indicates that the line has no irq. Otherwise the index connects + * the irq to the underlying irq by pointing into the uirqs array. 
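+ * (For gpio line i, lirqs[i].index == n means the line is served by + * the underlying irq recorded in uirqs[n].uirq; -1 means the line + * has no irq at all.)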
+ */ + struct grgpio_lirq lirqs[GRGPIO_MAX_NGPIO]; +}; + +static inline struct grgpio_priv *grgpio_gc_to_priv(struct gpio_chip *gc) +{ + struct bgpio_chip *bgc = to_bgpio_chip(gc); + + return container_of(bgc, struct grgpio_priv, bgc); +} + +static void grgpio_set_imask(struct grgpio_priv *priv, unsigned int offset, + int val) +{ + struct bgpio_chip *bgc = &priv->bgc; + unsigned long mask = bgc->pin2mask(bgc, offset); + unsigned long flags; + + spin_lock_irqsave(&bgc->lock, flags); + + if (val) + priv->imask |= mask; + else + priv->imask &= ~mask; + bgc->write_reg(priv->regs + GRGPIO_IMASK, priv->imask); + + spin_unlock_irqrestore(&bgc->lock, flags); +} + +static int grgpio_to_irq(struct gpio_chip *gc, unsigned offset) +{ + struct grgpio_priv *priv = grgpio_gc_to_priv(gc); + + if (offset >= gc->ngpio) + return -ENXIO; + + if (priv->lirqs[offset].index < 0) + return -ENXIO; + + return irq_create_mapping(priv->domain, offset); +} + +/* -------------------- IRQ chip functions -------------------- */ + +static int grgpio_irq_set_type(struct irq_data *d, unsigned int type) +{ + struct grgpio_priv *priv = irq_data_get_irq_chip_data(d); + unsigned long flags; + u32 mask = BIT(d->hwirq); + u32 ipol; + u32 iedge; + u32 pol; + u32 edge; + + switch (type) { + case IRQ_TYPE_LEVEL_LOW: + pol = 0; + edge = 0; + break; + case IRQ_TYPE_LEVEL_HIGH: + pol = mask; + edge = 0; + break; + case IRQ_TYPE_EDGE_FALLING: + pol = 0; + edge = mask; + break; + case IRQ_TYPE_EDGE_RISING: + pol = mask; + edge = mask; + break; + default: + return -EINVAL; + } + + spin_lock_irqsave(&priv->bgc.lock, flags); + + ipol = priv->bgc.read_reg(priv->regs + GRGPIO_IPOL) & ~mask; + iedge = priv->bgc.read_reg(priv->regs + GRGPIO_IEDGE) & ~mask; + + priv->bgc.write_reg(priv->regs + GRGPIO_IPOL, ipol | pol); + priv->bgc.write_reg(priv->regs + GRGPIO_IEDGE, iedge | edge); + + spin_unlock_irqrestore(&priv->bgc.lock, flags); + + return 0; +} + +static void grgpio_irq_mask(struct irq_data *d) +{ + struct grgpio_priv *priv = irq_data_get_irq_chip_data(d); + int offset = d->hwirq; + + grgpio_set_imask(priv, offset, 0); +} + +static void grgpio_irq_unmask(struct irq_data *d) +{ + struct grgpio_priv *priv = irq_data_get_irq_chip_data(d); + int offset = d->hwirq; + + grgpio_set_imask(priv, offset, 1); +} + +static struct irq_chip grgpio_irq_chip = { + .name = "grgpio", + .irq_mask = grgpio_irq_mask, + .irq_unmask = grgpio_irq_unmask, + .irq_set_type = grgpio_irq_set_type, +}; + +static irqreturn_t grgpio_irq_handler(int irq, void *dev) +{ + struct grgpio_priv *priv = dev; + int ngpio = priv->bgc.gc.ngpio; + unsigned long flags; + int i; + int match = 0; + + spin_lock_irqsave(&priv->bgc.lock, flags); + + /* + * For each gpio line, call its interrupt handler if its underlying + * irq matches the current irq that is handled. 
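+ * Several gpio lines can share one underlying irq, so every line + * must be inspected; "match" records whether any line claimed the + * interrupt so an unexpected one can be reported.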
+ */ + for (i = 0; i < ngpio; i++) { + struct grgpio_lirq *lirq = &priv->lirqs[i]; + + if (priv->imask & BIT(i) && lirq->index >= 0 && + priv->uirqs[lirq->index].uirq == irq) { + generic_handle_irq(lirq->irq); + match = 1; + } + } + + spin_unlock_irqrestore(&priv->bgc.lock, flags); + + if (!match) + dev_warn(priv->dev, "No gpio line matched irq %d\n", irq); + + return IRQ_HANDLED; +} + +/* + * This function will be called as a consequence of the call to + * irq_create_mapping in grgpio_to_irq + */ +int grgpio_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct grgpio_priv *priv = d->host_data; + struct grgpio_lirq *lirq; + struct grgpio_uirq *uirq; + unsigned long flags; + int offset = hwirq; + int ret = 0; + + if (!priv) + return -EINVAL; + + lirq = &priv->lirqs[offset]; + if (lirq->index < 0) + return -EINVAL; + + dev_dbg(priv->dev, "Mapping irq %d for gpio line %d\n", + irq, offset); + + spin_lock_irqsave(&priv->bgc.lock, flags); + + /* Request underlying irq if not already requested */ + lirq->irq = irq; + uirq = &priv->uirqs[lirq->index]; + if (uirq->refcnt == 0) { + ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, + dev_name(priv->dev), priv); + if (ret) { + dev_err(priv->dev, + "Could not request underlying irq %d\n", + uirq->uirq); + + spin_unlock_irqrestore(&priv->bgc.lock, flags); + + return ret; + } + } + uirq->refcnt++; + + spin_unlock_irqrestore(&priv->bgc.lock, flags); + + /* Setup irq */ + irq_set_chip_data(irq, priv); + irq_set_chip_and_handler(irq, &grgpio_irq_chip, + handle_simple_irq); + irq_clear_status_flags(irq, IRQ_NOREQUEST); +#ifdef CONFIG_ARM + set_irq_flags(irq, IRQF_VALID); +#else + irq_set_noprobe(irq); +#endif + + return ret; +} + +void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) +{ + struct grgpio_priv *priv = d->host_data; + int index; + struct grgpio_lirq *lirq; + struct grgpio_uirq *uirq; + unsigned long flags; + int ngpio = priv->bgc.gc.ngpio; + int i; + +#ifdef CONFIG_ARM + set_irq_flags(irq, 0); +#endif + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); + + spin_lock_irqsave(&priv->bgc.lock, flags); + + /* Free underlying irq if last user unmapped */ + index = -1; + for (i = 0; i < ngpio; i++) { + lirq = &priv->lirqs[i]; + if (lirq->irq == irq) { + grgpio_set_imask(priv, i, 0); + lirq->irq = 0; + index = lirq->index; + break; + } + } + WARN_ON(index < 0); + + if (index >= 0) { + uirq = &priv->uirqs[lirq->index]; + uirq->refcnt--; + if (uirq->refcnt == 0) + free_irq(uirq->uirq, priv); + } + + spin_unlock_irqrestore(&priv->bgc.lock, flags); +} + +static struct irq_domain_ops grgpio_irq_domain_ops = { + .map = grgpio_irq_map, + .unmap = grgpio_irq_unmap, +}; + +/* ------------------------------------------------------------ */ + +static int grgpio_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + void __iomem *regs; + struct gpio_chip *gc; + struct bgpio_chip *bgc; + struct grgpio_priv *priv; + struct resource *res; + int err; + u32 prop; + s32 *irqmap; + int size; + int i; + + priv = devm_kzalloc(&ofdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); + regs = devm_ioremap_resource(&ofdev->dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + bgc = &priv->bgc; + err = bgpio_init(bgc, &ofdev->dev, 4, regs + GRGPIO_DATA, + regs + GRGPIO_OUTPUT, NULL, regs + GRGPIO_DIR, NULL, + BGPIOF_BIG_ENDIAN_BYTE_ORDER); + if (err) { + dev_err(&ofdev->dev, "bgpio_init() 
failed\n"); + return err; + } + + priv->regs = regs; + priv->imask = bgc->read_reg(regs + GRGPIO_IMASK); + priv->dev = &ofdev->dev; + + gc = &bgc->gc; + gc->of_node = np; + gc->owner = THIS_MODULE; + gc->to_irq = grgpio_to_irq; + gc->label = np->full_name; + gc->base = -1; + + err = of_property_read_u32(np, "nbits", &prop); + if (err || prop <= 0 || prop > GRGPIO_MAX_NGPIO) { + gc->ngpio = GRGPIO_MAX_NGPIO; + dev_dbg(&ofdev->dev, + "No or invalid nbits property: assume %d\n", gc->ngpio); + } else { + gc->ngpio = prop; + } + + /* + * The irqmap contains the index values indicating which underlying irq, + * if anyone, is connected to that line + */ + irqmap = (s32 *)of_get_property(np, "irqmap", &size); + if (irqmap) { + if (size < gc->ngpio) { + dev_err(&ofdev->dev, + "irqmap shorter than ngpio (%d < %d)\n", + size, gc->ngpio); + return -EINVAL; + } + + priv->domain = irq_domain_add_linear(np, gc->ngpio, + &grgpio_irq_domain_ops, + priv); + if (!priv->domain) { + dev_err(&ofdev->dev, "Could not add irq domain\n"); + return -EINVAL; + } + + for (i = 0; i < gc->ngpio; i++) { + struct grgpio_lirq *lirq; + int ret; + + lirq = &priv->lirqs[i]; + lirq->index = irqmap[i]; + + if (lirq->index < 0) + continue; + + ret = platform_get_irq(ofdev, lirq->index); + if (ret <= 0) { + /* + * Continue without irq functionality for that + * gpio line + */ + dev_err(priv->dev, + "Failed to get irq for offset %d\n", i); + continue; + } + priv->uirqs[lirq->index].uirq = ret; + } + } + + platform_set_drvdata(ofdev, priv); + + err = gpiochip_add(gc); + if (err) { + dev_err(&ofdev->dev, "Could not add gpiochip\n"); + return err; + } + + dev_info(&ofdev->dev, "regs=0x%p, base=%d, ngpio=%d, irqs=%s\n", + priv->regs, gc->base, gc->ngpio, priv->domain ? "on" : "off"); + + return 0; +} + +static int grgpio_remove(struct platform_device *ofdev) +{ + struct grgpio_priv *priv = platform_get_drvdata(ofdev); + unsigned long flags; + int i; + int ret = 0; + + spin_lock_irqsave(&priv->bgc.lock, flags); + + if (priv->domain) { + for (i = 0; i < GRGPIO_MAX_NGPIO; i++) { + if (priv->uirqs[i].refcnt != 0) { + ret = -EBUSY; + goto out; + } + } + } + + ret = gpiochip_remove(&priv->bgc.gc); + if (ret) + goto out; + + if (priv->domain) + irq_domain_remove(priv->domain); + +out: + spin_unlock_irqrestore(&priv->bgc.lock, flags); + + return ret; +} + +static struct of_device_id grgpio_match[] = { + {.name = "GAISLER_GPIO"}, + {.name = "01_01a"}, + {}, +}; + +MODULE_DEVICE_TABLE(of, grgpio_match); + +static struct platform_driver grgpio_driver = { + .driver = { + .name = "grgpio", + .owner = THIS_MODULE, + .of_match_table = grgpio_match, + }, + .probe = grgpio_probe, + .remove = grgpio_remove, +}; +module_platform_driver(grgpio_driver); + +MODULE_AUTHOR("Aeroflex Gaisler AB."); +MODULE_DESCRIPTION("Driver for Aeroflex Gaisler GRGPIO"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c index de3c317bd3e..e16d932fd44 100644 --- a/drivers/gpio/gpio-ich.c +++ b/drivers/gpio/gpio-ich.c @@ -130,14 +130,11 @@ static int ichx_read_bit(int reg, unsigned nr) static bool ichx_gpio_check_available(struct gpio_chip *gpio, unsigned nr) { - return ichx_priv.use_gpio & (1 << (nr / 32)); + return !!(ichx_priv.use_gpio & (1 << (nr / 32))); } static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) { - if (!ichx_gpio_check_available(gpio, nr)) - return -ENXIO; - /* * Try setting pin as an input and verify it worked since many pins * are output-only. 
@@ -151,9 +148,6 @@ static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, int val) { - if (!ichx_gpio_check_available(gpio, nr)) - return -ENXIO; - /* Set GPIO output value. */ ichx_write_bit(GPIO_LVL, nr, val, 0); @@ -169,9 +163,6 @@ static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, static int ichx_gpio_get(struct gpio_chip *chip, unsigned nr) { - if (!ichx_gpio_check_available(chip, nr)) - return -ENXIO; - return ichx_read_bit(GPIO_LVL, nr); } @@ -180,9 +171,6 @@ static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr) unsigned long flags; u32 data; - if (!ichx_gpio_check_available(chip, nr)) - return -ENXIO; - /* * GPI 0 - 15 need to be read from the power management registers on * a ICH6/3100 bridge. @@ -207,6 +195,9 @@ static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr) static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr) { + if (!ichx_gpio_check_available(chip, nr)) + return -ENXIO; + /* * Note we assume the BIOS properly set a bridge's USE value. Some * chips (eg Intel 3100) have bogus USE values though, so first see if diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c index 36d7dee07b2..dda6a756a3d 100644 --- a/drivers/gpio/gpio-lpc32xx.c +++ b/drivers/gpio/gpio-lpc32xx.c @@ -533,7 +533,7 @@ static int lpc32xx_of_xlate(struct gpio_chip *gc, { /* Is this the correct bank? */ u32 bank = gpiospec->args[0]; - if ((bank > ARRAY_SIZE(lpc32xx_gpiochip) || + if ((bank >= ARRAY_SIZE(lpc32xx_gpiochip) || (gc != &lpc32xx_gpiochip[bank].chip))) return -EINVAL; diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 3472b05ac51..86c17de8769 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -32,6 +32,7 @@ #include <linux/acpi.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/io.h> /* LynxPoint chipset has support for 94 gpio pins */ diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c index 4b6b9a04e32..40ab6dfb602 100644 --- a/drivers/gpio/gpio-max7300.c +++ b/drivers/gpio/gpio-max7300.c @@ -41,7 +41,7 @@ static int max7300_probe(struct i2c_client *client, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; - ts = kzalloc(sizeof(struct max7301), GFP_KERNEL); + ts = devm_kzalloc(&client->dev, sizeof(struct max7301), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -50,8 +50,6 @@ static int max7300_probe(struct i2c_client *client, ts->dev = &client->dev; ret = __max730x_probe(ts); - if (ret) - kfree(ts); return ret; } diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c index c6c535c1310..3b16ab70163 100644 --- a/drivers/gpio/gpio-max7301.c +++ b/drivers/gpio/gpio-max7301.c @@ -56,12 +56,13 @@ static int max7301_probe(struct spi_device *spi) int ret; /* bits_per_word cannot be configured in platform data */ - spi->bits_per_word = 16; + if (spi->dev.platform_data) + spi->bits_per_word = 16; ret = spi_setup(spi); if (ret < 0) return ret; - ts = kzalloc(sizeof(struct max7301), GFP_KERNEL); + ts = devm_kzalloc(&spi->dev, sizeof(struct max7301), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -70,8 +71,6 @@ static int max7301_probe(struct spi_device *spi) ts->dev = &spi->dev; ret = __max730x_probe(ts); - if (ret) - kfree(ts); return ret; } diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index 1e0467ce4c3..d4b51b163b0 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -589,7 +589,8 @@ 
static int max732x_probe(struct i2c_client *client, return -EINVAL; } - chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL); + chip = devm_kzalloc(&client->dev, sizeof(struct max732x_chip), + GFP_KERNEL); if (chip == NULL) return -ENOMEM; chip->client = client; @@ -647,7 +648,6 @@ static int max732x_probe(struct i2c_client *client, out_failed: max732x_irq_teardown(chip); - kfree(chip); return ret; } @@ -680,7 +680,6 @@ static int max732x_remove(struct i2c_client *client) if (chip->client_dummy) i2c_unregister_device(chip->client_dummy); - kfree(chip); return 0; } diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c index 6a8fdc26ae6..63a7a1bfb2d 100644 --- a/drivers/gpio/gpio-mc33880.c +++ b/drivers/gpio/gpio-mc33880.c @@ -101,13 +101,13 @@ static int mc33880_probe(struct spi_device *spi) if (ret < 0) return ret; - mc = kzalloc(sizeof(struct mc33880), GFP_KERNEL); + mc = devm_kzalloc(&spi->dev, sizeof(struct mc33880), GFP_KERNEL); if (!mc) return -ENOMEM; mutex_init(&mc->lock); - dev_set_drvdata(&spi->dev, mc); + spi_set_drvdata(spi, mc); mc->spi = spi; @@ -130,7 +130,8 @@ static int mc33880_probe(struct spi_device *spi) ret = mc33880_write_config(mc); if (ret) { - printk(KERN_ERR "Failed writing to " DRIVER_NAME ": %d\n", ret); + dev_err(&spi->dev, "Failed writing to " DRIVER_NAME ": %d\n", + ret); goto exit_destroy; } @@ -141,9 +142,8 @@ static int mc33880_probe(struct spi_device *spi) return ret; exit_destroy: - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); mutex_destroy(&mc->lock); - kfree(mc); return ret; } @@ -152,17 +152,16 @@ static int mc33880_remove(struct spi_device *spi) struct mc33880 *mc; int ret; - mc = dev_get_drvdata(&spi->dev); + mc = spi_get_drvdata(spi); if (mc == NULL) return -ENODEV; - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); ret = gpiochip_remove(&mc->chip); - if (!ret) { + if (!ret) mutex_destroy(&mc->lock); - kfree(mc); - } else + else dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", ret); diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index 3cea0ea79e8..6a4470b8448 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -12,6 +12,8 @@ #include <linux/spi/mcp23s08.h> #include <linux/slab.h> #include <asm/byteorder.h> +#include <linux/of.h> +#include <linux/of_device.h> /** * MCP types supported by driver @@ -383,6 +385,10 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, mcp->chip.direction_output = mcp23s08_direction_output; mcp->chip.set = mcp23s08_set; mcp->chip.dbg_show = mcp23s08_dbg_show; +#ifdef CONFIG_OF + mcp->chip.of_gpio_n_cells = 2; + mcp->chip.of_node = dev->of_node; +#endif switch (type) { #ifdef CONFIG_SPI_MASTER @@ -473,6 +479,35 @@ fail: /*----------------------------------------------------------------------*/ +#ifdef CONFIG_OF +#ifdef CONFIG_SPI_MASTER +static struct of_device_id mcp23s08_spi_of_match[] = { + { + .compatible = "mcp,mcp23s08", .data = (void *) MCP_TYPE_S08, + }, + { + .compatible = "mcp,mcp23s17", .data = (void *) MCP_TYPE_S17, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, mcp23s08_spi_of_match); +#endif + +#if IS_ENABLED(CONFIG_I2C) +static struct of_device_id mcp23s08_i2c_of_match[] = { + { + .compatible = "mcp,mcp23008", .data = (void *) MCP_TYPE_008, + }, + { + .compatible = "mcp,mcp23017", .data = (void *) MCP_TYPE_017, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match); +#endif +#endif /* CONFIG_OF */ + + #if IS_ENABLED(CONFIG_I2C) static int mcp230xx_probe(struct 
i2c_client *client, @@ -480,12 +515,23 @@ static int mcp230xx_probe(struct i2c_client *client, { struct mcp23s08_platform_data *pdata; struct mcp23s08 *mcp; - int status; - - pdata = client->dev.platform_data; - if (!pdata || !gpio_is_valid(pdata->base)) { - dev_dbg(&client->dev, "invalid or missing platform data\n"); - return -EINVAL; + int status, base, pullups; + const struct of_device_id *match; + + match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match), + &client->dev); + if (match) { + base = -1; + pullups = 0; + } else { + pdata = client->dev.platform_data; + if (!pdata || !gpio_is_valid(pdata->base)) { + dev_dbg(&client->dev, + "invalid or missing platform data\n"); + return -EINVAL; + } + base = pdata->base; + pullups = pdata->chip[0].pullups; } mcp = kzalloc(sizeof *mcp, GFP_KERNEL); @@ -493,8 +539,7 @@ static int mcp230xx_probe(struct i2c_client *client, return -ENOMEM; status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr, - id->driver_data, pdata->base, - pdata->chip[0].pullups); + id->driver_data, base, pullups); if (status) goto fail; @@ -531,6 +576,7 @@ static struct i2c_driver mcp230xx_driver = { .driver = { .name = "mcp230xx", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(mcp23s08_i2c_of_match), }, .probe = mcp230xx_probe, .remove = mcp230xx_remove, @@ -565,28 +611,55 @@ static int mcp23s08_probe(struct spi_device *spi) unsigned chips = 0; struct mcp23s08_driver_data *data; int status, type; - unsigned base; - - type = spi_get_device_id(spi)->driver_data; - - pdata = spi->dev.platform_data; - if (!pdata || !gpio_is_valid(pdata->base)) { - dev_dbg(&spi->dev, "invalid or missing platform data\n"); - return -EINVAL; - } + unsigned base = -1, + ngpio = 0, + pullups[ARRAY_SIZE(pdata->chip)]; + const struct of_device_id *match; + u32 spi_present_mask = 0; + + match = of_match_device(of_match_ptr(mcp23s08_spi_of_match), &spi->dev); + if (match) { + type = (int)match->data; + status = of_property_read_u32(spi->dev.of_node, + "mcp,spi-present-mask", &spi_present_mask); + if (status) { + dev_err(&spi->dev, "DT has no spi-present-mask\n"); + return -ENODEV; + } + if ((spi_present_mask <= 0) || (spi_present_mask >= 256)) { + dev_err(&spi->dev, "invalid spi-present-mask\n"); + return -ENODEV; + } - for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { - if (!pdata->chip[addr].is_present) - continue; - chips++; - if ((type == MCP_TYPE_S08) && (addr > 3)) { - dev_err(&spi->dev, - "mcp23s08 only supports address 0..3\n"); + for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) + pullups[addr] = 0; + } else { + type = spi_get_device_id(spi)->driver_data; + pdata = spi->dev.platform_data; + if (!pdata || !gpio_is_valid(pdata->base)) { + dev_dbg(&spi->dev, + "invalid or missing platform data\n"); return -EINVAL; } + + for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { + if (!pdata->chip[addr].is_present) + continue; + chips++; + if ((type == MCP_TYPE_S08) && (addr > 3)) { + dev_err(&spi->dev, + "mcp23s08 only supports address 0..3\n"); + return -EINVAL; + } + spi_present_mask |= 1 << addr; + pullups[addr] = pdata->chip[addr].pullups; + } + + if (!chips) + return -ENODEV; + + base = pdata->base; } - if (!chips) - return -ENODEV; data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08), GFP_KERNEL); @@ -594,21 +667,22 @@ static int mcp23s08_probe(struct spi_device *spi) return -ENOMEM; spi_set_drvdata(spi, data); - base = pdata->base; for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { - if (!pdata->chip[addr].is_present) + if 
(!(spi_present_mask & (1 << addr))) continue; chips--; data->mcp[addr] = &data->chip[chips]; status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi, 0x40 | (addr << 1), type, base, - pdata->chip[addr].pullups); + pullups[addr]); if (status < 0) goto fail; - base += (type == MCP_TYPE_S17) ? 16 : 8; + if (base != -1) + base += (type == MCP_TYPE_S17) ? 16 : 8; + ngpio += (type == MCP_TYPE_S17) ? 16 : 8; } - data->ngpio = base - pdata->base; + data->ngpio = ngpio; /* NOTE: these chips have a relatively sane IRQ framework, with * per-signal masking and level/edge triggering. It's not yet @@ -668,6 +742,7 @@ static struct spi_driver mcp23s08_driver = { .driver = { .name = "mcp23s08", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(mcp23s08_spi_of_match), }, }; diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 61a6fde6c08..bf69a7eff37 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -117,7 +117,7 @@ static inline void __iomem *mvebu_gpioreg_edge_cause(struct mvebu_gpio_chip *mvc { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: case MVEBU_GPIO_SOC_VARIANT_MV78200: return mvchip->membase + GPIO_EDGE_CAUSE_OFF; @@ -133,7 +133,7 @@ static inline void __iomem *mvebu_gpioreg_edge_mask(struct mvebu_gpio_chip *mvch { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: return mvchip->membase + GPIO_EDGE_MASK_OFF; case MVEBU_GPIO_SOC_VARIANT_MV78200: @@ -151,7 +151,7 @@ static void __iomem *mvebu_gpioreg_level_mask(struct mvebu_gpio_chip *mvchip) { int cpu; - switch(mvchip->soc_variant) { + switch (mvchip->soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: return mvchip->membase + GPIO_LEVEL_MASK_OFF; case MVEBU_GPIO_SOC_VARIANT_MV78200: @@ -401,7 +401,7 @@ static int mvebu_gpio_irq_set_type(struct irq_data *d, unsigned int type) /* * Configure interrupt polarity. */ - switch(type) { + switch (type) { case IRQ_TYPE_EDGE_RISING: case IRQ_TYPE_LEVEL_HIGH: u = readl_relaxed(mvebu_gpioreg_in_pol(mvchip)); @@ -470,18 +470,76 @@ static void mvebu_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) } } +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> + +static void mvebu_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) +{ + struct mvebu_gpio_chip *mvchip = + container_of(chip, struct mvebu_gpio_chip, chip); + u32 out, io_conf, blink, in_pol, data_in, cause, edg_msk, lvl_msk; + int i; + + out = readl_relaxed(mvebu_gpioreg_out(mvchip)); + io_conf = readl_relaxed(mvebu_gpioreg_io_conf(mvchip)); + blink = readl_relaxed(mvebu_gpioreg_blink(mvchip)); + in_pol = readl_relaxed(mvebu_gpioreg_in_pol(mvchip)); + data_in = readl_relaxed(mvebu_gpioreg_data_in(mvchip)); + cause = readl_relaxed(mvebu_gpioreg_edge_cause(mvchip)); + edg_msk = readl_relaxed(mvebu_gpioreg_edge_mask(mvchip)); + lvl_msk = readl_relaxed(mvebu_gpioreg_level_mask(mvchip)); + + for (i = 0; i < chip->ngpio; i++) { + const char *label; + u32 msk; + bool is_out; + + label = gpiochip_is_requested(chip, i); + if (!label) + continue; + + msk = 1 << i; + is_out = !(io_conf & msk); + + seq_printf(s, " gpio-%-3d (%-20.20s)", chip->base + i, label); + + if (is_out) { + seq_printf(s, " out %s %s\n", + out & msk ? "hi" : "lo", + blink & msk ? "(blink )" : ""); + continue; + } + + seq_printf(s, " in %s (act %s) - IRQ", + (data_in ^ in_pol) & msk ? "hi" : "lo", + in_pol & msk ? 
"lo" : "hi"); + if (!((edg_msk | lvl_msk) & msk)) { + seq_printf(s, " disabled\n"); + continue; + } + if (edg_msk & msk) + seq_printf(s, " edge "); + if (lvl_msk & msk) + seq_printf(s, " level"); + seq_printf(s, " (%s)\n", cause & msk ? "pending" : "clear "); + } +} +#else +#define mvebu_gpio_dbg_show NULL +#endif + static struct of_device_id mvebu_gpio_of_match[] = { { .compatible = "marvell,orion-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_ORION, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_ORION, }, { .compatible = "marvell,mv78200-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_MV78200, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_MV78200, }, { .compatible = "marvell,armadaxp-gpio", - .data = (void*) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, + .data = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP, }, { /* sentinel */ @@ -509,13 +567,13 @@ static int mvebu_gpio_probe(struct platform_device *pdev) soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (! res) { + if (!res) { dev_err(&pdev->dev, "Cannot get memory resource\n"); return -ENODEV; } mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip), GFP_KERNEL); - if (! mvchip){ + if (!mvchip) { dev_err(&pdev->dev, "Cannot allocate memory\n"); return -ENOMEM; } @@ -550,6 +608,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.ngpio = ngpios; mvchip->chip.can_sleep = 0; mvchip->chip.of_node = np; + mvchip->chip.dbg_show = mvebu_gpio_dbg_show; spin_lock_init(&mvchip->lock); mvchip->membase = devm_ioremap_resource(&pdev->dev, res); @@ -560,21 +619,21 @@ static int mvebu_gpio_probe(struct platform_device *pdev) * per-CPU registers */ if (soc_variant == MVEBU_GPIO_SOC_VARIANT_ARMADAXP) { res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (! res) { + if (!res) { dev_err(&pdev->dev, "Cannot get memory resource\n"); return -ENODEV; } mvchip->percpu_membase = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mvchip->percpu_membase)) + if (IS_ERR(mvchip->percpu_membase)) return PTR_ERR(mvchip->percpu_membase); } /* * Mask and clear GPIO interrupts. */ - switch(soc_variant) { + switch (soc_variant) { case MVEBU_GPIO_SOC_VARIANT_ORION: writel_relaxed(0, mvchip->membase + GPIO_EDGE_CAUSE_OFF); writel_relaxed(0, mvchip->membase + GPIO_EDGE_MASK_OFF); @@ -632,7 +691,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase, mvchip->membase, handle_level_irq); - if (! 
gc) { + if (!gc) { dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n"); return -ENOMEM; } diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index a612ea1c53c..2050891d9c6 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -52,7 +52,6 @@ struct gpio_bank { struct list_head node; void __iomem *base; u16 irq; - int irq_base; struct irq_domain *domain; u32 non_wakeup_gpios; u32 enabled_non_wakeup_gpios; @@ -88,7 +87,14 @@ struct gpio_bank { static int irq_to_gpio(struct gpio_bank *bank, unsigned int gpio_irq) { - return gpio_irq - bank->irq_base + bank->chip.base; + return bank->chip.base + gpio_irq; +} + +static int omap_gpio_to_irq(struct gpio_chip *chip, unsigned offset) +{ + struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip); + + return irq_find_mapping(bank->domain, offset); } static void _set_gpio_direction(struct gpio_bank *bank, int gpio, int is_input) @@ -420,13 +426,16 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) int retval; unsigned long flags; + if (WARN_ON(!bank->mod_usage)) + return -EINVAL; + #ifdef CONFIG_ARCH_OMAP1 if (d->irq > IH_MPUIO_BASE) gpio = OMAP_MPUIO(d->irq - IH_MPUIO_BASE); #endif if (!gpio) - gpio = irq_to_gpio(bank, d->irq); + gpio = irq_to_gpio(bank, d->hwirq); if (type & ~IRQ_TYPE_SENSE_MASK) return -EINVAL; @@ -579,7 +588,7 @@ static void _reset_gpio(struct gpio_bank *bank, int gpio) static int gpio_wake_enable(struct irq_data *d, unsigned int enable) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); return _set_gpio_wakeup(bank, gpio, enable); } @@ -679,7 +688,7 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) { void __iomem *isr_reg = NULL; u32 isr; - unsigned int gpio_irq, gpio_index; + unsigned int bit; struct gpio_bank *bank; int unmasked = 0; struct irq_chip *chip = irq_desc_get_chip(desc); @@ -693,7 +702,7 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) if (WARN_ON(!isr_reg)) goto exit; - while(1) { + while (1) { u32 isr_saved, level_mask = 0; u32 enabled; @@ -720,14 +729,9 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) if (!isr) break; - gpio_irq = bank->irq_base; - for (; isr != 0; isr >>= 1, gpio_irq++) { - int gpio = irq_to_gpio(bank, gpio_irq); - - if (!(isr & 1)) - continue; - - gpio_index = GPIO_INDEX(bank, gpio); + while (isr) { + bit = __ffs(isr); + isr &= ~(1 << bit); /* * Some chips can't respond to both rising and falling @@ -736,10 +740,10 @@ static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) * to respond to the IRQ for the opposite direction. * This will be indicated in the bank toggle_mask. 
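
The rewritten dispatch loop above retires one pending interrupt bit per iteration instead of shifting through all 32 bit positions. A minimal user-space sketch of that find-first-set idiom, with GCC's __builtin_ctz standing in for the kernel's __ffs (both return the 0-based index of the lowest set bit of a nonzero word) and printf() standing in for generic_handle_irq():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t isr = 0x00050002;      /* bits 1, 16 and 18 pending */

            while (isr) {
                    /* __builtin_ctz == __ffs: index of lowest set bit */
                    unsigned int bit = __builtin_ctz(isr);

                    isr &= ~(1u << bit);    /* clear it; bounds the loop */
                    printf("dispatch hwirq %u\n", bit);
            }
            return 0;
    }

Clearing each bit as it is handled is what terminates the loop; the driver does the same with isr &= ~(1 << bit) before calling into the IRQ core.
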
*/ - if (bank->toggle_mask & (1 << gpio_index)) - _toggle_gpio_edge_triggering(bank, gpio_index); + if (bank->toggle_mask & (1 << bit)) + _toggle_gpio_edge_triggering(bank, bit); - generic_handle_irq(gpio_irq); + generic_handle_irq(irq_find_mapping(bank->domain, bit)); } } /* if bank has any level sensitive GPIO pin interrupt @@ -755,7 +759,7 @@ exit: static void gpio_irq_shutdown(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; spin_lock_irqsave(&bank->lock, flags); @@ -766,7 +770,7 @@ static void gpio_irq_shutdown(struct irq_data *d) static void gpio_ack_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); _clear_gpio_irqstatus(bank, gpio); } @@ -774,7 +778,7 @@ static void gpio_ack_irq(struct irq_data *d) static void gpio_mask_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; spin_lock_irqsave(&bank->lock, flags); @@ -786,7 +790,7 @@ static void gpio_mask_irq(struct irq_data *d) static void gpio_unmask_irq(struct irq_data *d) { struct gpio_bank *bank = irq_data_get_irq_chip_data(d); - unsigned int gpio = irq_to_gpio(bank, d->irq); + unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned int irq_mask = GPIO_BIT(bank, gpio); u32 trigger = irqd_get_trigger_type(d); unsigned long flags; @@ -952,14 +956,6 @@ static void gpio_set(struct gpio_chip *chip, unsigned offset, int value) spin_unlock_irqrestore(&bank->lock, flags); } -static int gpio_2irq(struct gpio_chip *chip, unsigned offset) -{ - struct gpio_bank *bank; - - bank = container_of(chip, struct gpio_bank, chip); - return bank->irq_base + offset; -} - /*---------------------------------------------------------------------*/ static void __init omap_gpio_show_rev(struct gpio_bank *bank) @@ -1056,7 +1052,7 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) bank->chip.direction_output = gpio_output; bank->chip.set_debounce = gpio_debounce; bank->chip.set = gpio_set; - bank->chip.to_irq = gpio_2irq; + bank->chip.to_irq = omap_gpio_to_irq; if (bank->is_mpuio) { bank->chip.label = "mpuio"; if (bank->regs->wkup_en) @@ -1071,15 +1067,16 @@ static void omap_gpio_chip_init(struct gpio_bank *bank) gpiochip_add(&bank->chip); - for (j = bank->irq_base; j < bank->irq_base + bank->width; j++) { - irq_set_lockdep_class(j, &gpio_lock_class); - irq_set_chip_data(j, bank); + for (j = 0; j < bank->width; j++) { + int irq = irq_create_mapping(bank->domain, j); + irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_chip_data(irq, bank); if (bank->is_mpuio) { - omap_mpuio_alloc_gc(bank, j, bank->width); + omap_mpuio_alloc_gc(bank, irq, bank->width); } else { - irq_set_chip(j, &gpio_irq_chip); - irq_set_handler(j, handle_simple_irq); - set_irq_flags(j, IRQF_VALID); + irq_set_chip_and_handler(irq, &gpio_irq_chip, + handle_simple_irq); + set_irq_flags(irq, IRQF_VALID); } } irq_set_chained_handler(bank->irq, gpio_irq_handler); @@ -1096,7 +1093,6 @@ static int omap_gpio_probe(struct platform_device *pdev) const struct omap_gpio_platform_data *pdata; struct resource *res; struct gpio_bank *bank; - int ret = 0; match = of_match_device(of_match_ptr(omap_gpio_match), dev); @@ -1123,20 +1119,22 @@ static int omap_gpio_probe(struct 
platform_device *pdev) bank->width = pdata->bank_width; bank->is_mpuio = pdata->is_mpuio; bank->non_wakeup_gpios = pdata->non_wakeup_gpios; - bank->loses_context = pdata->loses_context; bank->regs = pdata->regs; #ifdef CONFIG_OF_GPIO bank->chip.of_node = of_node_get(node); #endif - - bank->irq_base = irq_alloc_descs(-1, 0, bank->width, 0); - if (bank->irq_base < 0) { - dev_err(dev, "Couldn't allocate IRQ numbers\n"); - return -ENODEV; + if (node) { + if (!of_property_read_bool(node, "ti,gpio-always-on")) + bank->loses_context = true; + } else { + bank->loses_context = pdata->loses_context; } - bank->domain = irq_domain_add_legacy(node, bank->width, bank->irq_base, - 0, &irq_domain_simple_ops, NULL); + + bank->domain = irq_domain_add_linear(node, bank->width, + &irq_domain_simple_ops, NULL); + if (!bank->domain) + return -ENODEV; if (bank->regs->set_dataout && bank->regs->clr_dataout) bank->set_dataout = _set_gpio_dataout_reg; @@ -1149,18 +1147,21 @@ static int omap_gpio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (unlikely(!res)) { dev_err(dev, "Invalid mem resource\n"); + irq_domain_remove(bank->domain); return -ENODEV; } if (!devm_request_mem_region(dev, res->start, resource_size(res), pdev->name)) { dev_err(dev, "Region already claimed\n"); + irq_domain_remove(bank->domain); return -EBUSY; } bank->base = devm_ioremap(dev, res->start, resource_size(res)); if (!bank->base) { dev_err(dev, "Could not ioremap\n"); + irq_domain_remove(bank->domain); return -ENOMEM; } @@ -1184,7 +1185,7 @@ static int omap_gpio_probe(struct platform_device *pdev) list_add_tail(&bank->node, &omap_gpio_list); - return ret; + return 0; } #ifdef CONFIG_ARCH_OMAP2PLUS @@ -1262,9 +1263,9 @@ static int omap_gpio_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct gpio_bank *bank = platform_get_drvdata(pdev); - int context_lost_cnt_after; u32 l = 0, gen, gen0, gen1; unsigned long flags; + int c; spin_lock_irqsave(&bank->lock, flags); _gpio_dbck_enable(bank); @@ -1280,14 +1281,17 @@ static int omap_gpio_runtime_resume(struct device *dev) __raw_writel(bank->context.risingdetect, bank->base + bank->regs->risingdetect); - if (bank->get_context_loss_count) { - context_lost_cnt_after = - bank->get_context_loss_count(bank->dev); - if (context_lost_cnt_after != bank->context_loss_count) { + if (bank->loses_context) { + if (!bank->get_context_loss_count) { omap_gpio_restore_context(bank); } else { - spin_unlock_irqrestore(&bank->lock, flags); - return 0; + c = bank->get_context_loss_count(bank->dev); + if (c != bank->context_loss_count) { + omap_gpio_restore_context(bank); + } else { + spin_unlock_irqrestore(&bank->lock, flags); + return 0; + } } } @@ -1296,10 +1300,6 @@ static int omap_gpio_runtime_resume(struct device *dev) return 0; } - __raw_writel(bank->context.fallingdetect, - bank->base + bank->regs->fallingdetect); - __raw_writel(bank->context.risingdetect, - bank->base + bank->regs->risingdetect); l = __raw_readl(bank->base + bank->regs->datain); /* diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 9391cf16e99..426c51dd420 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -146,8 +146,7 @@ static int pca953x_write_regs(struct pca953x_chip *chip, int reg, u8 *val) ret = i2c_smbus_write_i2c_block_data(chip->client, (reg << bank_shift) | REG_ADDR_AI, NBANK(chip), val); - } - else { + } else { switch (chip->chip_type) { case PCA953X_TYPE: ret = 
i2c_smbus_write_word_data(chip->client, diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a19b7457a72..e8faf53f387 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -45,6 +45,7 @@ static const struct i2c_device_id pcf857x_id[] = { { "pca9675", 16 }, { "max7328", 8 }, { "max7329", 8 }, + { "tca9554", 8 }, { } }; MODULE_DEVICE_TABLE(i2c, pcf857x_id); @@ -267,7 +268,7 @@ static int pcf857x_probe(struct i2c_client *client, } /* Allocate, initialize, and register this gpio_chip. */ - gpio = kzalloc(sizeof *gpio, GFP_KERNEL); + gpio = devm_kzalloc(&client->dev, sizeof(*gpio), GFP_KERNEL); if (!gpio) return -ENOMEM; @@ -390,7 +391,6 @@ fail: if (pdata && client->irq) pcf857x_irq_domain_cleanup(gpio); - kfree(gpio); return status; } @@ -415,9 +415,7 @@ static int pcf857x_remove(struct i2c_client *client) pcf857x_irq_domain_cleanup(gpio); status = gpiochip_remove(&gpio->chip); - if (status == 0) - kfree(gpio); - else + if (status) dev_err(&client->dev, "%s --> %d\n", "remove", status); return status; } diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index d7a5c9d7552..df2199dd149 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -628,7 +628,7 @@ static int pxa_gpio_probe(struct platform_device *pdev) for_each_gpio_chip(gpio, c) { writel_relaxed(0, c->regbase + GFER_OFFSET); writel_relaxed(0, c->regbase + GRER_OFFSET); - writel_relaxed(~0,c->regbase + GEDR_OFFSET); + writel_relaxed(~0, c->regbase + GEDR_OFFSET); /* unmask GPIO edge detect for AP side */ if (gpio_is_mmp_type(gpio_type)) writel_relaxed(~0, c->regbase + ED_MASK_OFFSET); @@ -712,7 +712,7 @@ static void pxa_gpio_resume(void) for_each_gpio_chip(gpio, c) { /* restore level with set/clear */ - writel_relaxed( c->saved_gplr, c->regbase + GPSR_OFFSET); + writel_relaxed(c->saved_gplr, c->regbase + GPSR_OFFSET); writel_relaxed(~c->saved_gplr, c->regbase + GPCR_OFFSET); writel_relaxed(c->saved_grer, c->regbase + GRER_OFFSET); diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c index 99e0fa49fcb..b22ca793374 100644 --- a/drivers/gpio/gpio-samsung.c +++ b/drivers/gpio/gpio-samsung.c @@ -3030,6 +3030,7 @@ static __init int samsung_gpiolib_init(void) { .compatible = "samsung,exynos4x12-pinctrl", }, { .compatible = "samsung,exynos5250-pinctrl", }, { .compatible = "samsung,exynos5440-pinctrl", }, + { } }; for_each_matching_node(pctrl_np, exynos_pinctrl_ids) if (pctrl_np && of_device_is_available(pctrl_np)) diff --git a/drivers/gpio/gpio-sch.c b/drivers/gpio/gpio-sch.c index edae963f462..1e4de16ceb4 100644 --- a/drivers/gpio/gpio-sch.c +++ b/drivers/gpio/gpio-sch.c @@ -125,13 +125,17 @@ static int sch_gpio_resume_direction_in(struct gpio_chip *gc, unsigned gpio_num) { u8 curr_dirs; + unsigned short offset, bit; spin_lock(&gpio_lock); - curr_dirs = inb(gpio_ba + RGIO); + offset = RGIO + gpio_num / 8; + bit = gpio_num % 8; + + curr_dirs = inb(gpio_ba + offset); - if (!(curr_dirs & (1 << gpio_num))) - outb(curr_dirs | (1 << gpio_num) , gpio_ba + RGIO); + if (!(curr_dirs & (1 << bit))) + outb(curr_dirs | (1 << bit), gpio_ba + offset); spin_unlock(&gpio_lock); return 0; @@ -139,22 +143,31 @@ static int sch_gpio_resume_direction_in(struct gpio_chip *gc, static int sch_gpio_resume_get(struct gpio_chip *gc, unsigned gpio_num) { - return !!(inb(gpio_ba + RGLV) & (1 << gpio_num)); + unsigned short offset, bit; + + offset = RGLV + gpio_num / 8; + bit = gpio_num % 8; + + return !!(inb(gpio_ba + offset) & (1 << bit)); } static void sch_gpio_resume_set(struct 
gpio_chip *gc, unsigned gpio_num, int val) { u8 curr_vals; + unsigned short offset, bit; spin_lock(&gpio_lock); - curr_vals = inb(gpio_ba + RGLV); + offset = RGLV + gpio_num / 8; + bit = gpio_num % 8; + + curr_vals = inb(gpio_ba + offset); if (val) - outb(curr_vals | (1 << gpio_num), gpio_ba + RGLV); + outb(curr_vals | (1 << bit), gpio_ba + offset); else - outb((curr_vals & ~(1 << gpio_num)), gpio_ba + RGLV); + outb((curr_vals & ~(1 << bit)), gpio_ba + offset); spin_unlock(&gpio_lock); } @@ -163,14 +176,18 @@ static int sch_gpio_resume_direction_out(struct gpio_chip *gc, unsigned gpio_num, int val) { u8 curr_dirs; + unsigned short offset, bit; sch_gpio_resume_set(gc, gpio_num, val); + offset = RGIO + gpio_num / 8; + bit = gpio_num % 8; + spin_lock(&gpio_lock); - curr_dirs = inb(gpio_ba + RGIO); - if (curr_dirs & (1 << gpio_num)) - outb(curr_dirs & ~(1 << gpio_num), gpio_ba + RGIO); + curr_dirs = inb(gpio_ba + offset); + if (curr_dirs & (1 << bit)) + outb(curr_dirs & ~(1 << bit), gpio_ba + offset); spin_unlock(&gpio_lock); return 0; @@ -204,45 +221,41 @@ static int sch_gpio_probe(struct platform_device *pdev) gpio_ba = res->start; switch (id) { - case PCI_DEVICE_ID_INTEL_SCH_LPC: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 10; - - sch_gpio_resume.base = 10; - sch_gpio_resume.ngpio = 4; - - /* - * GPIO[6:0] enabled by default - * GPIO7 is configured by the CMC as SLPIOVR - * Enable GPIO[9:8] core powered gpios explicitly - */ - outb(0x3, gpio_ba + CGEN + 1); - /* - * SUS_GPIO[2:0] enabled by default - * Enable SUS_GPIO3 resume powered gpio explicitly - */ - outb(0x8, gpio_ba + RGEN); - break; - - case PCI_DEVICE_ID_INTEL_ITC_LPC: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 5; - - sch_gpio_resume.base = 5; - sch_gpio_resume.ngpio = 9; - break; - - case PCI_DEVICE_ID_INTEL_CENTERTON_ILB: - sch_gpio_core.base = 0; - sch_gpio_core.ngpio = 21; - - sch_gpio_resume.base = 21; - sch_gpio_resume.ngpio = 9; - break; - - default: - err = -ENODEV; - goto err_sch_gpio_core; + case PCI_DEVICE_ID_INTEL_SCH_LPC: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 10; + sch_gpio_resume.base = 10; + sch_gpio_resume.ngpio = 4; + /* + * GPIO[6:0] enabled by default + * GPIO7 is configured by the CMC as SLPIOVR + * Enable GPIO[9:8] core powered gpios explicitly + */ + outb(0x3, gpio_ba + CGEN + 1); + /* + * SUS_GPIO[2:0] enabled by default + * Enable SUS_GPIO3 resume powered gpio explicitly + */ + outb(0x8, gpio_ba + RGEN); + break; + + case PCI_DEVICE_ID_INTEL_ITC_LPC: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 5; + sch_gpio_resume.base = 5; + sch_gpio_resume.ngpio = 9; + break; + + case PCI_DEVICE_ID_INTEL_CENTERTON_ILB: + sch_gpio_core.base = 0; + sch_gpio_core.ngpio = 21; + sch_gpio_resume.base = 21; + sch_gpio_resume.ngpio = 9; + break; + + default: + err = -ENODEV; + goto err_sch_gpio_core; } sch_gpio_core.dev = &pdev->dev; diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c index c20e0515121..04882a911b6 100644 --- a/drivers/gpio/gpio-stp-xway.c +++ b/drivers/gpio/gpio-stp-xway.c @@ -217,7 +217,7 @@ static int xway_stp_probe(struct platform_device *pdev) chip->virt = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(chip->virt)) return PTR_ERR(chip->virt); - + chip->gc.dev = &pdev->dev; chip->gc.label = "stp-xway"; chip->gc.direction_output = xway_stp_dir_out; diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index c0595bbf326..d34d80dfb08 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -282,9 +282,9 @@ static void 
tc3589x_gpio_irq_unmap(struct irq_domain *d, unsigned int virq) } static struct irq_domain_ops tc3589x_irq_ops = { - .map = tc3589x_gpio_irq_map, - .unmap = tc3589x_gpio_irq_unmap, - .xlate = irq_domain_xlate_twocell, + .map = tc3589x_gpio_irq_map, + .unmap = tc3589x_gpio_irq_unmap, + .xlate = irq_domain_xlate_twocell, }; static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio, @@ -344,7 +344,7 @@ static int tc3589x_gpio_probe(struct platform_device *pdev) tc3589x_gpio->chip.base = (pdata) ? pdata->gpio_base : -1; #ifdef CONFIG_OF_GPIO - tc3589x_gpio->chip.of_node = np; + tc3589x_gpio->chip.of_node = np; #endif tc3589x_gpio->irq_base = tc3589x->irq_base ? diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index dde0656ea95..da4cb5b0cb8 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -414,10 +414,11 @@ static int tegra_gpio_probe(struct platform_device *pdev) int j; match = of_match_device(tegra_gpio_of_match, &pdev->dev); - if (match) - config = (struct tegra_gpio_soc_config *)match->data; - else - config = &tegra20_gpio_config; + if (!match) { + dev_err(&pdev->dev, "Error: No device match found\n"); + return -ENODEV; + } + config = (struct tegra_gpio_soc_config *)match->data; tegra_gpio_bank_stride = config->bank_stride; tegra_gpio_upper_offset = config->upper_offset; @@ -478,9 +479,7 @@ static int tegra_gpio_probe(struct platform_device *pdev) } } -#ifdef CONFIG_OF_GPIO tegra_gpio_chip.of_node = pdev->dev.of_node; -#endif gpiochip_add(&tegra_gpio_chip); diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index 702cca9284f..43774058b69 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -167,8 +167,7 @@ static int timbgpio_irq_type(struct irq_data *d, unsigned trigger) if (ver < 3) { ret = -EINVAL; goto out; - } - else { + } else { flr |= 1 << offset; bflr |= 1 << offset; } diff --git a/drivers/gpio/gpio-tps65910.c b/drivers/gpio/gpio-tps65910.c index 5083825a034..06146219d9d 100644 --- a/drivers/gpio/gpio-tps65910.c +++ b/drivers/gpio/gpio-tps65910.c @@ -133,7 +133,7 @@ static int tps65910_gpio_probe(struct platform_device *pdev) tps65910_gpio->gpio_chip.owner = THIS_MODULE; tps65910_gpio->gpio_chip.label = tps65910->i2c_client->name; - switch(tps65910_chip_id(tps65910)) { + switch (tps65910_chip_id(tps65910)) { case TPS65910: tps65910_gpio->gpio_chip.ngpio = TPS65910_NUM_GPIO; break; diff --git a/drivers/gpio/gpio-ucb1400.c b/drivers/gpio/gpio-ucb1400.c index 26405efe0f9..6d0feb234d3 100644 --- a/drivers/gpio/gpio-ucb1400.c +++ b/drivers/gpio/gpio-ucb1400.c @@ -12,8 +12,6 @@ #include <linux/module.h> #include <linux/ucb1400.h> -struct ucb1400_gpio_data *ucbdata; - static int ucb1400_gpio_dir_in(struct gpio_chip *gc, unsigned off) { struct ucb1400_gpio *gpio; @@ -50,7 +48,7 @@ static int ucb1400_gpio_probe(struct platform_device *dev) struct ucb1400_gpio *ucb = dev->dev.platform_data; int err = 0; - if (!(ucbdata && ucbdata->gpio_offset)) { + if (!(ucb && ucb->gpio_offset)) { err = -EINVAL; goto err; } @@ -58,7 +56,7 @@ static int ucb1400_gpio_probe(struct platform_device *dev) platform_set_drvdata(dev, ucb); ucb->gc.label = "ucb1400_gpio"; - ucb->gc.base = ucbdata->gpio_offset; + ucb->gc.base = ucb->gpio_offset; ucb->gc.ngpio = 10; ucb->gc.owner = THIS_MODULE; @@ -72,8 +70,8 @@ static int ucb1400_gpio_probe(struct platform_device *dev) if (err) goto err; - if (ucbdata && ucbdata->gpio_setup) - err = ucbdata->gpio_setup(&dev->dev, ucb->gc.ngpio); + if (ucb && ucb->gpio_setup) + 
err = ucb->gpio_setup(&dev->dev, ucb->gc.ngpio); err: return err; @@ -85,8 +83,8 @@ static int ucb1400_gpio_remove(struct platform_device *dev) int err = 0; struct ucb1400_gpio *ucb = platform_get_drvdata(dev); - if (ucbdata && ucbdata->gpio_teardown) { - err = ucbdata->gpio_teardown(&dev->dev, ucb->gc.ngpio); + if (ucb && ucb->gpio_teardown) { + err = ucb->gpio_teardown(&dev->dev, ucb->gc.ngpio); if (err) return err; } @@ -103,11 +101,6 @@ static struct platform_driver ucb1400_gpio_driver = { }, }; -void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) -{ - ucbdata = data; -} - module_platform_driver(ucb1400_gpio_driver); MODULE_DESCRIPTION("Philips UCB1400 GPIO driver"); diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c index 59d72391de2..095ab14cea4 100644 --- a/drivers/gpio/gpio-viperboard.c +++ b/drivers/gpio/gpio-viperboard.c @@ -380,10 +380,6 @@ static int vprbrd_gpiob_direction_output(struct gpio_chip *chip, struct vprbrd *vb = gpio->vb; gpio->gpiob_out |= (1 << offset); - if (value) - gpio->gpiob_val |= (1 << offset); - else - gpio->gpiob_val &= ~(1 << offset); mutex_lock(&vb->lock); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a063eb04b6c..5c1ef2b3ef1 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -17,6 +17,13 @@ #include <linux/acpi.h> #include <linux/interrupt.h> +struct acpi_gpio_evt_pin { + struct list_head node; + acpi_handle *evt_handle; + unsigned int pin; + unsigned int irq; +}; + static int acpi_gpiochip_find(struct gpio_chip *gc, void *data) { if (!gc->dev) @@ -54,7 +61,6 @@ int acpi_get_gpio(char *path, int pin) } EXPORT_SYMBOL_GPL(acpi_get_gpio); - static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { acpi_handle handle = data; @@ -64,6 +70,27 @@ static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t acpi_gpio_irq_handler_evt(int irq, void *data) +{ + struct acpi_gpio_evt_pin *evt_pin = data; + struct acpi_object_list args; + union acpi_object arg; + + arg.type = ACPI_TYPE_INTEGER; + arg.integer.value = evt_pin->pin; + args.count = 1; + args.pointer = &arg; + + acpi_evaluate_object(evt_pin->evt_handle, NULL, &args, NULL); + + return IRQ_HANDLED; +} + +static void acpi_gpio_evt_dh(acpi_handle handle, void *data) +{ + /* The address of this function is used as a key. */ +} + /** * acpi_gpiochip_request_interrupts() - Register isr for gpio chip ACPI events * @chip: gpio chip @@ -73,15 +100,13 @@ static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) * chip's interrupt handler. acpi_gpiochip_request_interrupts finds out which * gpio pins have acpi event methods and assigns interrupt handlers that call * the acpi event methods for those pins.
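
In the loop that follows, each interrupt-capable pin is tied to an ACPI method whose name encodes the trigger type and the pin number. A stand-alone sketch of just that naming rule; the pin value and trigger flag below are illustrative, not taken from any real firmware table:

    #include <stdio.h>

    int main(void)
    {
            char ev_name[5];                /* "_EXX"/"_LXX" plus NUL */
            unsigned int pin = 0x1a;        /* example pin number */
            int edge = 1;                   /* example: edge triggered */

            /* edge-triggered pins map to _Exx methods, level to _Lxx */
            snprintf(ev_name, sizeof(ev_name), "_%c%02X",
                     edge ? 'E' : 'L', pin);
            printf("%s\n", ev_name);        /* prints "_E1A" */
            return 0;
    }

The two-hex-digit encoding is also why only pins 0..255 can have per-pin methods; anything above falls back to the catch-all _EVT path added by this patch.
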
- * - * Interrupts are automatically freed on driver detach */ - void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; struct acpi_resource *res; - acpi_handle handle, ev_handle; + acpi_handle handle, evt_handle; + struct list_head *evt_pins = NULL; acpi_status status; unsigned int pin; int irq, ret; @@ -98,13 +123,30 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; - /* If a gpio interrupt has an acpi event handler method, then - * set up an interrupt handler that calls the acpi event handler - */ + status = acpi_get_handle(handle, "_EVT", &evt_handle); + if (ACPI_SUCCESS(status)) { + evt_pins = kzalloc(sizeof(*evt_pins), GFP_KERNEL); + if (evt_pins) { + INIT_LIST_HEAD(evt_pins); + status = acpi_attach_data(handle, acpi_gpio_evt_dh, + evt_pins); + if (ACPI_FAILURE(status)) { + kfree(evt_pins); + evt_pins = NULL; + } + } + } + /* + * If a GPIO interrupt has an ACPI event handler method, or _EVT is + * present, set up an interrupt handler that calls the ACPI event + * handler. + */ for (res = buf.pointer; res && (res->type != ACPI_RESOURCE_TYPE_END_TAG); res = ACPI_NEXT_RESOURCE(res)) { + irq_handler_t handler = NULL; + void *data; if (res->type != ACPI_RESOURCE_TYPE_GPIO || res->data.gpio.connection_type != @@ -115,23 +157,42 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (pin > chip->ngpio) continue; - sprintf(ev_name, "_%c%02X", - res->data.gpio.triggering ? 'E' : 'L', pin); - - status = acpi_get_handle(handle, ev_name, &ev_handle); - if (ACPI_FAILURE(status)) - continue; - irq = chip->to_irq(chip, pin); if (irq < 0) continue; + if (pin <= 255) { + acpi_handle ev_handle; + + sprintf(ev_name, "_%c%02X", + res->data.gpio.triggering ? 
'E' : 'L', pin); + status = acpi_get_handle(handle, ev_name, &ev_handle); + if (ACPI_SUCCESS(status)) { + handler = acpi_gpio_irq_handler; + data = ev_handle; + } + } + if (!handler && evt_pins) { + struct acpi_gpio_evt_pin *evt_pin; + + evt_pin = kzalloc(sizeof(*evt_pin), GFP_KERNEL); + if (!evt_pin) + continue; + + list_add_tail(&evt_pin->node, evt_pins); + evt_pin->evt_handle = evt_handle; + evt_pin->pin = pin; + evt_pin->irq = irq; + handler = acpi_gpio_irq_handler_evt; + data = evt_pin; + } + if (!handler) + continue; + /* Assume BIOS sets the triggering, so no flags */ - ret = devm_request_threaded_irq(chip->dev, irq, NULL, - acpi_gpio_irq_handler, - 0, - "GPIO-signaled-ACPI-event", - ev_handle); + ret = devm_request_threaded_irq(chip->dev, irq, NULL, handler, + 0, "GPIO-signaled-ACPI-event", + data); if (ret) dev_err(chip->dev, "Failed to request IRQ %d ACPI event handler\n", @@ -139,3 +200,119 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) } } EXPORT_SYMBOL(acpi_gpiochip_request_interrupts); + +struct acpi_gpio_lookup { + struct acpi_gpio_info info; + int index; + int gpio; + int n; +}; + +static int acpi_find_gpio(struct acpi_resource *ares, void *data) +{ + struct acpi_gpio_lookup *lookup = data; + + if (ares->type != ACPI_RESOURCE_TYPE_GPIO) + return 1; + + if (lookup->n++ == lookup->index && lookup->gpio < 0) { + const struct acpi_resource_gpio *agpio = &ares->data.gpio; + + lookup->gpio = acpi_get_gpio(agpio->resource_source.string_ptr, + agpio->pin_table[0]); + lookup->info.gpioint = + agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; + } + + return 1; +} + +/** + * acpi_get_gpio_by_index() - get a GPIO number from device resources + * @dev: pointer to a device to get GPIO from + * @index: index of GpioIo/GpioInt resource (starting from %0) + * @info: info pointer to fill in (optional) + * + * Function goes through ACPI resources for @dev and based on @index looks + * up a GpioIo/GpioInt resource, translates it to the Linux GPIO number, + * and returns it. @index matches GpioIo/GpioInt resources only so if there + * are total %3 GPIO resources, the index goes from %0 to %2. + * + * If the GPIO cannot be translated or there is an error, negative errno is + * returned. + * + * Note: if the GPIO resource has multiple entries in the pin list, this + * function only returns the first. + */ +int acpi_get_gpio_by_index(struct device *dev, int index, + struct acpi_gpio_info *info) +{ + struct acpi_gpio_lookup lookup; + struct list_head resource_list; + struct acpi_device *adev; + acpi_handle handle; + int ret; + + if (!dev) + return -EINVAL; + + handle = ACPI_HANDLE(dev); + if (!handle || acpi_bus_get_device(handle, &adev)) + return -ENODEV; + + memset(&lookup, 0, sizeof(lookup)); + lookup.index = index; + lookup.gpio = -ENODEV; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, acpi_find_gpio, + &lookup); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&resource_list); + + if (lookup.gpio >= 0 && info) + *info = lookup.info; + + return lookup.gpio; +} +EXPORT_SYMBOL_GPL(acpi_get_gpio_by_index); + +/** + * acpi_gpiochip_free_interrupts() - Free GPIO _EVT ACPI event interrupts. + * @chip: gpio chip + * + * Free interrupts associated with the _EVT method for the given GPIO chip. + * + * The remaining ACPI event interrupts associated with the chip are freed + * automatically. 
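
The free routine below has to unlink and free every node while the list walk is still in progress, hence the _safe iterator variant. The same delete-while-iterating pattern in plain C, with a hand-rolled singly linked list whose names are invented for illustration:

    #include <stdlib.h>

    struct evt_pin {
            struct evt_pin *next;
            unsigned int irq;
    };

    static void free_all(struct evt_pin *head)
    {
            struct evt_pin *cur = head, *next;

            while (cur) {
                    next = cur->next;       /* saved before cur is freed */
                    free(cur);              /* now safe to drop the node */
                    cur = next;
            }
    }

    int main(void)
    {
            struct evt_pin *b = malloc(sizeof(*b));
            struct evt_pin *a = malloc(sizeof(*a));

            b->next = NULL; b->irq = 2;     /* no NULL checks: a sketch */
            a->next = b;    a->irq = 1;
            free_all(a);
            return 0;
    }
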
+ */ +void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) +{ + acpi_handle handle; + acpi_status status; + struct list_head *evt_pins; + struct acpi_gpio_evt_pin *evt_pin, *ep; + + if (!chip->dev || !chip->to_irq) + return; + + handle = ACPI_HANDLE(chip->dev); + if (!handle) + return; + + status = acpi_get_data(handle, acpi_gpio_evt_dh, (void **)&evt_pins); + if (ACPI_FAILURE(status)) + return; + + list_for_each_entry_safe_reverse(evt_pin, ep, evt_pins, node) { + devm_free_irq(chip->dev, evt_pin->irq, evt_pin); + list_del(&evt_pin->node); + kfree(evt_pin); + } + + acpi_detach_data(handle, acpi_gpio_evt_dh); + kfree(evt_pins); +} +EXPORT_SYMBOL(acpi_gpiochip_free_interrupts); diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 465f4ca57e8..665f9530c95 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -61,7 +61,7 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) * in flags for the GPIO. */ int of_get_named_gpio_flags(struct device_node *np, const char *propname, - int index, enum of_gpio_flags *flags) + int index, enum of_gpio_flags *flags) { /* Return -EPROBE_DEFER to support probe() functions to be called * later when the GPIO actually becomes available diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index ac050066700..6a195d5e90f 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -628,4 +628,16 @@ config KEYBOARD_W90P910 To compile this driver as a module, choose M here: the module will be called w90p910_keypad. +config KEYBOARD_CROS_EC + tristate "ChromeOS EC keyboard" + select INPUT_MATRIXKMAP + depends on MFD_CROS_EC + help + Say Y here to enable the matrix keyboard used by ChromeOS devices + and implemented on the ChromeOS EC. You must enable one bus option + (MFD_CROS_EC_I2C or MFD_CROS_EC_SPI) to use this. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_keyb. + endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 49b16453d00..0c43e8cf8d0 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o +obj-$(CONFIG_KEYBOARD_CROS_EC) += cros_ec_keyb.o obj-$(CONFIG_KEYBOARD_DAVINCI) += davinci_keyscan.o obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o obj-$(CONFIG_KEYBOARD_GOLDFISH_EVENTS) += goldfish_events.o diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c new file mode 100644 index 00000000000..49557f27bfa --- /dev/null +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -0,0 +1,334 @@ +/* + * ChromeOS EC keyboard driver + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). 
The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. + */ + +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/kernel.h> +#include <linux/notifier.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/input/matrix_keypad.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> + +/* + * @rows: Number of rows in the keypad + * @cols: Number of columns in the keypad + * @row_shift: log2 of number of rows, rounded up + * @keymap_data: Matrix keymap data used to convert to keyscan values + * @ghost_filter: true to enable the matrix key-ghosting filter + * @dev: Device pointer + * @idev: Input device + * @ec: Top level ChromeOS device to use to talk to EC + * @notifier: interrupt event notifier for transport devices + */ +struct cros_ec_keyb { + unsigned int rows; + unsigned int cols; + int row_shift; + const struct matrix_keymap_data *keymap_data; + bool ghost_filter; + + struct device *dev; + struct input_dev *idev; + struct cros_ec_device *ec; + struct notifier_block notifier; +}; + + +static bool cros_ec_keyb_row_has_ghosting(struct cros_ec_keyb *ckdev, + uint8_t *buf, int row) +{ + int pressed_in_row = 0; + int row_has_teeth = 0; + int col, mask; + + mask = 1 << row; + for (col = 0; col < ckdev->cols; col++) { + if (buf[col] & mask) { + pressed_in_row++; + row_has_teeth |= buf[col] & ~mask; + if (pressed_in_row > 1 && row_has_teeth) { + /* ghosting */ + dev_dbg(ckdev->dev, + "ghost found at: r%d c%d, pressed %d, teeth 0x%x\n", + row, col, pressed_in_row, + row_has_teeth); + return true; + } + } + } + + return false; +} + +/* + * Returns true when there is at least one combination of pressed keys that + * results in ghosting. + */ +static bool cros_ec_keyb_has_ghosting(struct cros_ec_keyb *ckdev, uint8_t *buf) +{ + int row; + + /* + * Ghosting happens if for any pressed key X there are other keys + * pressed both in the same row and column of X as, for instance, + * in the following diagram: + * + * . . Y . g . + * . . . . . . + * . . . . . . + * . . X . Z . + * + * In this case only X, Y, and Z are pressed, but g appears to be + * pressed too (see Wikipedia). + * + * We can detect ghosting in a single pass (*) over the keyboard state + * by maintaining two arrays. pressed_in_row counts how many pressed + * keys we have found in a row. row_has_teeth is true if any of the + * pressed keys for this row has other pressed keys in its column. If + * at any point of the scan we find that a row has multiple pressed + * keys, and at least one of them is at the intersection with a column + * with multiple pressed keys, we're sure there is ghosting. + * Conversely, if there is ghosting, we will detect such a situation for + * at least one key during the pass. + * + * (*) This looks linear in the number of keys, but it's not. We can + * cheat because the number of rows is small. + */ + for (row = 0; row < ckdev->rows; row++) + if (cros_ec_keyb_row_has_ghosting(ckdev, buf, row)) + return true; + + return false; +} + +/* + * Compares the new keyboard state to the old one and produces key + * press/release events accordingly.
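
The ghosting test above needs nothing but the per-column bitmaps, so it is easy to exercise outside the kernel. A user-space sketch of the same single-pass check; the column count and the sample state are made-up values:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NCOLS 4

    static bool has_ghosting(const uint8_t *buf, int nrows)
    {
            for (int row = 0; row < nrows; row++) {
                    int pressed_in_row = 0;
                    uint8_t teeth = 0, mask = 1 << row;

                    for (int col = 0; col < NCOLS; col++) {
                            if (buf[col] & mask) {
                                    pressed_in_row++;
                                    teeth |= buf[col] & ~mask;
                                    /* >1 key in the row, shared column: ghost */
                                    if (pressed_in_row > 1 && teeth)
                                            return true;
                            }
                    }
            }
            return false;
    }

    int main(void)
    {
            /* keys at (r0,c0), (r3,c0), (r0,c2): the matrix cannot
             * distinguish this from a fourth key at (r3,c2) */
            uint8_t state[NCOLS] = { 0x09, 0x00, 0x01, 0x00 };

            printf("%s\n", has_ghosting(state, 4) ? "ghosting" : "clean");
            return 0;
    }
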
The keyboard state is 13 bytes (one byte + * per column) + */ +static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev, + uint8_t *kb_state, int len) +{ + struct input_dev *idev = ckdev->idev; + int col, row; + int new_state; + int num_cols; + + num_cols = len; + + if (ckdev->ghost_filter && cros_ec_keyb_has_ghosting(ckdev, kb_state)) { + /* + * Simple-minded solution: ignore this state. The obvious + * improvement is to only ignore changes to keys involved in + * the ghosting, but process the other changes. + */ + dev_dbg(ckdev->dev, "ghosting found\n"); + return; + } + + for (col = 0; col < ckdev->cols; col++) { + for (row = 0; row < ckdev->rows; row++) { + int pos = MATRIX_SCAN_CODE(row, col, ckdev->row_shift); + const unsigned short *keycodes = idev->keycode; + int code; + + code = keycodes[pos]; + new_state = kb_state[col] & (1 << row); + if (!!new_state != test_bit(code, idev->key)) { + dev_dbg(ckdev->dev, + "changed: [r%d c%d]: byte %02x\n", + row, col, new_state); + + input_report_key(idev, code, new_state); + } + } + } + input_sync(ckdev->idev); +} + +static int cros_ec_keyb_open(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + + return blocking_notifier_chain_register(&ckdev->ec->event_notifier, + &ckdev->notifier); +} + +static void cros_ec_keyb_close(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + + blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, + &ckdev->notifier); +} + +static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) +{ + return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE, + kb_state, ckdev->cols); +} + +static int cros_ec_keyb_work(struct notifier_block *nb, + unsigned long state, void *_notify) +{ + int ret; + struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb, + notifier); + uint8_t kb_state[ckdev->cols]; + + ret = cros_ec_keyb_get_state(ckdev, kb_state); + if (ret >= 0) + cros_ec_keyb_process(ckdev, kb_state, ret); + + return NOTIFY_DONE; +} + +/* Clear any keys in the buffer */ +static void cros_ec_keyb_clear_keyboard(struct cros_ec_keyb *ckdev) +{ + uint8_t old_state[ckdev->cols]; + uint8_t new_state[ckdev->cols]; + unsigned long duration; + int i, ret; + + /* + * Keep reading until we see that the scan state does not change. + * That indicates that we are done. + * + * Assume that the EC keyscan buffer is at most 32 deep. 
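
The clearing helper below simply re-reads the scan state until two consecutive snapshots match, capped at that assumed depth of 32. The bare idiom, with read_state() as a hypothetical stand-in for the EC query:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NCOLS 13        /* one byte per keyboard column */
    #define MAX_DEPTH 32    /* assumed worst-case buffer depth */

    /* hypothetical stand-in for the EC query: pretend the buffer holds
     * three distinct snapshots, then repeats the last one forever */
    static int read_state(uint8_t *buf)
    {
            static int calls;

            memset(buf, 0, NCOLS);
            buf[0] = calls < 3 ? calls : 3;
            calls++;
            return 0;
    }

    int main(void)
    {
            uint8_t old_state[NCOLS], new_state[NCOLS];
            int i, ret;

            ret = read_state(new_state);
            for (i = 1; !ret && i < MAX_DEPTH; i++) {
                    memcpy(old_state, new_state, sizeof(old_state));
                    ret = read_state(new_state);
                    /* two identical reads in a row: buffer is drained */
                    if (!memcmp(old_state, new_state, sizeof(old_state)))
                            break;
            }
            printf("discarded %d keyscan(s)\n", i);
            return 0;
    }
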
+ */ + duration = jiffies; + ret = cros_ec_keyb_get_state(ckdev, new_state); + for (i = 1; !ret && i < 32; i++) { + memcpy(old_state, new_state, sizeof(old_state)); + ret = cros_ec_keyb_get_state(ckdev, new_state); + if (0 == memcmp(old_state, new_state, sizeof(old_state))) + break; + } + duration = jiffies - duration; + dev_info(ckdev->dev, "Discarded %d keyscan(s) in %dus\n", i, + jiffies_to_usecs(duration)); +} + +static int cros_ec_keyb_probe(struct platform_device *pdev) +{ + struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); + struct device *dev = ec->dev; + struct cros_ec_keyb *ckdev; + struct input_dev *idev; + struct device_node *np; + int err; + + np = pdev->dev.of_node; + if (!np) + return -ENODEV; + + ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL); + if (!ckdev) + return -ENOMEM; + err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows, + &ckdev->cols); + if (err) + return err; + + idev = devm_input_allocate_device(&pdev->dev); + if (!idev) + return -ENOMEM; + + ckdev->ec = ec; + ckdev->notifier.notifier_call = cros_ec_keyb_work; + ckdev->dev = dev; + dev_set_drvdata(&pdev->dev, ckdev); + + idev->name = ec->ec_name; + idev->phys = ec->phys_name; + __set_bit(EV_REP, idev->evbit); + + idev->id.bustype = BUS_VIRTUAL; + idev->id.version = 1; + idev->id.product = 0; + idev->dev.parent = &pdev->dev; + idev->open = cros_ec_keyb_open; + idev->close = cros_ec_keyb_close; + + ckdev->ghost_filter = of_property_read_bool(np, + "google,needs-ghost-filter"); + + err = matrix_keypad_build_keymap(NULL, NULL, ckdev->rows, ckdev->cols, + NULL, idev); + if (err) { + dev_err(dev, "cannot build key matrix\n"); + return err; + } + + ckdev->row_shift = get_count_order(ckdev->cols); + + input_set_capability(idev, EV_MSC, MSC_SCAN); + input_set_drvdata(idev, ckdev); + ckdev->idev = idev; + err = input_register_device(ckdev->idev); + if (err) { + dev_err(dev, "cannot register input device\n"); + return err; + } + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_keyb_resume(struct device *dev) +{ + struct cros_ec_keyb *ckdev = dev_get_drvdata(dev); + + /* + * When the EC is not a wake source, then it could not have caused the + * resume, so we clear the EC's key scan buffer. If the EC was a + * wake source (e.g. the lid is open and the user might press a key to + * wake) then the key scan buffer should be preserved. 
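
The drain above is also timed: the driver snapshots jiffies before and after and reports the delta through jiffies_to_usecs(). The equivalent elapsed-time measurement in portable user-space C uses a monotonic clock:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec t0, t1;
            long us;

            clock_gettime(CLOCK_MONOTONIC, &t0);
            /* ... the work being timed goes here ... */
            clock_gettime(CLOCK_MONOTONIC, &t1);

            /* microsecond delta, analogous to jiffies_to_usecs(j1 - j0) */
            us = (t1.tv_sec - t0.tv_sec) * 1000000L +
                 (t1.tv_nsec - t0.tv_nsec) / 1000L;
            printf("elapsed: %ld us\n", us);
            return 0;
    }
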
+ */ + if (ckdev->ec->was_wake_device) + cros_ec_keyb_clear_keyboard(ckdev); + + return 0; +} + +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume); + +static struct platform_driver cros_ec_keyb_driver = { + .probe = cros_ec_keyb_probe, + .driver = { + .name = "cros-ec-keyb", + .pm = &cros_ec_keyb_pm_ops, + }, +}; + +module_platform_driver(cros_ec_keyb_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC keyboard driver"); +MODULE_ALIAS("platform:cros-ec-keyb"); diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c index 1b8add6cfb9..42181435fe6 100644 --- a/drivers/input/keyboard/lpc32xx-keys.c +++ b/drivers/input/keyboard/lpc32xx-keys.c @@ -144,12 +144,13 @@ static int lpc32xx_parse_dt(struct device *dev, { struct device_node *np = dev->of_node; u32 rows = 0, columns = 0; + int err; - of_property_read_u32(np, "keypad,num-rows", &rows); - of_property_read_u32(np, "keypad,num-columns", &columns); - if (!rows || rows != columns) { - dev_err(dev, - "rows and columns must be specified and be equal!\n"); + err = matrix_keypad_parse_of_params(dev, &rows, &columns); + if (err) + return err; + if (rows != columns) { + dev_err(dev, "rows and columns must be equal!\n"); return -EINVAL; } diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index e25b022692c..1b289092f4e 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -215,18 +215,12 @@ static int omap4_keypad_parse_dt(struct device *dev, struct omap4_keypad *keypad_data) { struct device_node *np = dev->of_node; + int err; - if (!np) { - dev_err(dev, "missing DT data"); - return -EINVAL; - } - - of_property_read_u32(np, "keypad,num-rows", &keypad_data->rows); - of_property_read_u32(np, "keypad,num-columns", &keypad_data->cols); - if (!keypad_data->rows || !keypad_data->cols) { - dev_err(dev, "number of keypad rows/columns not specified\n"); - return -EINVAL; - } + err = matrix_keypad_parse_of_params(dev, &keypad_data->rows, + &keypad_data->cols); + if (err) + return err; if (of_get_property(np, "linux,input-no-autorepeat", NULL)) keypad_data->no_autorepeat = true; diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c index a34cc6714e5..55c15304ddb 100644 --- a/drivers/input/keyboard/tca8418_keypad.c +++ b/drivers/input/keyboard/tca8418_keypad.c @@ -288,8 +288,11 @@ static int tca8418_keypad_probe(struct i2c_client *client, irq_is_gpio = pdata->irq_is_gpio; } else { struct device_node *np = dev->of_node; - of_property_read_u32(np, "keypad,num-rows", &rows); - of_property_read_u32(np, "keypad,num-columns", &cols); + int err; + + err = matrix_keypad_parse_of_params(dev, &rows, &cols); + if (err) + return err; rep = of_property_read_bool(np, "keypad,autorepeat"); } diff --git a/drivers/input/matrix-keymap.c b/drivers/input/matrix-keymap.c index 3ae496ea5fe..08b61f506db 100644 --- a/drivers/input/matrix-keymap.c +++ b/drivers/input/matrix-keymap.c @@ -50,6 +50,26 @@ static bool matrix_keypad_map_key(struct input_dev *input_dev, } #ifdef CONFIG_OF +int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols) +{ + struct device_node *np = dev->of_node; + + if (!np) { + dev_err(dev, "missing DT data"); + return -EINVAL; + } + of_property_read_u32(np, "keypad,num-rows", rows); + of_property_read_u32(np, "keypad,num-columns", cols); + if (!*rows || !*cols) { + dev_err(dev, "number of keypad rows/columns not 
specified\n"); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(matrix_keypad_parse_of_params); + static int matrix_keypad_parse_of_keymap(const char *propname, unsigned int rows, unsigned int cols, struct input_dev *input_dev) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 830183737b0..21d02b0d907 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -46,6 +46,7 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" +#include "pci.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -263,12 +264,6 @@ static bool check_device(struct device *dev) return true; } -static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) -{ - pci_dev_put(*from); - *from = to; -} - static struct pci_bus *find_hosted_bus(struct pci_bus *bus) { while (!bus->self) { @@ -701,9 +696,6 @@ retry: static void iommu_poll_events(struct amd_iommu *iommu) { u32 head, tail; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); @@ -714,8 +706,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); } static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) @@ -740,17 +730,11 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) static void iommu_poll_ppr_log(struct amd_iommu *iommu) { - unsigned long flags; u32 head, tail; if (iommu->ppr_log == NULL) return; - /* enable ppr interrupts again */ - writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); - - spin_lock_irqsave(&iommu->lock, flags); - head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); @@ -786,34 +770,50 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* - * Release iommu->lock because ppr-handling might need to - * re-acquire it - */ - spin_unlock_irqrestore(&iommu->lock, flags); - /* Handle PPR entry */ iommu_handle_ppr_entry(iommu, entry); - spin_lock_irqsave(&iommu->lock, flags); - /* Refresh ring-buffer information */ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); } - - spin_unlock_irqrestore(&iommu->lock, flags); } irqreturn_t amd_iommu_int_thread(int irq, void *data) { - struct amd_iommu *iommu; + struct amd_iommu *iommu = (struct amd_iommu *) data; + u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - for_each_iommu(iommu) { - iommu_poll_events(iommu); - iommu_poll_ppr_log(iommu); - } + while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) { + /* Enable EVT and PPR interrupts again */ + writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK), + iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & MMIO_STATUS_EVT_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU Event Log\n"); + iommu_poll_events(iommu); + } + + if (status & MMIO_STATUS_PPR_INT_MASK) { + pr_devel("AMD-Vi: Processing IOMMU PPR Log\n"); + iommu_poll_ppr_log(iommu); + } + + /* + * Hardware bug: ERBT1312 + * When re-enabling interrupt (by writing 1 + * to clear the bit), the hardware might also try to set + * the interrupt bit in the event status register. + * In this scenario, the bit will be set, and disable + * subsequent interrupts. 
+ * + * Workaround: The IOMMU driver should read back the + * status register and check if the interrupt bits are cleared. + * If not, driver will need to go through the interrupt handler + * again and re-clear the bits + */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + } return IRQ_HANDLED; } @@ -2839,24 +2839,6 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, } /* - * This is a special map_sg function which is used if we should map a - * device which is not handled by an AMD IOMMU in the system. - */ -static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int i; - - for_each_sg(sglist, s, nelems, i) { - s->dma_address = (dma_addr_t)sg_phys(s); - s->dma_length = s->length; - } - - return nelems; -} - -/* * The exported map_sg function for dma_ops (handles scatter-gather * lists). */ @@ -2875,9 +2857,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return map_sg_no_iommu(dev, sglist, nelems, dir); - else if (IS_ERR(domain)) + if (IS_ERR(domain)) return 0; dma_mask = *dev->dma_mask; @@ -3410,7 +3390,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) + dma_addr_t iova) { struct protection_domain *domain = dom->priv; unsigned long offset_mask; @@ -3947,6 +3927,9 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) if (!table) goto out; + /* Initialize table spin-lock */ + spin_lock_init(&table->lock); + if (ioapic) /* Keep the first 32 indexes free for IOAPIC interrupts */ table->min_index = 32; @@ -4007,7 +3990,7 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) c = 0; if (c == count) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; for (; c != 0; --c) table->table[index - c + 1] = IRTE_ALLOCATED; @@ -4015,9 +3998,9 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) index -= count - 1; cfg->remapped = 1; - irte_info = &cfg->irq_2_iommu; - irte_info->sub_handle = devid; - irte_info->irte_index = index; + irte_info = &cfg->irq_2_irte; + irte_info->devid = devid; + irte_info->index = index; goto out; } @@ -4098,7 +4081,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, struct io_apic_irq_attr *attr) { struct irq_remap_table *table; - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; union irte irte; int ioapic_id; @@ -4110,7 +4093,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; ioapic_id = mpc_ioapic_id(attr->ioapic); devid = get_ioapic_devid(ioapic_id); @@ -4125,8 +4108,8 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, /* Setup IRQ remapping info */ cfg->remapped = 1; - irte_info->sub_handle = devid; - irte_info->irte_index = index; + irte_info->devid = devid; + irte_info->index = index; /* Setup IRTE for IOMMU */ irte.val = 0; @@ -4160,7 +4143,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, static int set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; unsigned int dest, irq; struct irq_cfg *cfg; union irte irte; @@ -4171,12 +4154,12 @@ 
static int set_affinity(struct irq_data *data, const struct cpumask *mask, cfg = data->chip_data; irq = data->irq; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; - if (get_irte(irte_info->sub_handle, irte_info->irte_index, &irte)) + if (get_irte(irte_info->devid, irte_info->index, &irte)) return -EBUSY; if (assign_irq_vector(irq, cfg, mask)) @@ -4192,7 +4175,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, irte.fields.vector = cfg->vector; irte.fields.destination = dest; - modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + modify_irte(irte_info->devid, irte_info->index, irte); if (cfg->move_in_progress) send_cleanup_vector(cfg); @@ -4204,16 +4187,16 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask, static int free_irq(int irq) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; cfg = irq_get_chip_data(irq); if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; - free_irte(irte_info->sub_handle, irte_info->irte_index); + free_irte(irte_info->devid, irte_info->index); return 0; } @@ -4222,7 +4205,7 @@ static void compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; union irte irte; @@ -4230,7 +4213,7 @@ static void compose_msi_msg(struct pci_dev *pdev, if (!cfg) return; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; irte.val = 0; irte.fields.vector = cfg->vector; @@ -4239,11 +4222,11 @@ static void compose_msi_msg(struct pci_dev *pdev, irte.fields.dm = apic->irq_dest_mode; irte.fields.valid = 1; - modify_irte(irte_info->sub_handle, irte_info->irte_index, irte); + modify_irte(irte_info->devid, irte_info->index, irte); msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO; - msg->data = irte_info->irte_index; + msg->data = irte_info->index; } static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) @@ -4268,7 +4251,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec) static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, int index, int offset) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; u16 devid; @@ -4283,18 +4266,18 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, return 0; devid = get_device_id(&pdev->dev); - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; cfg->remapped = 1; - irte_info->sub_handle = devid; - irte_info->irte_index = index + offset; + irte_info->devid = devid; + irte_info->index = index + offset; return 0; } static int setup_hpet_msi(unsigned int irq, unsigned int id) { - struct irq_2_iommu *irte_info; + struct irq_2_irte *irte_info; struct irq_cfg *cfg; int index, devid; @@ -4302,7 +4285,7 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (!cfg) return -EINVAL; - irte_info = &cfg->irq_2_iommu; + irte_info = &cfg->irq_2_irte; devid = get_hpet_devid(id); if (devid < 0) return devid; @@ -4312,8 +4295,8 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) return index; cfg->remapped = 1; - irte_info->sub_handle = devid; - irte_info->irte_index = index; + irte_info->devid = devid; + irte_info->index = index; return 0; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 2f46881256a..bf51abb78de 100644 --- 
a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -213,6 +213,14 @@ enum iommu_init_state { IOMMU_INIT_ERROR, }; +/* Early ioapic and hpet maps from kernel command line */ +#define EARLY_MAP_SIZE 4 +static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; +static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; +static int __initdata early_ioapic_map_size; +static int __initdata early_hpet_map_size; +static bool __initdata cmdline_maps; + static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); @@ -703,31 +711,66 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } -static int add_special_device(u8 type, u8 id, u16 devid) +static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line) { struct devid_map *entry; struct list_head *list; - if (type != IVHD_SPECIAL_IOAPIC && type != IVHD_SPECIAL_HPET) + if (type == IVHD_SPECIAL_IOAPIC) + list = &ioapic_map; + else if (type == IVHD_SPECIAL_HPET) + list = &hpet_map; + else return -EINVAL; + list_for_each_entry(entry, list, list) { + if (!(entry->id == id && entry->cmd_line)) + continue; + + pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", + type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); + + return 0; + } + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; - entry->id = id; - entry->devid = devid; - - if (type == IVHD_SPECIAL_IOAPIC) - list = &ioapic_map; - else - list = &hpet_map; + entry->id = id; + entry->devid = devid; + entry->cmd_line = cmd_line; list_add_tail(&entry->list, list); return 0; } +static int __init add_early_maps(void) +{ + int i, ret; + + for (i = 0; i < early_ioapic_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_IOAPIC, + early_ioapic_map[i].id, + early_ioapic_map[i].devid, + early_ioapic_map[i].cmd_line); + if (ret) + return ret; + } + + for (i = 0; i < early_hpet_map_size; ++i) { + ret = add_special_device(IVHD_SPECIAL_HPET, + early_hpet_map[i].id, + early_hpet_map[i].devid, + early_hpet_map[i].cmd_line); + if (ret) + return ret; + } + + return 0; +} + /* * Reads the device exclusion range from ACPI and initializes the IOMMU with * it @@ -764,6 +807,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + int ret; + + + ret = add_early_maps(); + if (ret) + return ret; /* * First save the recommended feature enable bits from ACPI @@ -929,7 +978,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, PCI_FUNC(devid)); set_dev_entry_from_acpi(iommu, devid, e->flags, 0); - ret = add_special_device(type, handle, devid); + ret = add_special_device(type, handle, devid, false); if (ret) return ret; break; @@ -1275,7 +1324,7 @@ static int iommu_setup_msi(struct amd_iommu *iommu) amd_iommu_int_handler, amd_iommu_int_thread, 0, "AMD-Vi", - iommu->dev); + iommu); if (r) { pci_disable_msi(iommu->dev); @@ -1638,18 +1687,28 @@ static void __init free_on_init_error(void) static bool __init check_ioapic_information(void) { + const char *fw_bug = FW_BUG; bool ret, has_sb_ioapic; int idx; has_sb_ioapic = false; ret = false; + /* + * If we have map overrides on the kernel command line the + * messages in this function might not describe firmware bugs + * anymore - so be careful + */ + if (cmdline_maps) + fw_bug = ""; + for (idx = 0; idx < nr_ioapics; idx++) { int devid, id = mpc_ioapic_id(idx); devid = 
get_ioapic_devid(id); if (devid < 0) { - pr_err(FW_BUG "AMD-Vi: IOAPIC[%d] not in IVRS table\n", id); + pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n", + fw_bug, id); ret = false; } else if (devid == IOAPIC_SB_DEVID) { has_sb_ioapic = true; @@ -1666,11 +1725,11 @@ static bool __init check_ioapic_information(void) * when the BIOS is buggy and provides us the wrong * device id for the IOAPIC in the system. */ - pr_err(FW_BUG "AMD-Vi: No southbridge IOAPIC found in IVRS table\n"); + pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug); } if (!ret) - pr_err("AMD-Vi: Disabling interrupt remapping due to BIOS Bug(s)\n"); + pr_err("AMD-Vi: Disabling interrupt remapping\n"); return ret; } @@ -1801,6 +1860,7 @@ static int __init early_amd_iommu_init(void) * Interrupt remapping enabled, create kmem_cache for the * remapping tables. */ + ret = -ENOMEM; amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", MAX_IRQS_PER_TABLE * sizeof(u32), IRQ_TABLE_ALIGNMENT, @@ -2097,8 +2157,70 @@ static int __init parse_amd_iommu_options(char *str) return 1; } -__setup("amd_iommu_dump", parse_amd_iommu_dump); -__setup("amd_iommu=", parse_amd_iommu_options); +static int __init parse_ivrs_ioapic(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } + + if (early_ioapic_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_ioapic_map_size++; + early_ioapic_map[i].id = id; + early_ioapic_map[i].devid = devid; + early_ioapic_map[i].cmd_line = true; + + return 1; +} + +static int __init parse_ivrs_hpet(char *str) +{ + unsigned int bus, dev, fn; + int ret, id, i; + u16 devid; + + ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); + + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str); + return 1; + } + + if (early_hpet_map_size == EARLY_MAP_SIZE) { + pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n", + str); + return 1; + } + + devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + + cmdline_maps = true; + i = early_hpet_map_size++; + early_hpet_map[i].id = id; + early_hpet_map[i].devid = devid; + early_hpet_map[i].cmd_line = true; + + return 1; +} + +__setup("amd_iommu_dump", parse_amd_iommu_dump); +__setup("amd_iommu=", parse_amd_iommu_options); +__setup("ivrs_ioapic", parse_ivrs_ioapic); +__setup("ivrs_hpet", parse_ivrs_hpet); IOMMU_INIT_FINISH(amd_iommu_detect, gart_iommu_hole_init, diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index ec36cf63e0c..0285a215df1 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -100,6 +100,7 @@ #define PASID_MASK 0x000fffff /* MMIO status bits */ +#define MMIO_STATUS_EVT_INT_MASK (1 << 1) #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) #define MMIO_STATUS_PPR_INT_MASK (1 << 6) @@ -589,6 +590,7 @@ struct devid_map { struct list_head list; u8 id; u16 devid; + bool cmd_line; }; /* Map HPET and IOAPIC ids to the devid used by the IOMMU */ diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index b8008f679bc..a7967ceb79e 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -646,7 +646,7 @@ out: int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; - u32 ver; + u32 
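
As a worked example of the parsing and packing above (values illustrative only): booting with ivrs_ioapic[10]=00:14.0 yields id 10 and the hex BDF bus 0x00, device 0x14, function 0x0, so

	devid = ((0x00 & 0xff) << 8) | ((0x14 & 0x1f) << 3) | (0x0 & 0x7);
	/* devid == 0x00a0: IOAPIC id 10 now routes through requester 00:14.0 */

ivrs_hpet entries are packed the same way.
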
ver, sts; static int iommu_allocated = 0; int agaw = 0; int msagaw = 0; @@ -696,6 +696,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) (unsigned long long)iommu->cap, (unsigned long long)iommu->ecap); + /* Reflect status in gcmd */ + sts = readl(iommu->reg + DMAR_GSTS_REG); + if (sts & DMA_GSTS_IRES) + iommu->gcmd |= DMA_GCMD_IRE; + if (sts & DMA_GSTS_TES) + iommu->gcmd |= DMA_GCMD_TE; + if (sts & DMA_GSTS_QIES) + iommu->gcmd |= DMA_GCMD_QIE; + raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; @@ -1205,7 +1214,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) /* TBD: ignore advanced fault log currently */ if (!(fault_status & DMA_FSTS_PPF)) - goto clear_rest; + goto unlock_exit; fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); @@ -1246,11 +1255,10 @@ irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = 0; raw_spin_lock_irqsave(&iommu->register_lock, flag); } -clear_rest: - /* clear all the other faults */ - fault_status = readl(iommu->reg + DMAR_FSTS_REG); - writel(fault_status, iommu->reg + DMAR_FSTS_REG); + writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG); + +unlock_exit: raw_spin_unlock_irqrestore(&iommu->register_lock, flag); return IRQ_HANDLED; } @@ -1298,6 +1306,7 @@ int __init enable_drhd_fault_handling(void) for_each_drhd_unit(drhd) { int ret; struct intel_iommu *iommu = drhd->iommu; + u32 fault_status; ret = dmar_set_interrupt(iommu); if (ret) { @@ -1310,6 +1319,8 @@ int __init enable_drhd_fault_handling(void) * Clear any previous faults. */ dmar_fault(iommu->irq, iommu); + fault_status = readl(iommu->reg + DMAR_FSTS_REG); + writel(fault_status, iommu->reg + DMAR_FSTS_REG); } return 0; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 238a3caa949..3f32d64ab87 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1027,7 +1027,7 @@ done: } static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct exynos_iommu_domain *priv = domain->priv; unsigned long *entry; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0099667a397..b4f0e28dfa4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -47,6 +47,7 @@ #include <asm/iommu.h> #include "irq_remapping.h" +#include "pci.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -3665,6 +3666,7 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; + struct dmar_drhd_unit *drhd; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3675,6 +3677,20 @@ int __init intel_iommu_init(void) return -ENODEV; } + /* + * Disable translation if already enabled prior to OS handover. 
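
The DMA_GSTS_* to DMA_GCMD_* mirroring above matters because the driver later writes iommu->gcmd back to the hardware wholesale; if the shadow value did not reflect what the firmware (or a previous kernel) left enabled, such a write would silently turn those features off. A hedged sketch of the consuming pattern, following the usual enable path:

	/* e.g. when enabling translation later */
	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
	/* ... wait for DMA_GSTS_TES to latch before unlocking ... */
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
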
+ */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu; + + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + } + if (dmar_dev_scope_init() < 0) { if (force_on) panic("tboot: Failed to initialize DMAR device scope\n"); @@ -4111,7 +4127,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct dmar_domain *dmar_domain = domain->priv; struct dma_pte *pte; @@ -4137,12 +4153,6 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) -{ - pci_dev_put(*from); - *from = to; -} - #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) static int intel_iommu_add_device(struct device *dev) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index f3b8f23b5d8..5b19b2d6ec2 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -524,6 +524,16 @@ static int __init intel_irq_remapping_supported(void) if (disable_irq_remap) return 0; + if (irq_remap_broken) { + WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + "This system BIOS has enabled interrupt remapping\n" + "on a chipset that contains an erratum making that\n" + "feature unstable. To maintain system stability\n" + "interrupt remapping is being disabled. Please\n" + "contact your BIOS vendor for an update\n"); + disable_irq_remap = 1; + return 0; + } if (!dmar_ir_support()) return 0; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b972d430d92..d8f98b14e2f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -204,6 +204,35 @@ again: } EXPORT_SYMBOL_GPL(iommu_group_alloc); +struct iommu_group *iommu_group_get_by_id(int id) +{ + struct kobject *group_kobj; + struct iommu_group *group; + const char *name; + + if (!iommu_group_kset) + return NULL; + + name = kasprintf(GFP_KERNEL, "%d", id); + if (!name) + return NULL; + + group_kobj = kset_find_obj(iommu_group_kset, name); + kfree(name); + + if (!group_kobj) + return NULL; + + group = container_of(group_kobj, struct iommu_group, kobj); + BUG_ON(group->id != id); + + kobject_get(group->devices_kobj); + kobject_put(&group->kobj); + + return group; +} +EXPORT_SYMBOL_GPL(iommu_group_get_by_id); + /** * iommu_group_get_iommudata - retrieve iommu_data registered for a group * @group: the group @@ -706,8 +735,7 @@ void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_detach_group); -phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (unlikely(domain->ops->iova_to_phys == NULL)) return 0; @@ -854,12 +882,13 @@ EXPORT_SYMBOL_GPL(iommu_unmap); int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size) + phys_addr_t paddr, u64 size, int prot) { if (unlikely(domain->ops->domain_window_enable == NULL)) return -ENODEV; - return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size); + return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size, + prot); } EXPORT_SYMBOL_GPL(iommu_domain_window_enable); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 7c11ff368d0..dcfea4e39be 100644 --- a/drivers/iommu/irq_remapping.c +++ 
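
iommu_group_get_by_id() above returns the group with a reference on its device list held, so callers pair it with iommu_group_put(); a minimal hedged usage sketch:

	struct iommu_group *group = iommu_group_get_by_id(id);

	if (!group)
		return -ENODEV;	/* unknown id, or the group kset is not set up yet */

	/* ... e.g. iommu_group_for_each_dev(group, data, fn) ... */

	iommu_group_put(group);	/* drop the reference taken by the lookup */
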
b/drivers/iommu/irq_remapping.c @@ -18,6 +18,7 @@ int irq_remapping_enabled; int disable_irq_remap; +int irq_remap_broken; int disable_sourceid_checking; int no_x2apic_optout; @@ -210,6 +211,11 @@ void __init setup_irq_remapping_ops(void) #endif } +void set_irq_remapping_broken(void) +{ + irq_remap_broken = 1; +} + int irq_remapping_supported(void) { if (disable_irq_remap) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index ecb63767040..90c4dae5a46 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -32,6 +32,7 @@ struct pci_dev; struct msi_msg; extern int disable_irq_remap; +extern int irq_remap_broken; extern int disable_sourceid_checking; extern int no_x2apic_optout; extern int irq_remapping_enabled; @@ -89,6 +90,7 @@ extern struct irq_remap_ops amd_iommu_irq_ops; #define irq_remapping_enabled 0 #define disable_irq_remap 1 +#define irq_remap_broken 0 #endif /* CONFIG_IRQ_REMAP */ diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a8870a3166..8ab4f41090a 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -554,7 +554,7 @@ fail: } static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long va) + dma_addr_t va) { struct msm_priv *priv; struct msm_iommu_drvdata *iommu_drvdata; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6ac02fa5910..e02e5d71745 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1219,7 +1219,7 @@ static void omap_iommu_domain_destroy(struct iommu_domain *domain) } static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long da) + dma_addr_t da) { struct omap_iommu_domain *omap_domain = domain->priv; struct omap_iommu *oiommu = omap_domain->iommu_dev; diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h new file mode 100644 index 00000000000..352d80ae744 --- /dev/null +++ b/drivers/iommu/pci.h @@ -0,0 +1,29 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2013 Red Hat, Inc. + * Copyright (C) 2013 Freescale Semiconductor, Inc. 
+ * + */ +#ifndef __IOMMU_PCI_H +#define __IOMMU_PCI_H + +/* Helper function for swapping pci device reference */ +static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) +{ + pci_dev_put(*from); + *from = to; +} + +#endif /* __IOMMU_PCI_H */ diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index b6e8b57cf0a..d572863dfcc 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -296,7 +296,7 @@ done: } static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct shmobile_iommu_domain *sh_domain = domain->priv; uint32_t l1entry = 0, l2entry = 0; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 86437575f94..108c0e9c24d 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -279,7 +279,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct gart_device *gart = domain->priv; unsigned long pte; @@ -295,7 +295,8 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, pa = (pte & GART_PAGE_MASK); if (!pfn_valid(__phys_to_pfn(pa))) { - dev_err(gart->dev, "No entry for %08lx:%08x\n", iova, pa); + dev_err(gart->dev, "No entry for %08llx:%08x\n", + (unsigned long long)iova, pa); gart_dump_table(gart); return -EINVAL; } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index b34e5fd7fd9..f6f120e2540 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -757,7 +757,7 @@ static size_t smmu_iommu_unmap(struct iommu_domain *domain, unsigned long iova, } static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) + dma_addr_t iova) { struct smmu_as *as = domain->priv; unsigned long *pte; @@ -772,7 +772,8 @@ static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, pfn = *pte & SMMU_PFN_MASK; WARN_ON(!pfn_valid(pfn)); dev_dbg(as->smmu->dev, - "iova:%08lx pfn:%08lx asid:%d\n", iova, pfn, as->asid); + "iova:%08llx pfn:%08lx asid:%d\n", (unsigned long long)iova, + pfn, as->asid); spin_unlock_irqrestore(&as->lock, flags); return PFN_PHYS(pfn); diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index ec50824c02e..d44806d41b4 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -194,8 +194,8 @@ config LEDS_LP3944 module will be called leds-lp3944. config LEDS_LP55XX_COMMON - tristate "Common Driver for TI/National LP5521 and LP5523/55231" - depends on LEDS_LP5521 || LEDS_LP5523 + tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562" + depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 select FW_LOADER help This option supports common operations for LP5521 and LP5523/55231 @@ -222,6 +222,16 @@ config LEDS_LP5523 Driver provides direct control via LED class and interface for programming the engines. +config LEDS_LP5562 + tristate "LED Support for TI LP5562 LED driver chip" + depends on LEDS_CLASS && I2C + select LEDS_LP55XX_COMMON + help + If you say yes here you get support for TI LP5562 LED driver. + It is 4 channels chip with programmable engines. + Driver provides direct control via LED class and interface for + programming the engines. + config LEDS_LP8788 tristate "LED support for the TI LP8788 PMIC" depends on LEDS_CLASS @@ -469,106 +479,7 @@ config LEDS_BLINKM This option enables support for the BlinkM RGB LED connected through I2C. 
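
swap_pci_ref() exists for loops that replace the device they hold while keeping exactly one reference live; a minimal sketch of the usual upstream walk, assuming only standard PCI core calls:

	struct pci_dev *pdev = pci_dev_get(dev);

	/* climb towards the root complex, never holding two references */
	while (!pci_is_root_bus(pdev->bus))
		swap_pci_ref(&pdev, pci_dev_get(pdev->bus->self));

	/* ... pdev is now the topmost device on the path ... */
	pci_dev_put(pdev);
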
Say Y to enable support for the BlinkM LED. -config LEDS_TRIGGERS - bool "LED Trigger support" - depends on LEDS_CLASS - help - This option enables trigger support for the leds class. - These triggers allow kernel events to drive the LEDs and can - be configured via sysfs. If unsure, say Y. - comment "LED Triggers" - -config LEDS_TRIGGER_TIMER - tristate "LED Timer Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by a programmable timer - via sysfs. Some LED hardware can be programmed to start - blinking the LED without any further software interaction. - For more details read Documentation/leds/leds-class.txt. - - If unsure, say Y. - -config LEDS_TRIGGER_ONESHOT - tristate "LED One-shot Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to blink in one-shot pulses with parameters - controlled via sysfs. It's useful to notify the user on - sporadic events, when there are no clear begin and end trap points, - or on dense events, where this blinks the LED at constant rate if - rearmed continuously. - - It also shows how to use the led_blink_set_oneshot() function. - - If unsure, say Y. - -config LEDS_TRIGGER_IDE_DISK - bool "LED IDE Disk Trigger" - depends on IDE_GD_ATA - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by IDE disk activity. - If unsure, say Y. - -config LEDS_TRIGGER_HEARTBEAT - tristate "LED Heartbeat Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by a CPU load average. - The flash frequency is a hyperbolic function of the 1-minute - load average. - If unsure, say Y. - -config LEDS_TRIGGER_BACKLIGHT - tristate "LED backlight Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled as a backlight device: they - turn off and on when the display is blanked and unblanked. - - If unsure, say N. - -config LEDS_TRIGGER_CPU - bool "LED CPU Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be controlled by active CPUs. This shows - the active CPUs across an array of LEDs so you can see which - CPUs are active on the system at any given moment. - - If unsure, say N. - -config LEDS_TRIGGER_GPIO - tristate "LED GPIO Trigger" - depends on LEDS_TRIGGERS - depends on GPIOLIB - help - This allows LEDs to be controlled by gpio events. It's good - when using gpios as switches and triggering the needed LEDs - from there. One use case is n810's keypad LEDs that could - be triggered by this trigger when user slides up to show - keypad. - - If unsure, say N. - -config LEDS_TRIGGER_DEFAULT_ON - tristate "LED Default ON Trigger" - depends on LEDS_TRIGGERS - help - This allows LEDs to be initialised in the ON state. - If unsure, say Y. - -comment "iptables trigger is under Netfilter config (LED target)" - depends on LEDS_TRIGGERS - -config LEDS_TRIGGER_TRANSIENT - tristate "LED Transient Trigger" - depends on LEDS_TRIGGERS - help - This allows one time activation of a transient state on - GPIO/PWM based hardware. - If unsure, say Y. 
+source "drivers/leds/trigger/Kconfig" endif # NEW_LEDS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 215e7e3b617..ac2897732b0 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_LP55XX_COMMON) += leds-lp55xx-common.o obj-$(CONFIG_LEDS_LP5521) += leds-lp5521.o obj-$(CONFIG_LEDS_LP5523) += leds-lp5523.o +obj-$(CONFIG_LEDS_LP5562) += leds-lp5562.o obj-$(CONFIG_LEDS_LP8788) += leds-lp8788.o obj-$(CONFIG_LEDS_TCA6507) += leds-tca6507.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o @@ -57,12 +58,4 @@ obj-$(CONFIG_LEDS_BLINKM) += leds-blinkm.o obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o # LED Triggers -obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o -obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o -obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o -obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o -obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o -obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o -obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o -obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o -obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o +obj-$(CONFIG_LEDS_TRIGGERS) += trigger/ diff --git a/drivers/leds/leds-asic3.c b/drivers/leds/leds-asic3.c index b474745e001..cf9efe421c2 100644 --- a/drivers/leds/leds-asic3.c +++ b/drivers/leds/leds-asic3.c @@ -134,6 +134,7 @@ static int asic3_led_remove(struct platform_device *pdev) return mfd_cell_disable(pdev); } +#ifdef CONFIG_PM_SLEEP static int asic3_led_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -159,11 +160,9 @@ static int asic3_led_resume(struct device *dev) return ret; } +#endif -static const struct dev_pm_ops asic3_led_pm_ops = { - .suspend = asic3_led_suspend, - .resume = asic3_led_resume, -}; +static SIMPLE_DEV_PM_OPS(asic3_led_pm_ops, asic3_led_suspend, asic3_led_resume); static struct platform_driver asic3_led_driver = { .probe = asic3_led_probe, diff --git a/drivers/leds/leds-atmel-pwm.c b/drivers/leds/leds-atmel-pwm.c index 386773532d9..8a39c5b20f7 100644 --- a/drivers/leds/leds-atmel-pwm.c +++ b/drivers/leds/leds-atmel-pwm.c @@ -113,7 +113,7 @@ err: return status; } -static int __exit pwmled_remove(struct platform_device *pdev) +static int pwmled_remove(struct platform_device *pdev) { const struct gpio_led_platform_data *pdata; struct pwmled *leds; @@ -140,7 +140,7 @@ static struct platform_driver pwmled_driver = { }, /* REVISIT add suspend() and resume() methods */ .probe = pwmled_probe, - .remove = __exit_p(pwmled_remove), + .remove = pwmled_remove, }; module_platform_driver(pwmled_driver); diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c index 851517030cc..2db04231a79 100644 --- a/drivers/leds/leds-bd2802.c +++ b/drivers/leds/leds-bd2802.c @@ -732,7 +732,7 @@ failed_unregister_dev_file: return ret; } -static int __exit bd2802_remove(struct i2c_client *client) +static int bd2802_remove(struct i2c_client *client) { struct bd2802_led *led = i2c_get_clientdata(client); int i; @@ -747,8 +747,7 @@ static int __exit bd2802_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM - +#ifdef CONFIG_PM_SLEEP static void bd2802_restore_state(struct bd2802_led *led) { int i; @@ -785,12 +784,9 @@ static int bd2802_resume(struct device *dev) return 0; } +#endif static SIMPLE_DEV_PM_OPS(bd2802_pm, bd2802_suspend, bd2802_resume); -#define BD2802_PM (&bd2802_pm) -#else /* CONFIG_PM */ -#define 
BD2802_PM NULL -#endif static const struct i2c_device_id bd2802_id[] = { { "BD2802", 0 }, @@ -801,10 +797,10 @@ MODULE_DEVICE_TABLE(i2c, bd2802_id); static struct i2c_driver bd2802_i2c_driver = { .driver = { .name = "BD2802", - .pm = BD2802_PM, + .pm = &bd2802_pm, }, .probe = bd2802_probe, - .remove = __exit_p(bd2802_remove), + .remove = bd2802_remove, .id_table = bd2802_id, }; diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c index 4117235ba61..d81a8e7afd6 100644 --- a/drivers/leds/leds-lm355x.c +++ b/drivers/leds/leds-lm355x.c @@ -477,6 +477,7 @@ static int lm355x_probe(struct i2c_client *client, chip->cdev_flash.name = "flash"; chip->cdev_flash.max_brightness = 16; chip->cdev_flash.brightness_set = lm355x_strobe_brightness_set; + chip->cdev_flash.default_trigger = "flash"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_flash); if (err < 0) @@ -486,6 +487,7 @@ static int lm355x_probe(struct i2c_client *client, chip->cdev_torch.name = "torch"; chip->cdev_torch.max_brightness = 8; chip->cdev_torch.brightness_set = lm355x_torch_brightness_set; + chip->cdev_torch.default_trigger = "torch"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_torch); if (err < 0) diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c index 9f428d9dfe9..f361bbef2de 100644 --- a/drivers/leds/leds-lm3642.c +++ b/drivers/leds/leds-lm3642.c @@ -363,6 +363,7 @@ static int lm3642_probe(struct i2c_client *client, chip->cdev_flash.name = "flash"; chip->cdev_flash.max_brightness = 16; chip->cdev_flash.brightness_set = lm3642_strobe_brightness_set; + chip->cdev_flash.default_trigger = "flash"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_flash); if (err < 0) { @@ -380,6 +381,7 @@ static int lm3642_probe(struct i2c_client *client, chip->cdev_torch.name = "torch"; chip->cdev_torch.max_brightness = 8; chip->cdev_torch.brightness_set = lm3642_torch_brightness_set; + chip->cdev_torch.default_trigger = "torch"; err = led_classdev_register((struct device *) &client->dev, &chip->cdev_torch); if (err < 0) { diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c index 1001347ba70..19752c928aa 100644 --- a/drivers/leds/leds-lp5521.c +++ b/drivers/leds/leds-lp5521.c @@ -68,6 +68,18 @@ #define LP5521_ENABLE_RUN_PROGRAM \ (LP5521_ENABLE_DEFAULT | LP5521_EXEC_RUN) +/* CONFIG register */ +#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ +#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ +#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ +#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ +#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ +#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ +#define LP5521_R_TO_BATT 0x04 /* R out: 0 = CP, 1 = Vbat */ +#define LP5521_CLK_INT 0x01 /* Internal clock */ +#define LP5521_DEFAULT_CFG \ + (LP5521_PWM_HF | LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO) + /* Status */ #define LP5521_EXT_CLK_USED 0x08 @@ -296,8 +308,11 @@ static int lp5521_post_init_device(struct lp55xx_chip *chip) /* Set all PWMs to direct control mode */ ret = lp55xx_write(chip, LP5521_REG_OP_MODE, LP5521_CMD_DIRECT); - val = chip->pdata->update_config ? 
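
The asic3 and bd2802 hunks above follow one recurring conversion: guard the callbacks with CONFIG_PM_SLEEP and declare the ops unconditionally with SIMPLE_DEV_PM_OPS(), which expands to an empty struct dev_pm_ops when sleep support is compiled out, so NULL placeholder macros become unnecessary. Reduced to a skeleton (driver name hypothetical):

	#ifdef CONFIG_PM_SLEEP
	static int foo_suspend(struct device *dev)
	{
		/* save controller state */
		return 0;
	}

	static int foo_resume(struct device *dev)
	{
		/* restore controller state */
		return 0;
	}
	#endif

	static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

	static struct platform_driver foo_driver = {
		.driver = {
			.name	= "foo",
			.pm	= &foo_pm_ops,
		},
	};
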
- : (LP5521_PWRSAVE_EN | LP5521_CP_MODE_AUTO | LP5521_R_TO_BATT); + /* Update configuration for the clock setting */ + val = LP5521_DEFAULT_CFG; + if (!lp55xx_is_extclk_used(chip)) + val |= LP5521_CLK_INT; + ret = lp55xx_write(chip, LP5521_REG_CONFIG, val); if (ret) return ret; @@ -360,7 +375,8 @@ static ssize_t lp5521_selftest(struct device *dev, mutex_lock(&chip->lock); ret = lp5521_run_selftest(chip, buf); mutex_unlock(&chip->lock); - return sprintf(buf, "%s\n", ret ? "FAIL" : "OK"); + + return scnprintf(buf, PAGE_SIZE, "%s\n", ret ? "FAIL" : "OK"); } /* device attributes */ diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c new file mode 100644 index 00000000000..513f2390ca2 --- /dev/null +++ b/drivers/leds/leds-lp5562.c @@ -0,0 +1,599 @@ +/* + * LP5562 LED driver + * + * Copyright (C) 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include <linux/firmware.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/platform_data/leds-lp55xx.h> +#include <linux/slab.h> + +#include "leds-lp55xx-common.h" + +#define LP5562_PROGRAM_LENGTH 32 +#define LP5562_MAX_LEDS 4 + +/* ENABLE Register 00h */ +#define LP5562_REG_ENABLE 0x00 +#define LP5562_EXEC_ENG1_M 0x30 +#define LP5562_EXEC_ENG2_M 0x0C +#define LP5562_EXEC_ENG3_M 0x03 +#define LP5562_EXEC_M 0x3F +#define LP5562_MASTER_ENABLE 0x40 /* Chip master enable */ +#define LP5562_LOGARITHMIC_PWM 0x80 /* Logarithmic PWM adjustment */ +#define LP5562_EXEC_RUN 0x2A +#define LP5562_ENABLE_DEFAULT \ + (LP5562_MASTER_ENABLE | LP5562_LOGARITHMIC_PWM) +#define LP5562_ENABLE_RUN_PROGRAM \ + (LP5562_ENABLE_DEFAULT | LP5562_EXEC_RUN) + +/* OPMODE Register 01h */ +#define LP5562_REG_OP_MODE 0x01 +#define LP5562_MODE_ENG1_M 0x30 +#define LP5562_MODE_ENG2_M 0x0C +#define LP5562_MODE_ENG3_M 0x03 +#define LP5562_LOAD_ENG1 0x10 +#define LP5562_LOAD_ENG2 0x04 +#define LP5562_LOAD_ENG3 0x01 +#define LP5562_RUN_ENG1 0x20 +#define LP5562_RUN_ENG2 0x08 +#define LP5562_RUN_ENG3 0x02 +#define LP5562_ENG1_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG1_M) == LP5562_LOAD_ENG1) +#define LP5562_ENG2_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG2_M) == LP5562_LOAD_ENG2) +#define LP5562_ENG3_IS_LOADING(mode) \ + ((mode & LP5562_MODE_ENG3_M) == LP5562_LOAD_ENG3) + +/* BRIGHTNESS Registers */ +#define LP5562_REG_R_PWM 0x04 +#define LP5562_REG_G_PWM 0x03 +#define LP5562_REG_B_PWM 0x02 +#define LP5562_REG_W_PWM 0x0E + +/* CURRENT Registers */ +#define LP5562_REG_R_CURRENT 0x07 +#define LP5562_REG_G_CURRENT 0x06 +#define LP5562_REG_B_CURRENT 0x05 +#define LP5562_REG_W_CURRENT 0x0F + +/* CONFIG Register 08h */ +#define LP5562_REG_CONFIG 0x08 +#define LP5562_PWM_HF 0x40 +#define LP5562_PWRSAVE_EN 0x20 +#define LP5562_CLK_INT 0x01 /* Internal clock */ +#define LP5562_DEFAULT_CFG (LP5562_PWM_HF | LP5562_PWRSAVE_EN) + +/* RESET Register 0Dh */ +#define LP5562_REG_RESET 0x0D +#define LP5562_RESET 0xFF + +/* PROGRAM ENGINE Registers */ +#define LP5562_REG_PROG_MEM_ENG1 0x10 +#define LP5562_REG_PROG_MEM_ENG2 0x30 +#define LP5562_REG_PROG_MEM_ENG3 0x50 + +/* LEDMAP Register 70h */ +#define LP5562_REG_ENG_SEL 0x70 +#define LP5562_ENG_SEL_PWM 0 +#define LP5562_ENG_FOR_RGB_M 0x3F +#define LP5562_ENG_SEL_RGB 0x1B /* R:ENG1, G:ENG2, 
B:ENG3 */ +#define LP5562_ENG_FOR_W_M 0xC0 +#define LP5562_ENG1_FOR_W 0x40 /* W:ENG1 */ +#define LP5562_ENG2_FOR_W 0x80 /* W:ENG2 */ +#define LP5562_ENG3_FOR_W 0xC0 /* W:ENG3 */ + +/* Program Commands */ +#define LP5562_CMD_DISABLE 0x00 +#define LP5562_CMD_LOAD 0x15 +#define LP5562_CMD_RUN 0x2A +#define LP5562_CMD_DIRECT 0x3F +#define LP5562_PATTERN_OFF 0 + +static inline void lp5562_wait_opmode_done(void) +{ + /* operation mode change needs to be longer than 153 us */ + usleep_range(200, 300); +} + +static inline void lp5562_wait_enable_done(void) +{ + /* it takes more than 488 us to update the ENABLE register */ + usleep_range(500, 600); +} + +static void lp5562_set_led_current(struct lp55xx_led *led, u8 led_current) +{ + u8 addr[] = { + LP5562_REG_R_CURRENT, + LP5562_REG_G_CURRENT, + LP5562_REG_B_CURRENT, + LP5562_REG_W_CURRENT, + }; + + led->led_current = led_current; + lp55xx_write(led->chip, addr[led->chan_nr], led_current); +} + +static void lp5562_load_engine(struct lp55xx_chip *chip) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 mask[] = { + [LP55XX_ENGINE_1] = LP5562_MODE_ENG1_M, + [LP55XX_ENGINE_2] = LP5562_MODE_ENG2_M, + [LP55XX_ENGINE_3] = LP5562_MODE_ENG3_M, + }; + + u8 val[] = { + [LP55XX_ENGINE_1] = LP5562_LOAD_ENG1, + [LP55XX_ENGINE_2] = LP5562_LOAD_ENG2, + [LP55XX_ENGINE_3] = LP5562_LOAD_ENG3, + }; + + lp55xx_update_bits(chip, LP5562_REG_OP_MODE, mask[idx], val[idx]); + + lp5562_wait_opmode_done(); +} + +static void lp5562_stop_engine(struct lp55xx_chip *chip) +{ + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DISABLE); + lp5562_wait_opmode_done(); +} + +static void lp5562_run_engine(struct lp55xx_chip *chip, bool start) +{ + int ret; + u8 mode; + u8 exec; + + /* stop engine */ + if (!start) { + lp55xx_write(chip, LP5562_REG_ENABLE, LP5562_ENABLE_DEFAULT); + lp5562_wait_enable_done(); + lp5562_stop_engine(chip); + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + lp5562_wait_opmode_done(); + return; + } + + /* + * To run the engine, + * the operation mode and the enable register should be updated at the same time + */ + + ret = lp55xx_read(chip, LP5562_REG_OP_MODE, &mode); + if (ret) + return; + + ret = lp55xx_read(chip, LP5562_REG_ENABLE, &exec); + if (ret) + return; + + /* change operation mode to RUN only when each engine is loading */ + if (LP5562_ENG1_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG1_M) | LP5562_RUN_ENG1; + exec = (exec & ~LP5562_EXEC_ENG1_M) | LP5562_RUN_ENG1; + } + + if (LP5562_ENG2_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG2_M) | LP5562_RUN_ENG2; + exec = (exec & ~LP5562_EXEC_ENG2_M) | LP5562_RUN_ENG2; + } + + if (LP5562_ENG3_IS_LOADING(mode)) { + mode = (mode & ~LP5562_MODE_ENG3_M) | LP5562_RUN_ENG3; + exec = (exec & ~LP5562_EXEC_ENG3_M) | LP5562_RUN_ENG3; + } + + lp55xx_write(chip, LP5562_REG_OP_MODE, mode); + lp5562_wait_opmode_done(); + + lp55xx_update_bits(chip, LP5562_REG_ENABLE, LP5562_EXEC_M, exec); + lp5562_wait_enable_done(); +} + +static int lp5562_update_firmware(struct lp55xx_chip *chip, + const u8 *data, size_t size) +{ + enum lp55xx_engine_index idx = chip->engine_idx; + u8 pattern[LP5562_PROGRAM_LENGTH] = {0}; + u8 addr[] = { + [LP55XX_ENGINE_1] = LP5562_REG_PROG_MEM_ENG1, + [LP55XX_ENGINE_2] = LP5562_REG_PROG_MEM_ENG2, + [LP55XX_ENGINE_3] = LP5562_REG_PROG_MEM_ENG3, + }; + unsigned cmd; + char c[3]; + int program_size; + int nrchars; + int offset = 0; + int ret; + int i; + + /* clear program memory before updating */ + for (i = 0; i < 
LP5562_PROGRAM_LENGTH; i++) + lp55xx_write(chip, addr[idx] + i, 0); + + i = 0; + while ((offset < size - 1) && (i < LP5562_PROGRAM_LENGTH)) { + /* separate sscanf calls because the length specifier works only for %s */ + ret = sscanf(data + offset, "%2s%n ", c, &nrchars); + if (ret != 1) + goto err; + + ret = sscanf(c, "%2x", &cmd); + if (ret != 1) + goto err; + + pattern[i] = (u8)cmd; + offset += nrchars; + i++; + } + + /* Each instruction is 16 bits long. Check that the length is even */ + if (i % 2) + goto err; + + program_size = i; + for (i = 0; i < program_size; i++) + lp55xx_write(chip, addr[idx] + i, pattern[i]); + + return 0; + +err: + dev_err(&chip->cl->dev, "wrong pattern format\n"); + return -EINVAL; +} + +static void lp5562_firmware_loaded(struct lp55xx_chip *chip) +{ + const struct firmware *fw = chip->fw; + + if (fw->size > LP5562_PROGRAM_LENGTH) { + dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n", + fw->size); + return; + } + + /* + * Program memory sequence + * 1) set engine mode to "LOAD" + * 2) write firmware data into program memory + */ + + lp5562_load_engine(chip); + lp5562_update_firmware(chip, fw->data, fw->size); +} + +static int lp5562_post_init_device(struct lp55xx_chip *chip) +{ + int ret; + u8 cfg = LP5562_DEFAULT_CFG; + + /* Set all PWMs to direct control mode */ + ret = lp55xx_write(chip, LP5562_REG_OP_MODE, LP5562_CMD_DIRECT); + if (ret) + return ret; + + lp5562_wait_opmode_done(); + + /* Update configuration for the clock setting */ + if (!lp55xx_is_extclk_used(chip)) + cfg |= LP5562_CLK_INT; + + ret = lp55xx_write(chip, LP5562_REG_CONFIG, cfg); + if (ret) + return ret; + + /* Initialize all channels PWM to zero -> leds off */ + lp55xx_write(chip, LP5562_REG_R_PWM, 0); + lp55xx_write(chip, LP5562_REG_G_PWM, 0); + lp55xx_write(chip, LP5562_REG_B_PWM, 0); + lp55xx_write(chip, LP5562_REG_W_PWM, 0); + + /* Set LED map as register PWM by default */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_PWM); + + return 0; +} + +static void lp5562_led_brightness_work(struct work_struct *work) +{ + struct lp55xx_led *led = container_of(work, struct lp55xx_led, + brightness_work); + struct lp55xx_chip *chip = led->chip; + u8 addr[] = { + LP5562_REG_R_PWM, + LP5562_REG_G_PWM, + LP5562_REG_B_PWM, + LP5562_REG_W_PWM, + }; + + mutex_lock(&chip->lock); + lp55xx_write(chip, addr[led->chan_nr], led->brightness); + mutex_unlock(&chip->lock); +} + +static void lp5562_write_program_memory(struct lp55xx_chip *chip, + u8 base, const u8 *rgb, int size) +{ + int i; + + if (!rgb || size <= 0) + return; + + for (i = 0; i < size; i++) + lp55xx_write(chip, base + i, *(rgb + i)); + + lp55xx_write(chip, base + i, 0); + lp55xx_write(chip, base + i + 1, 0); +} + +/* check that each engine program fits the program memory */ +static inline bool _is_pc_overflow(struct lp55xx_predef_pattern *ptn) +{ + return (ptn->size_r >= LP5562_PROGRAM_LENGTH || + ptn->size_g >= LP5562_PROGRAM_LENGTH || + ptn->size_b >= LP5562_PROGRAM_LENGTH); +} + +static int lp5562_run_predef_led_pattern(struct lp55xx_chip *chip, int mode) +{ + struct lp55xx_predef_pattern *ptn; + int i; + + if (mode == LP5562_PATTERN_OFF) { + lp5562_run_engine(chip, false); + return 0; + } + + ptn = chip->pdata->patterns + (mode - 1); + if (!ptn || _is_pc_overflow(ptn)) { + dev_err(&chip->cl->dev, "invalid pattern data\n"); + return -EINVAL; + } + + lp5562_stop_engine(chip); + + /* Set LED map as RGB */ + lp55xx_write(chip, LP5562_REG_ENG_SEL, LP5562_ENG_SEL_RGB); + + /* Load engines */ + for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) { + chip->engine_idx = 
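
The update path above consumes firmware as ASCII hex, two characters per byte and an even byte count, since every engine instruction is 16 bits wide. A hypothetical pattern file (opcodes illustrative only, not a real LP5562 program) could read:

	40FF 0D7F 7F00 42FF 0000

The loop above strips the whitespace, converts each pair to a byte (0x40, 0xFF, 0x0D, ...), and writes the result into the zero-filled program memory of the engine selected by chip->engine_idx.
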
i; + lp5562_load_engine(chip); + } + + /* Clear program registers */ + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG1 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG2 + 1, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3, 0); + lp55xx_write(chip, LP5562_REG_PROG_MEM_ENG3 + 1, 0); + + /* Program engines */ + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG1, + ptn->r, ptn->size_r); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG2, + ptn->g, ptn->size_g); + lp5562_write_program_memory(chip, LP5562_REG_PROG_MEM_ENG3, + ptn->b, ptn->size_b); + + /* Run engines */ + lp5562_run_engine(chip, true); + + return 0; +} + +static ssize_t lp5562_store_pattern(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + struct lp55xx_predef_pattern *ptn = chip->pdata->patterns; + int num_patterns = chip->pdata->num_patterns; + unsigned long mode; + int ret; + + ret = kstrtoul(buf, 0, &mode); + if (ret) + return ret; + + if (mode > num_patterns || !ptn) + return -EINVAL; + + mutex_lock(&chip->lock); + ret = lp5562_run_predef_led_pattern(chip, mode); + mutex_unlock(&chip->lock); + + if (ret) + return ret; + + return len; +} + +static ssize_t lp5562_store_engine_mux(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev)); + struct lp55xx_chip *chip = led->chip; + u8 mask; + u8 val; + + /* LED map + * R ... Engine 1 (fixed) + * G ... Engine 2 (fixed) + * B ... Engine 3 (fixed) + * W ... Engine 1 or 2 or 3 + */ + + if (sysfs_streq(buf, "RGB")) { + mask = LP5562_ENG_FOR_RGB_M; + val = LP5562_ENG_SEL_RGB; + } else if (sysfs_streq(buf, "W")) { + enum lp55xx_engine_index idx = chip->engine_idx; + + mask = LP5562_ENG_FOR_W_M; + switch (idx) { + case LP55XX_ENGINE_1: + val = LP5562_ENG1_FOR_W; + break; + case LP55XX_ENGINE_2: + val = LP5562_ENG2_FOR_W; + break; + case LP55XX_ENGINE_3: + val = LP5562_ENG3_FOR_W; + break; + default: + return -EINVAL; + } + + } else { + dev_err(dev, "choose RGB or W\n"); + return -EINVAL; + } + + mutex_lock(&chip->lock); + lp55xx_update_bits(chip, LP5562_REG_ENG_SEL, mask, val); + mutex_unlock(&chip->lock); + + return len; +} + +static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern); +static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux); + +static struct attribute *lp5562_attributes[] = { + &dev_attr_led_pattern.attr, + &dev_attr_engine_mux.attr, + NULL, +}; + +static const struct attribute_group lp5562_group = { + .attrs = lp5562_attributes, +}; + +/* Chip specific configurations */ +static struct lp55xx_device_config lp5562_cfg = { + .max_channel = LP5562_MAX_LEDS, + .reset = { + .addr = LP5562_REG_RESET, + .val = LP5562_RESET, + }, + .enable = { + .addr = LP5562_REG_ENABLE, + .val = LP5562_ENABLE_DEFAULT, + }, + .post_init_device = lp5562_post_init_device, + .set_led_current = lp5562_set_led_current, + .brightness_work_fn = lp5562_led_brightness_work, + .run_engine = lp5562_run_engine, + .firmware_cb = lp5562_firmware_loaded, + .dev_attr_group = &lp5562_group, +}; + +static int lp5562_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + struct lp55xx_chip *chip; + struct lp55xx_led *led; + struct lp55xx_platform_data *pdata = client->dev.platform_data; + + if (!pdata) 
{ + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; + } + + chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + led = devm_kzalloc(&client->dev, + sizeof(*led) * pdata->num_channels, GFP_KERNEL); + if (!led) + return -ENOMEM; + + chip->cl = client; + chip->pdata = pdata; + chip->cfg = &lp5562_cfg; + + mutex_init(&chip->lock); + + i2c_set_clientdata(client, led); + + ret = lp55xx_init_device(chip); + if (ret) + goto err_init; + + ret = lp55xx_register_leds(led, chip); + if (ret) + goto err_register_leds; + + ret = lp55xx_register_sysfs(chip); + if (ret) { + dev_err(&client->dev, "registering sysfs failed\n"); + goto err_register_sysfs; + } + + return 0; + +err_register_sysfs: + lp55xx_unregister_leds(led, chip); +err_register_leds: + lp55xx_deinit_device(chip); +err_init: + return ret; +} + +static int lp5562_remove(struct i2c_client *client) +{ + struct lp55xx_led *led = i2c_get_clientdata(client); + struct lp55xx_chip *chip = led->chip; + + lp5562_stop_engine(chip); + + lp55xx_unregister_sysfs(chip); + lp55xx_unregister_leds(led, chip); + lp55xx_deinit_device(chip); + + return 0; +} + +static const struct i2c_device_id lp5562_id[] = { + { "lp5562", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, lp5562_id); + +static struct i2c_driver lp5562_driver = { + .driver = { + .name = "lp5562", + }, + .probe = lp5562_probe, + .remove = lp5562_remove, + .id_table = lp5562_id, +}; + +module_i2c_driver(lp5562_driver); + +MODULE_DESCRIPTION("Texas Instruments LP5562 LED Driver"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c index d9eb8415742..ba34199dc3d 100644 --- a/drivers/leds/leds-lp55xx-common.c +++ b/drivers/leds/leds-lp55xx-common.c @@ -1,5 +1,5 @@ /* - * LP5521/LP5523/LP55231 Common Driver + * LP5521/LP5523/LP55231/LP5562 Common Driver * * Copyright 2012 Texas Instruments * @@ -12,6 +12,7 @@ * Derived from leds-lp5521.c, leds-lp5523.c */ +#include <linux/clk.h> #include <linux/delay.h> #include <linux/firmware.h> #include <linux/i2c.h> @@ -21,6 +22,9 @@ #include "leds-lp55xx-common.h" +/* External clock rate */ +#define LP55XX_CLK_32K 32768 + static struct lp55xx_led *cdev_to_lp55xx_led(struct led_classdev *cdev) { return container_of(cdev, struct lp55xx_led, cdev); @@ -80,7 +84,7 @@ static ssize_t lp55xx_show_current(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return sprintf(buf, "%d\n", led->led_current); + return scnprintf(buf, PAGE_SIZE, "%d\n", led->led_current); } static ssize_t lp55xx_store_current(struct device *dev, @@ -113,7 +117,7 @@ static ssize_t lp55xx_show_max_current(struct device *dev, { struct lp55xx_led *led = dev_to_lp55xx_led(dev); - return sprintf(buf, "%d\n", led->max_current); + return scnprintf(buf, PAGE_SIZE, "%d\n", led->max_current); } static DEVICE_ATTR(led_current, S_IRUGO | S_IWUSR, lp55xx_show_current, @@ -357,6 +361,35 @@ int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg, u8 mask, u8 val) } EXPORT_SYMBOL_GPL(lp55xx_update_bits); +bool lp55xx_is_extclk_used(struct lp55xx_chip *chip) +{ + struct clk *clk; + int err; + + clk = devm_clk_get(&chip->cl->dev, "32k_clk"); + if (IS_ERR(clk)) + goto use_internal_clk; + + err = clk_prepare_enable(clk); + if (err) + goto use_internal_clk; + + if (clk_get_rate(clk) != LP55XX_CLK_32K) { + clk_disable_unprepare(clk); + goto use_internal_clk; + } + + dev_info(&chip->cl->dev, "%dHz external clock used\n", LP55XX_CLK_32K); + + chip->clk = clk; + return 
true; + +use_internal_clk: + dev_info(&chip->cl->dev, "internal clock used\n"); + return false; +} +EXPORT_SYMBOL_GPL(lp55xx_is_extclk_used); + int lp55xx_init_device(struct lp55xx_chip *chip) { struct lp55xx_platform_data *pdata; @@ -421,6 +454,9 @@ void lp55xx_deinit_device(struct lp55xx_chip *chip) { struct lp55xx_platform_data *pdata = chip->pdata; + if (chip->clk) + clk_disable_unprepare(chip->clk); + if (pdata->enable) pdata->enable(0); diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h index ece4761a130..fa6a078bf54 100644 --- a/drivers/leds/leds-lp55xx-common.h +++ b/drivers/leds/leds-lp55xx-common.h @@ -83,6 +83,7 @@ struct lp55xx_device_config { */ struct lp55xx_chip { struct i2c_client *cl; + struct clk *clk; struct lp55xx_platform_data *pdata; struct mutex lock; /* lock for user-space interface */ int num_leds; @@ -117,6 +118,9 @@ extern int lp55xx_read(struct lp55xx_chip *chip, u8 reg, u8 *val); extern int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg, u8 mask, u8 val); +/* external clock detection */ +extern bool lp55xx_is_extclk_used(struct lp55xx_chip *chip); + /* common device init/deinit functions */ extern int lp55xx_init_device(struct lp55xx_chip *chip); extern void lp55xx_deinit_device(struct lp55xx_chip *chip); diff --git a/drivers/leds/leds-lt3593.c b/drivers/leds/leds-lt3593.c index c9b9e1fec58..ca48a7d5502 100644 --- a/drivers/leds/leds-lt3593.c +++ b/drivers/leds/leds-lt3593.c @@ -106,8 +106,9 @@ static int create_lt3593_led(const struct gpio_led *template, if (!template->retain_state_suspended) led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; - ret = devm_gpio_request_one(parent, template->gpio, - GPIOF_DIR_OUT | state, template->name); + ret = devm_gpio_request_one(parent, template->gpio, state ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, + template->name); if (ret < 0) return ret; diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index d978171c25b..70137b1eecf 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -193,7 +193,8 @@ create_ns2_led(struct platform_device *pdev, struct ns2_led_data *led_dat, enum ns2_led_modes mode; ret = devm_gpio_request_one(&pdev->dev, template->cmd, - GPIOF_DIR_OUT | gpio_get_value(template->cmd), + gpio_get_value(template->cmd) ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, template->name); if (ret) { dev_err(&pdev->dev, "%s: failed to setup command GPIO\n", @@ -202,7 +203,8 @@ create_ns2_led(struct platform_device *pdev, struct ns2_led_data *led_dat, } ret = devm_gpio_request_one(&pdev->dev, template->slow, - GPIOF_DIR_OUT | gpio_get_value(template->slow), + gpio_get_value(template->slow) ? 
+ GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, template->name); if (ret) { dev_err(&pdev->dev, "%s: failed to setup slow GPIO\n", @@ -306,10 +308,21 @@ static const struct of_device_id of_ns2_leds_match[] = { }; #endif /* CONFIG_OF_GPIO */ +struct ns2_led_priv { + int num_leds; + struct ns2_led_data leds_data[]; +}; + +static inline int sizeof_ns2_led_priv(int num_leds) +{ + return sizeof(struct ns2_led_priv) + + (sizeof(struct ns2_led_data) * num_leds); +} + static int ns2_led_probe(struct platform_device *pdev) { struct ns2_led_platform_data *pdata = pdev->dev.platform_data; - struct ns2_led_data *leds_data; + struct ns2_led_priv *priv; int i; int ret; @@ -330,21 +343,23 @@ static int ns2_led_probe(struct platform_device *pdev) return -EINVAL; #endif /* CONFIG_OF_GPIO */ - leds_data = devm_kzalloc(&pdev->dev, sizeof(struct ns2_led_data) * - pdata->num_leds, GFP_KERNEL); - if (!leds_data) + priv = devm_kzalloc(&pdev->dev, + sizeof_ns2_led_priv(pdata->num_leds), GFP_KERNEL); + if (!priv) return -ENOMEM; + priv->num_leds = pdata->num_leds; - for (i = 0; i < pdata->num_leds; i++) { - ret = create_ns2_led(pdev, &leds_data[i], &pdata->leds[i]); + for (i = 0; i < priv->num_leds; i++) { + ret = create_ns2_led(pdev, &priv->leds_data[i], + &pdata->leds[i]); if (ret < 0) { for (i = i - 1; i >= 0; i--) - delete_ns2_led(&leds_data[i]); + delete_ns2_led(&priv->leds_data[i]); return ret; } } - platform_set_drvdata(pdev, leds_data); + platform_set_drvdata(pdev, priv); return 0; } @@ -352,13 +367,12 @@ static int ns2_led_probe(struct platform_device *pdev) static int ns2_led_remove(struct platform_device *pdev) { int i; - struct ns2_led_platform_data *pdata = pdev->dev.platform_data; - struct ns2_led_data *leds_data; + struct ns2_led_priv *priv; - leds_data = platform_get_drvdata(pdev); + priv = platform_get_drvdata(pdev); - for (i = 0; i < pdata->num_leds; i++) - delete_ns2_led(&leds_data[i]); + for (i = 0; i < priv->num_leds; i++) + delete_ns2_led(&priv->leds_data[i]); platform_set_drvdata(pdev, NULL); diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index a1ea5f6a8d3..faf52c005e8 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -23,12 +23,16 @@ #include <linux/pwm.h> #include <linux/leds_pwm.h> #include <linux/slab.h> +#include <linux/workqueue.h> struct led_pwm_data { struct led_classdev cdev; struct pwm_device *pwm; + struct work_struct work; unsigned int active_low; unsigned int period; + int duty; + bool can_sleep; }; struct led_pwm_priv { @@ -36,6 +40,26 @@ struct led_pwm_priv { struct led_pwm_data leds[0]; }; +static void __led_pwm_set(struct led_pwm_data *led_dat) +{ + int new_duty = led_dat->duty; + + pwm_config(led_dat->pwm, new_duty, led_dat->period); + + if (new_duty == 0) + pwm_disable(led_dat->pwm); + else + pwm_enable(led_dat->pwm); +} + +static void led_pwm_work(struct work_struct *work) +{ + struct led_pwm_data *led_dat = + container_of(work, struct led_pwm_data, work); + + __led_pwm_set(led_dat); +} + static void led_pwm_set(struct led_classdev *led_cdev, enum led_brightness brightness) { @@ -44,13 +68,12 @@ static void led_pwm_set(struct led_classdev *led_cdev, unsigned int max = led_dat->cdev.max_brightness; unsigned int period = led_dat->period; - if (brightness == 0) { - pwm_config(led_dat->pwm, 0, period); - pwm_disable(led_dat->pwm); - } else { - pwm_config(led_dat->pwm, brightness * period / max, period); - pwm_enable(led_dat->pwm); - } + led_dat->duty = brightness * period / max; + + if (led_dat->can_sleep) + schedule_work(&led_dat->work); + 
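
The gpio_request_one()/devm_gpio_request_one() fixes in the lt3593 and ns2 hunks above and the renesas-tpu hunk below all repair the same latent bug: in <linux/gpio.h>, GPIOF_DIR_OUT is 0 and bit 0 of the flags word is the direction flag, so OR-ing a logical line value into the flags silently turns a high initial level into an input request:

	flags = GPIOF_DIR_OUT | 1;	/* == GPIOF_DIR_IN: requests an input! */
	flags = state ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;	/* correct */
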
else + __led_pwm_set(led_dat); } static inline size_t sizeof_pwm_leds_priv(int num_leds) @@ -100,6 +123,10 @@ static struct led_pwm_priv *led_pwm_create_of(struct platform_device *pdev) led_dat->cdev.brightness = LED_OFF; led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; + led_dat->can_sleep = pwm_can_sleep(led_dat->pwm); + if (led_dat->can_sleep) + INIT_WORK(&led_dat->work, led_pwm_work); + ret = led_classdev_register(&pdev->dev, &led_dat->cdev); if (ret < 0) { dev_err(&pdev->dev, "failed to register for %s\n", @@ -153,6 +180,10 @@ static int led_pwm_probe(struct platform_device *pdev) led_dat->cdev.max_brightness = cur_led->max_brightness; led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; + led_dat->can_sleep = pwm_can_sleep(led_dat->pwm); + if (led_dat->can_sleep) + INIT_WORK(&led_dat->work, led_pwm_work); + ret = led_classdev_register(&pdev->dev, &led_dat->cdev); if (ret < 0) goto err; @@ -180,8 +211,11 @@ static int led_pwm_remove(struct platform_device *pdev) struct led_pwm_priv *priv = platform_get_drvdata(pdev); int i; - for (i = 0; i < priv->num_leds; i++) + for (i = 0; i < priv->num_leds; i++) { led_classdev_unregister(&priv->leds[i].cdev); + if (priv->leds[i].can_sleep) + cancel_work_sync(&priv->leds[i].work); + } return 0; } diff --git a/drivers/leds/leds-renesas-tpu.c b/drivers/leds/leds-renesas-tpu.c index d3c2b7e68fb..9483f1c1078 100644 --- a/drivers/leds/leds-renesas-tpu.c +++ b/drivers/leds/leds-renesas-tpu.c @@ -205,7 +205,8 @@ static void r_tpu_set_pin(struct r_tpu_priv *p, enum r_tpu_pin new_state, gpio_free(cfg->pin_gpio_fn); if (new_state == R_TPU_PIN_GPIO) - gpio_request_one(cfg->pin_gpio, GPIOF_DIR_OUT | !!brightness, + gpio_request_one(cfg->pin_gpio, !!brightness ? + GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW, cfg->name); if (new_state == R_TPU_PIN_GPIO_FN) diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c index 070ba0741b2..98fe021ba27 100644 --- a/drivers/leds/leds-tca6507.c +++ b/drivers/leds/leds-tca6507.c @@ -85,6 +85,7 @@ #include <linux/gpio.h> #include <linux/workqueue.h> #include <linux/leds-tca6507.h> +#include <linux/of.h> /* LED select registers determine the source that drives LED outputs */ #define TCA6507_LS_LED_OFF 0x0 /* Output HI-Z (off) */ @@ -724,7 +725,6 @@ tca6507_led_dt_init(struct i2c_client *client) return ERR_PTR(-ENODEV); } -#define of_tca6507_leds_match NULL #endif static int tca6507_probe(struct i2c_client *client, @@ -813,7 +813,7 @@ static struct i2c_driver tca6507_driver = { .driver = { .name = "leds-tca6507", .owner = THIS_MODULE, - .of_match_table = of_tca6507_leds_match, + .of_match_table = of_match_ptr(of_tca6507_leds_match), }, .probe = tca6507_probe, .remove = tca6507_remove, diff --git a/drivers/leds/leds-wm8350.c b/drivers/leds/leds-wm8350.c index ed15157c8f6..8a181d56602 100644 --- a/drivers/leds/leds-wm8350.c +++ b/drivers/leds/leds-wm8350.c @@ -129,7 +129,10 @@ static void wm8350_led_disable(struct wm8350_led *led) ret = regulator_disable(led->isink); if (ret != 0) { dev_err(led->cdev.dev, "Failed to disable ISINK: %d\n", ret); - regulator_enable(led->dcdc); + ret = regulator_enable(led->dcdc); + if (ret != 0) + dev_err(led->cdev.dev, "Failed to reenable DCDC: %d\n", + ret); return; } diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig new file mode 100644 index 00000000000..49794b47b51 --- /dev/null +++ b/drivers/leds/trigger/Kconfig @@ -0,0 +1,111 @@ +menuconfig LEDS_TRIGGERS + bool "LED Trigger support" + depends on LEDS_CLASS + help + This option enables trigger support for the leds 
class. + These triggers allow kernel events to drive the LEDs and can + be configured via sysfs. If unsure, say Y. + +if LEDS_TRIGGERS + +config LEDS_TRIGGER_TIMER + tristate "LED Timer Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by a programmable timer + via sysfs. Some LED hardware can be programmed to start + blinking the LED without any further software interaction. + For more details read Documentation/leds/leds-class.txt. + + If unsure, say Y. + +config LEDS_TRIGGER_ONESHOT + tristate "LED One-shot Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to blink in one-shot pulses with parameters + controlled via sysfs. It's useful to notify the user on + sporadic events, when there are no clear begin and end trap points, + or on dense events, where this blinks the LED at constant rate if + rearmed continuously. + + It also shows how to use the led_blink_set_oneshot() function. + + If unsure, say Y. + +config LEDS_TRIGGER_IDE_DISK + bool "LED IDE Disk Trigger" + depends on IDE_GD_ATA + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by IDE disk activity. + If unsure, say Y. + +config LEDS_TRIGGER_HEARTBEAT + tristate "LED Heartbeat Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by a CPU load average. + The flash frequency is a hyperbolic function of the 1-minute + load average. + If unsure, say Y. + +config LEDS_TRIGGER_BACKLIGHT + tristate "LED backlight Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled as a backlight device: they + turn off and on when the display is blanked and unblanked. + + If unsure, say N. + +config LEDS_TRIGGER_CPU + bool "LED CPU Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled by active CPUs. This shows + the active CPUs across an array of LEDs so you can see which + CPUs are active on the system at any given moment. + + If unsure, say N. + +config LEDS_TRIGGER_GPIO + tristate "LED GPIO Trigger" + depends on LEDS_TRIGGERS + depends on GPIOLIB + help + This allows LEDs to be controlled by gpio events. It's good + when using gpios as switches and triggering the needed LEDs + from there. One use case is n810's keypad LEDs that could + be triggered by this trigger when user slides up to show + keypad. + + If unsure, say N. + +config LEDS_TRIGGER_DEFAULT_ON + tristate "LED Default ON Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be initialised in the ON state. + If unsure, say Y. + +comment "iptables trigger is under Netfilter config (LED target)" + depends on LEDS_TRIGGERS + +config LEDS_TRIGGER_TRANSIENT + tristate "LED Transient Trigger" + depends on LEDS_TRIGGERS + help + This allows one time activation of a transient state on + GPIO/PWM based hardware. + If unsure, say Y. + +config LEDS_TRIGGER_CAMERA + tristate "LED Camera Flash/Torch Trigger" + depends on LEDS_TRIGGERS + help + This allows LEDs to be controlled as a camera flash/torch device. + This enables direct flash/torch on/off by the driver, kernel space. + If unsure, say Y. 
+ +endif # LEDS_TRIGGERS diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile new file mode 100644 index 00000000000..1abf48dacf7 --- /dev/null +++ b/drivers/leds/trigger/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o +obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o +obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o +obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o +obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o +obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o +obj-$(CONFIG_LEDS_TRIGGER_CPU) += ledtrig-cpu.o +obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o +obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o +obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o diff --git a/drivers/leds/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c index 027a2b15d7d..3c9c88a07eb 100644 --- a/drivers/leds/ledtrig-backlight.c +++ b/drivers/leds/trigger/ledtrig-backlight.c @@ -16,7 +16,7 @@ #include <linux/init.h> #include <linux/fb.h> #include <linux/leds.h> -#include "leds.h" +#include "../leds.h" #define BLANK 1 #define UNBLANK 0 diff --git a/drivers/leds/trigger/ledtrig-camera.c b/drivers/leds/trigger/ledtrig-camera.c new file mode 100644 index 00000000000..9bd73a8bad5 --- /dev/null +++ b/drivers/leds/trigger/ledtrig-camera.c @@ -0,0 +1,57 @@ +/* + * Camera Flash and Torch On/Off Trigger + * + * based on ledtrig-ide-disk.c + * + * Copyright 2013 Texas Instruments + * + * Author: Milo(Woogyom) Kim <milo.kim@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/leds.h> + +DEFINE_LED_TRIGGER(ledtrig_flash); +DEFINE_LED_TRIGGER(ledtrig_torch); + +void ledtrig_flash_ctrl(bool on) +{ + enum led_brightness brt = on ? LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_flash, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_flash_ctrl); + +void ledtrig_torch_ctrl(bool on) +{ + enum led_brightness brt = on ? 
LED_FULL : LED_OFF; + + led_trigger_event(ledtrig_torch, brt); +} +EXPORT_SYMBOL_GPL(ledtrig_torch_ctrl); + +static int __init ledtrig_camera_init(void) +{ + led_trigger_register_simple("flash", &ledtrig_flash); + led_trigger_register_simple("torch", &ledtrig_torch); + return 0; +} +module_init(ledtrig_camera_init); + +static void __exit ledtrig_camera_exit(void) +{ + led_trigger_unregister_simple(ledtrig_torch); + led_trigger_unregister_simple(ledtrig_flash); +} +module_exit(ledtrig_camera_exit); + +MODULE_DESCRIPTION("LED Trigger for Camera Flash/Torch Control"); +MODULE_AUTHOR("Milo Kim"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/ledtrig-cpu.c b/drivers/leds/trigger/ledtrig-cpu.c index 4239b3955ff..118335eccc5 100644 --- a/drivers/leds/ledtrig-cpu.c +++ b/drivers/leds/trigger/ledtrig-cpu.c @@ -26,7 +26,7 @@ #include <linux/percpu.h> #include <linux/syscore_ops.h> #include <linux/rwsem.h> -#include "leds.h" +#include "../leds.h" #define MAX_NAME_LEN 8 diff --git a/drivers/leds/ledtrig-default-on.c b/drivers/leds/trigger/ledtrig-default-on.c index eac1f1b1ada..81a91be8e18 100644 --- a/drivers/leds/ledtrig-default-on.c +++ b/drivers/leds/trigger/ledtrig-default-on.c @@ -15,7 +15,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/leds.h> -#include "leds.h" +#include "../leds.h" static void defon_trig_activate(struct led_classdev *led_cdev) { diff --git a/drivers/leds/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c index 72e3ebfc281..35812e3a37f 100644 --- a/drivers/leds/ledtrig-gpio.c +++ b/drivers/leds/trigger/ledtrig-gpio.c @@ -17,7 +17,7 @@ #include <linux/workqueue.h> #include <linux/leds.h> #include <linux/slab.h> -#include "leds.h" +#include "../leds.h" struct gpio_trig_data { struct led_classdev *led; diff --git a/drivers/leds/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index 1edc7463ce8..5c8464a3317 100644 --- a/drivers/leds/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -19,7 +19,7 @@ #include <linux/sched.h> #include <linux/leds.h> #include <linux/reboot.h> -#include "leds.h" +#include "../leds.h" static int panic_heartbeats; diff --git a/drivers/leds/ledtrig-ide-disk.c b/drivers/leds/trigger/ledtrig-ide-disk.c index 2cd7c0cf592..2cd7c0cf592 100644 --- a/drivers/leds/ledtrig-ide-disk.c +++ b/drivers/leds/trigger/ledtrig-ide-disk.c diff --git a/drivers/leds/ledtrig-oneshot.c b/drivers/leds/trigger/ledtrig-oneshot.c index 2c029aa5c4f..cb4c7466692 100644 --- a/drivers/leds/ledtrig-oneshot.c +++ b/drivers/leds/trigger/ledtrig-oneshot.c @@ -18,7 +18,7 @@ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/leds.h> -#include "leds.h" +#include "../leds.h" #define DEFAULT_DELAY 100 diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c index f774d059220..8d09327b571 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/trigger/ledtrig-timer.c @@ -17,7 +17,6 @@ #include <linux/device.h> #include <linux/ctype.h> #include <linux/leds.h> -#include "leds.h" static ssize_t led_delay_on_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/leds/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c index 398f1042c43..e5abc00bb00 100644 --- a/drivers/leds/ledtrig-transient.c +++ b/drivers/leds/trigger/ledtrig-transient.c @@ -25,7 +25,7 @@ #include <linux/slab.h> #include <linux/timer.h> #include <linux/leds.h> -#include "leds.h" +#include "../leds.h" struct transient_trig_data { int activate; diff --git a/drivers/mfd/88pm860x-core.c 
b/drivers/mfd/88pm860x-core.c index 893fc1ba6ea..31ca55548ef 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -1144,17 +1144,15 @@ static int pm860x_probe(struct i2c_client *client, return -ENOMEM; ret = pm860x_dt_init(node, &client->dev, pdata); if (ret) - goto err; + return ret; } else if (!pdata) { pr_info("No platform data in %s!\n", __func__); return -EINVAL; } chip = kzalloc(sizeof(struct pm860x_chip), GFP_KERNEL); - if (chip == NULL) { - ret = -ENOMEM; - goto err; - } + if (chip == NULL) + return -ENOMEM; chip->id = verify_addr(client); chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config); @@ -1194,10 +1192,6 @@ static int pm860x_probe(struct i2c_client *client, pm860x_device_init(chip, pdata); return 0; -err: - if (node) - devm_kfree(&client->dev, pdata); - return ret; } static int pm860x_remove(struct i2c_client *client) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ca86581d02c..d9aed1593e5 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -10,19 +10,240 @@ config MFD_CORE select IRQ_DOMAIN default n -config MFD_88PM860X - bool "Support Marvell 88PM8606/88PM8607" +config MFD_CS5535 + tristate "AMD CS5535 and CS5536 southbridge core functions" + select MFD_CORE + depends on PCI && X86 + ---help--- + This is the core driver for CS5535/CS5536 MFD functions. This is + necessary for using the board's GPIO and MFGPT functionality. + +config MFD_AS3711 + bool "AMS AS3711" + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ depends on I2C=y && GENERIC_HARDIRQS + help + Support for the AS3711 PMIC from AMS + +config PMIC_ADP5520 + bool "Analog Devices ADP5520/01 MFD PMIC Core Support" + depends on I2C=y + help + Say yes here to add support for Analog Devices AD5520 and ADP5501, + Multifunction Power Management IC. This includes + the I2C driver and the core APIs _only_, you have to select + individual components like LCD backlight, LEDs, GPIOs and Keypad + under the corresponding menus. + +config MFD_AAT2870_CORE + bool "AnalogicTech AAT2870" + select MFD_CORE + depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS + help + If you say yes here you get support for the AAT2870. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device. + +config MFD_CROS_EC + tristate "ChromeOS Embedded Controller" + select MFD_CORE + help + If you say Y here you get support for the ChromeOS Embedded + Controller (EC) providing keyboard, battery and power services. + You also need to enable the driver for the bus you are using. The + protocol for talking to the EC is defined by the bus driver. + +config MFD_CROS_EC_I2C + tristate "ChromeOS Embedded Controller (I2C)" + depends on MFD_CROS_EC && I2C + + help + If you say Y here, you get support for talking to the ChromeOS + EC through an I2C bus. This uses a simple byte-level protocol with + a checksum. Failing accesses will be retried three times to + improve reliability. + +config MFD_CROS_EC_SPI + tristate "ChromeOS Embedded Controller (SPI)" + depends on MFD_CROS_EC && SPI + + ---help--- + If you say Y here, you get support for talking to the ChromeOS EC + through a SPI bus, using a byte-level protocol. Since the EC's + response time cannot be guaranteed, we support ignoring + 'pre-amble' bytes before the response actually starts.
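The MFD_CROS_EC_SPI help above describes discarding 'pre-amble' bytes while the EC prepares its response. A minimal sketch of that idea in kernel C (the frame-start marker value and all names here are illustrative assumptions, not the driver's actual protocol or symbols):

    #include <linux/errno.h>
    #include <linux/string.h>
    #include <linux/types.h>

    #define MY_EC_FRAME_START 0xec  /* hypothetical "response starts here" marker */

    /*
     * Scan a raw SPI receive buffer for the frame-start byte and copy out
     * the payload that follows it. Returns the payload length, or -ENODATA
     * if the marker never appeared (the EC was not ready in time).
     */
    static int my_ec_strip_preamble(const u8 *raw, size_t raw_len,
                                    u8 *payload, size_t payload_len)
    {
            size_t i;

            for (i = 0; i < raw_len; i++)
                    if (raw[i] == MY_EC_FRAME_START)
                            break;
            if (i == raw_len)
                    return -ENODATA;

            i++;                    /* skip the marker itself */
            raw_len -= i;
            if (raw_len > payload_len)
                    raw_len = payload_len;
            memcpy(payload, raw + i, raw_len);
            return raw_len;
    }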
+ +config MFD_ASIC3 + bool "Compaq ASIC3" + depends on GENERIC_HARDIRQS && GPIOLIB && ARM + select MFD_CORE + ---help--- + This driver supports the ASIC3 multifunction chip found on many + PDAs (mainly iPAQ and HTC based ones). + +config PMIC_DA903X + bool "Dialog Semiconductor DA9030/DA9034 PMIC Support" + depends on I2C=y + help + Say yes here to add support for Dialog Semiconductor DA9030 (a.k.a + ARAVA) and DA9034 (a.k.a MICCO), these are Power Management ICs + usually found on PXA processor-based platforms. This includes + the I2C driver and the core APIs _only_, you have to select + individual components like LCD backlight, voltage regulators, + LEDs and battery-charger under the corresponding menus. + +config PMIC_DA9052 + bool + select MFD_CORE + +config MFD_DA9052_SPI + bool "Dialog Semiconductor DA9052/53 PMIC variants with SPI" + select REGMAP_SPI + select REGMAP_IRQ + select PMIC_DA9052 + depends on SPI_MASTER=y && GENERIC_HARDIRQS + help + Support for the Dialog Semiconductor DA9052 PMIC + when controlled using SPI. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. + +config MFD_DA9052_I2C + bool "Dialog Semiconductor DA9052/53 PMIC variants with I2C" select REGMAP_I2C + select REGMAP_IRQ + select PMIC_DA9052 + depends on I2C=y && GENERIC_HARDIRQS + help + Support for the Dialog Semiconductor DA9052 PMIC + when controlled using I2C. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. + +config MFD_DA9055 + bool "Dialog Semiconductor DA9055 PMIC Support" + select REGMAP_I2C + select REGMAP_IRQ select MFD_CORE + depends on I2C=y && GENERIC_HARDIRQS help - This supports for Marvell 88PM8606/88PM8607 Power Management IC. - This includes the I2C driver and the core APIs _only_, you have to - select individual components like voltage regulators, RTC and - battery-charger under the corresponding menus. + Say yes here for support of Dialog Semiconductor DA9055. This is + a Power Management IC. This driver provides common support for + accessing the device as well as the I2C interface to the chip itself. + Additional drivers must be enabled in order to use the functionality + of the device. + + This driver can be built as a module. If built as a module it will be + called "da9055" + +config MFD_MC13783 + tristate + +config MFD_MC13XXX + tristate + depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS + select MFD_CORE + select MFD_MC13783 + help + Enable support for the Freescale MC13783 and MC13892 PMICs. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device. + +config MFD_MC13XXX_SPI + tristate "Freescale MC13783 and MC13892 SPI interface" + depends on SPI_MASTER && GENERIC_HARDIRQS + select REGMAP_SPI + select MFD_MC13XXX + help + Select this if your MC13xxx is connected via an SPI bus. + +config MFD_MC13XXX_I2C + tristate "Freescale MC13892 I2C interface" + depends on I2C && GENERIC_HARDIRQS + select REGMAP_I2C + select MFD_MC13XXX + help + Select this if your MC13xxx is connected via an I2C bus. + +config HTC_EGPIO + bool "HTC EGPIO support" + depends on GENERIC_HARDIRQS && GPIOLIB && ARM + help + This driver supports the CPLD egpio chip present on + several HTC phones. It provides basic support for input + pins, output pins, and irqs.
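MFD_MC13XXX_SPI and MFD_MC13XXX_I2C above both select one shared, bus-independent core, a split several drivers in this series use. A sketch of the usual shape, assuming invented names (my_core_probe, my_regmap_config and the register widths are illustrative; only the regmap calls are real APIs):

    #include <linux/err.h>
    #include <linux/i2c.h>
    #include <linux/spi/spi.h>
    #include <linux/regmap.h>

    static const struct regmap_config my_regmap_config = {
            .reg_bits = 8,
            .val_bits = 24,
    };

    /* Shared, bus-agnostic probe implemented once in the core driver. */
    int my_core_probe(struct device *dev, struct regmap *map, int irq);

    static int my_i2c_probe(struct i2c_client *client,
                            const struct i2c_device_id *id)
    {
            struct regmap *map = regmap_init_i2c(client, &my_regmap_config);

            if (IS_ERR(map))
                    return PTR_ERR(map);
            return my_core_probe(&client->dev, map, client->irq);
    }

    static int my_spi_probe(struct spi_device *spi)
    {
            struct regmap *map = regmap_init_spi(spi, &my_regmap_config);

            if (IS_ERR(map))
                    return PTR_ERR(map);
            return my_core_probe(&spi->dev, map, spi->irq);
    }

The bus shims stay trivial; everything below the regmap is identical for both interconnects.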
+ +config HTC_PASIC3 + tristate "HTC PASIC3 LED/DS1WM chip support" + select MFD_CORE + depends on GENERIC_HARDIRQS + help + This core driver provides register access for the LED/DS1WM + chips labeled "AIC2" and "AIC3", found on HTC Blueangel and + HTC Magician devices, respectively. Actual functionality is + handled by the leds-pasic3 and ds1wm drivers. + +config HTC_I2CPLD + bool "HTC I2C PLD chip support" + depends on I2C=y && GPIOLIB + help + If you say yes here you get support for the supposed CPLD + found on omap850 HTC devices like the HTC Wizard and HTC Herald. + This device provides input and output GPIOs through an I2C + interface to one or more sub-chips. + +config LPC_ICH + tristate "Intel ICH LPC" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + The LPC bridge function of the Intel ICH provides support for + many functional units. This driver provides needed support for + other drivers to control these functions, currently GPIO and + watchdog. + +config LPC_SCH + tristate "Intel SCH LPC" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + LPC bridge function of the Intel SCH provides support for + System Management Bus and General Purpose I/O. + +config MFD_INTEL_MSIC + bool "Intel MSIC" + depends on INTEL_SCU_IPC + select MFD_CORE + help + Select this option to enable access to Intel MSIC (Avatele + Passage) chip. This chip embeds audio, battery, GPIO, etc. + devices used in Intel Medfield platforms. + +config MFD_JANZ_CMODIO + tristate "Janz CMOD-IO PCI MODULbus Carrier Board" + select MFD_CORE + depends on PCI && GENERIC_HARDIRQS + help + This is the core driver for the Janz CMOD-IO PCI MODULbus + carrier board. This device is a PCI to MODULbus bridge which may + host many different types of MODULbus daughterboards, including + CAN and GPIO controllers. + +config MFD_JZ4740_ADC + bool "Ingenic JZ4740 ADC core" + select MFD_CORE + select GENERIC_IRQ_CHIP + depends on MACH_JZ4740 + help + Say yes here if you want support for the ADC unit in the JZ4740 SoC. + This driver is necessary for jz4740-battery and jz4740-hwmon driver. config MFD_88PM800 - tristate "Support Marvell 88PM800" + tristate "Marvell 88PM800" depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C select REGMAP_IRQ @@ -34,7 +255,7 @@ config MFD_88PM800 battery-charger under the corresponding menus. config MFD_88PM805 - tristate "Support Marvell 88PM805" + tristate "Marvell 88PM805" depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C select REGMAP_IRQ @@ -45,8 +266,242 @@ config MFD_88PM805 components like codec device, headset/Mic device under the corresponding menus. +config MFD_88PM860X + bool "Marvell 88PM8606/88PM8607" + depends on I2C=y && GENERIC_HARDIRQS + select REGMAP_I2C + select MFD_CORE + help + This adds support for the Marvell 88PM8606/88PM8607 Power Management IC. + This includes the I2C driver and the core APIs _only_, you have to + select individual components like voltage regulators, RTC and + battery-charger under the corresponding menus. + +config MFD_MAX77686 + bool "Maxim Semiconductor MAX77686 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select IRQ_DOMAIN + help + Say yes here to add support for Maxim Semiconductor MAX77686. + This is a Power Management IC with RTC on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device.
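Nearly every core option above says the same thing: the core driver only provides access, and function drivers must be enabled separately. The mechanism behind that wording is mfd_add_devices(), which turns a static cell table into child platform devices. A hedged sketch (the cell names are made up, and the trailing parameters of mfd_add_devices have varied across kernel versions):

    #include <linux/kernel.h>
    #include <linux/mfd/core.h>

    /* Each cell becomes a platform device a function driver can bind to. */
    static struct mfd_cell my_pmic_cells[] = {
            { .name = "my-pmic-rtc" },
            { .name = "my-pmic-regulator" },
            { .name = "my-pmic-onkey" },
    };

    static int my_pmic_register_cells(struct device *dev)
    {
            /* id = -1 (auto), no memory base, no IRQ base, no IRQ domain. */
            return mfd_add_devices(dev, -1, my_pmic_cells,
                                   ARRAY_SIZE(my_pmic_cells), NULL, 0, NULL);
    }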
+ +config MFD_MAX77693 + bool "Maxim Semiconductor MAX77693 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + Say yes here to add support for Maxim Semiconductor MAX77693. + This is a companion Power Management IC with Flash, Haptic, Charger, + and MUIC(Micro USB Interface Controller) controls on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_MAX8907 + tristate "Maxim Semiconductor MAX8907 PMIC Support" + select MFD_CORE + depends on I2C=y && GENERIC_HARDIRQS + select REGMAP_I2C + select REGMAP_IRQ + help + Say yes here to add support for Maxim Semiconductor MAX8907. This is + a Power Management IC. This driver provides common support for + accessing the device; additional drivers must be enabled in order + to use the functionality of the device. + +config MFD_MAX8925 + bool "Maxim Semiconductor MAX8925 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + help + Say yes here to add support for Maxim Semiconductor MAX8925. This is + a Power Management IC. This driver provides common support for + accessing the device, additional drivers must be enabled in order + to use the functionality of the device. + +config MFD_MAX8997 + bool "Maxim Semiconductor MAX8997/8966 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select IRQ_DOMAIN + help + Say yes here to add support for Maxim Semiconductor MAX8997/8966. + This is a Power Management IC with RTC, Flash, Fuel Gauge, Haptic, + MUIC controls on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_MAX8998 + bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + help + Say yes here to add support for Maxim Semiconductor MAX8998 and + National Semiconductor LP3974. This is a Power Management IC. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device. + +config EZX_PCAP + bool "Motorola EZXPCAP Support" + depends on GENERIC_HARDIRQS && SPI_MASTER + help + This enables the PCAP ASIC present on EZX Phones. This is + needed for MMC, TouchScreen, Sound, USB, etc. + +config MFD_VIPERBOARD + tristate "Nano River Technologies Viperboard" + select MFD_CORE + depends on USB && GENERIC_HARDIRQS + default n + help + Say yes here if you want support for Nano River Technologies + Viperboard. + There are mfd cell drivers available for i2c master, adc and + both gpios found on the board. The spi part does not yet + have a driver. + You need to select the mfd cell drivers separately. + The drivers do not support all features the board exposes. + +config MFD_RETU + tristate "Nokia Retu and Tahvo multi-function device" + select MFD_CORE + depends on I2C && GENERIC_HARDIRQS + select REGMAP_IRQ + help + Retu and Tahvo are multi-function devices found on Nokia + Internet Tablets (770, N800 and N810). + +config MFD_PCF50633 + tristate "NXP PCF50633" + depends on I2C + select REGMAP_I2C + help + Say yes here if you have an NXP PCF50633 chip on your board. + This core driver provides register access and IRQ handling + facilities, and registers devices for the various functions + so that function-specific drivers can bind to them.
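The PCF50633 text above ends with the other half of the contract: the core "registers devices for the various functions so that function-specific drivers can bind to them". On the function side that is an ordinary platform_driver whose name matches the cell; the parent's driver data is the usual channel back to the shared chip state. A sketch with illustrative names (my-pmic-rtc is not a real cell):

    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/platform_device.h>

    static int my_pmic_rtc_probe(struct platform_device *pdev)
    {
            /* The MFD core made us a child of the chip device. */
            void *chip = dev_get_drvdata(pdev->dev.parent);

            if (!chip)
                    return -ENODEV;
            /* ... register an RTC using the core's register accessors ... */
            return 0;
    }

    static struct platform_driver my_pmic_rtc_driver = {
            .driver = {
                    .name = "my-pmic-rtc",  /* must match the cell name */
                    .owner = THIS_MODULE,
            },
            .probe = my_pmic_rtc_probe,
    };
    module_platform_driver(my_pmic_rtc_driver);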
+ +config PCF50633_ADC + tristate "NXP PCF50633 ADC" + depends on MFD_PCF50633 + help + Say yes here if you want to include support for ADC in the + NXP PCF50633 chip. + +config PCF50633_GPIO + tristate "NXP PCF50633 GPIO" + depends on MFD_PCF50633 + help + Say yes here if you want to include GPIO support for pins on + the PCF50633 chip. + +config UCB1400_CORE + tristate "Philips UCB1400 Core driver" + depends on AC97_BUS + depends on GPIOLIB + help + This enables support for the Philips UCB1400 core functions. + The UCB1400 is an AC97 audio codec. + + To compile this driver as a module, choose M here: the + module will be called ucb1400_core. + +config MFD_PM8XXX + tristate + +config MFD_PM8921_CORE + tristate "Qualcomm PM8921 PMIC chip" + depends on SSBI && BROKEN + select MFD_CORE + select MFD_PM8XXX + help + If you say yes to this option, support will be included for the + built-in PM8921 PMIC chip. + + This is required if your board has a PM8921 and uses its features, + such as: MPPs, GPIOs, regulators, interrupts, and PWM. + + Say M here if you want to include support for PM8921 chip as a module. + This will build a module called "pm8921-core". + +config MFD_PM8XXX_IRQ + bool "Qualcomm PM8xxx IRQ features" + depends on MFD_PM8XXX + default y if MFD_PM8XXX + help + This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. + + This is required to use certain other PM 8xxx features, such as GPIO + and MPP. + +config MFD_RDC321X + tristate "RDC R-321x southbridge" + select MFD_CORE + depends on PCI && GENERIC_HARDIRQS + help + Say yes here if you want to have support for the RDC R-321x SoC + southbridge which provides access to GPIOs and Watchdog using the + southbridge PCI device configuration space. + +config MFD_RTSX_PCI + tristate "Realtek PCI-E card reader" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + This adds support for the Realtek PCI-Express card reader, including + rts5209, rts5229, rtl8411, etc. Realtek card reader supports access to many + types of memory cards, such as Memory Stick, Memory Stick Pro, + Secure Digital and MultiMediaCard. + +config MFD_RC5T583 + bool "Ricoh RC5T583 Power Management system device" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + Select this option to get support for the RICOH583 Power + Management system device. + This driver provides common support for accessing the device + through i2c interface. The device supports multiple sub-devices + like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey. + Additional drivers must be enabled in order to use the + different functionality of the device. + +config MFD_SEC_CORE + bool "SAMSUNG Electronics PMIC Series Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ + help + Support for the Samsung Electronics MFD series. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_SI476X_CORE + tristate "Silicon Laboratories 4761/64/68 AM/FM radio." + depends on I2C + select MFD_CORE + select REGMAP_I2C + help + This is the core driver for the SI476x series of AM/FM + radio. This MFD driver connects the radio-si476x V4L2 module + and the si476x audio codec. + + To compile this driver as a module, choose M here: the + module will be called si476x-core.
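Several entries above select REGMAP_IRQ, which lets a PMIC describe its interrupt status/mask registers declaratively instead of hand-rolling a chained interrupt controller. A sketch under assumed register addresses and bit layout (nothing here matches a real chip, and regmap_add_irq_chip's parameter list has shifted between kernel versions):

    #include <linux/bitops.h>
    #include <linux/interrupt.h>
    #include <linux/kernel.h>
    #include <linux/regmap.h>

    static const struct regmap_irq my_pmic_irqs[] = {
            { .reg_offset = 0, .mask = BIT(0) },    /* e.g. ONKEY */
            { .reg_offset = 0, .mask = BIT(1) },    /* e.g. RTC alarm */
    };

    static const struct regmap_irq_chip my_pmic_irq_chip = {
            .name = "my-pmic",
            .status_base = 0x10,    /* hypothetical register addresses */
            .mask_base = 0x11,
            .num_regs = 1,
            .irqs = my_pmic_irqs,
            .num_irqs = ARRAY_SIZE(my_pmic_irqs),
    };

    static int my_pmic_init_irq(struct regmap *map, int irq,
                                struct regmap_irq_chip_data **data)
    {
            return regmap_add_irq_chip(map, irq,
                                       IRQF_ONESHOT | IRQF_TRIGGER_LOW, 0,
                                       &my_pmic_irq_chip, data);
    }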
+ config MFD_SM501 - tristate "Support for Silicon Motion SM501" + tristate "Silicon Motion SM501" ---help--- This is the core driver for the Silicon Motion SM501 multimedia companion chip. This device is a multifunction device which may @@ -63,46 +518,147 @@ config MFD_SM501_GPIO lines on the SM501. The platform data is used to supply the base number for the first GPIO line to register. -config MFD_RTSX_PCI - tristate "Support for Realtek PCI-E card reader" - depends on PCI && GENERIC_HARDIRQS +config MFD_SMSC + bool "SMSC ECE1099 series chips" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + If you say yes here you get support for the + ece1099 chips from SMSC. + + To compile this driver as a module, choose M here: the + module will be called smsc. + +config ABX500_CORE + bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" + default y if ARCH_U300 || ARCH_U8500 + help + Say yes here if you have the ABX500 Mixed Signal IC family + chips. This core driver exposes register access functions. + Functionality specific drivers using these functions can + remain unchanged when IC changes. Binding of the functions to + actual register access is done by the IC core driver. + +config AB3100_CORE + bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions" + depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS select MFD_CORE + default y if ARCH_U300 help - This supports for Realtek PCI-Express card reader including rts5209, - rts5229, rtl8411, etc. Realtek card reader supports access to many - types of memory cards, such as Memory Stick, Memory Stick Pro, - Secure Digital and MultiMediaCard. + Select this to enable the AB3100 Mixed Signal IC core + functionality. This connects to an AB3100 on the I2C bus + and exposes a number of symbols needed for dependent devices + to read and write registers and subscribe to events from + this multi-functional IC. This is needed to use other features + of the AB3100 such as battery-backed RTC, charging control, + LEDs, vibrator, system power and temperature, power management + and ALSA sound. -config MFD_ASIC3 - bool "Support for Compaq ASIC3" - depends on GENERIC_HARDIRQS && GPIOLIB && ARM +config AB3100_OTP + tristate "ST-Ericsson AB3100 OTP functions" + depends on AB3100_CORE + default y if AB3100_CORE + help + Select this to enable the AB3100 Mixed Signal IC OTP (one-time + programmable memory) support. This exposes a sysfs file to read + out OTP values. + +config AB8500_CORE + bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" + depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU + select POWER_SUPPLY select MFD_CORE - ---help--- - This driver supports the ASIC3 multifunction chip found on many - PDAs (mainly iPAQ and HTC based ones) + select IRQ_DOMAIN + help + Select this option to enable access to AB8500 power management + chip. This connects to U8500 either on the SSP/SPI bus (deprecated + since hardware version v1.0) or the I2C bus via PRCMU. It also adds + the irq_chip parts for handling the Mixed Signal chip events. + This chip embeds various other multimedia functionalities as well. -config MFD_DAVINCI_VOICECODEC - tristate +config AB8500_DEBUG + bool "Enable debug info via debugfs" + depends on AB8500_CORE && DEBUG_FS + default y if DEBUG_FS + help + Select this option if you want debug information using the debug + filesystem, debugfs.
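ABX500_CORE above promises that function drivers "can remain unchanged when IC changes" because register access goes through functions the IC core driver binds at runtime. A sketch of that indirection (the ops layout and every name here are invented for illustration, not the actual abx500 interface):

    #include <linux/device.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    struct my_abx_ops {
            int (*get_register)(struct device *dev, u8 bank, u8 reg, u8 *val);
            int (*set_register)(struct device *dev, u8 bank, u8 reg, u8 val);
    };

    static struct my_abx_ops my_registered_ops;

    /* Called once by whichever IC core driver happens to be present. */
    void my_abx_register_ops(const struct my_abx_ops *ops)
    {
            my_registered_ops = *ops;
    }

    /* Function drivers stay IC-agnostic by calling only through the hooks. */
    int my_abx_get(struct device *dev, u8 bank, u8 reg, u8 *val)
    {
            if (!my_registered_ops.get_register)
                    return -ENXIO;
            return my_registered_ops.get_register(dev, bank, reg, val);
    }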
+ +config AB8500_GPADC + bool "ST-Ericsson AB8500 GPADC driver" + depends on AB8500_CORE && REGULATOR_AB8500 + default y + help + AB8500 GPADC driver used to convert Acc and battery/ac/usb voltage + +config MFD_DB8500_PRCMU + bool "ST-Ericsson DB8500 Power Reset Control Management Unit" + depends on UX500_SOC_DB8500 select MFD_CORE + help + Select this option to enable support for the DB8500 Power Reset + and Control Management Unit. This is basically an autonomous + system controller running an XP70 microprocessor, which is accessed + through a register map. -config MFD_DM355EVM_MSP - bool "DaVinci DM355 EVM microcontroller" - depends on I2C=y && MACH_DAVINCI_DM355_EVM +config MFD_STMPE + bool "STMicroelectronics STMPE" + depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS + select MFD_CORE help - This driver supports the MSP430 microcontroller used on these - boards. MSP430 firmware manages resets and power sequencing, - inputs from buttons and the IR remote, LEDs, an RTC, and more. + Support for the STMPE family of I/O Expanders from + STMicroelectronics. -config MFD_TI_SSP - tristate "TI Sequencer Serial Port support" - depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS + Currently supported devices are: + + STMPE811: GPIO, Touchscreen + STMPE1601: GPIO, Keypad + STMPE1801: GPIO, Keypad + STMPE2401: GPIO, Keypad + STMPE2403: GPIO, Keypad + + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device. Currently available sub drivers are: + + GPIO: stmpe-gpio + Keypad: stmpe-keypad + Touchscreen: stmpe-ts + +menu "STMicroelectronics STMPE Interface Drivers" +depends on MFD_STMPE + +config STMPE_I2C + bool "STMicroelectronics STMPE I2C Interface" + depends on I2C=y + default y + help + This is used to enable I2C interface of STMPE + +config STMPE_SPI + bool "STMicroelectronics STMPE SPI Interface" + depends on SPI_MASTER + help + This is used to enable SPI interface of STMPE +endmenu + +config MFD_STA2X11 + bool "STMicroelectronics STA2X11" + depends on STA2X11 && GENERIC_HARDIRQS select MFD_CORE - ---help--- - Say Y here if you want support for the Sequencer Serial Port - in a Texas Instruments TNETV107X SoC. + select REGMAP_MMIO - To compile this driver as a module, choose M here: the - module will be called ti-ssp. +config MFD_SYSCON + bool "System Controller Register R/W Based on Regmap" + select REGMAP_MMIO + help + Select this option to enable accessing system control registers + via regmap. + +config MFD_DAVINCI_VOICECODEC + tristate + select MFD_CORE config MFD_TI_AM335X_TSCADC tristate "TI ADC / Touch Screen chip support" @@ -116,60 +672,56 @@ config MFD_TI_AM335X_TSCADC To compile this driver as a module, choose M here: the module will be called ti_am335x_tscadc. -config HTC_EGPIO - bool "HTC EGPIO support" - depends on GENERIC_HARDIRQS && GPIOLIB && ARM +config MFD_DM355EVM_MSP + bool "TI DaVinci DM355 EVM microcontroller" + depends on I2C=y && MACH_DAVINCI_DM355_EVM help - This driver supports the CPLD egpio chip present on - several HTC phones. It provides basic support for input - pins, output pins, and irqs. + This driver supports the MSP430 microcontroller used on these + boards. MSP430 firmware manages resets and power sequencing, + inputs from buttons and the IR remote, LEDs, an RTC, and more.
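MFD_SYSCON above exposes a system controller's registers as a shared regmap. A consumer typically looks the regmap up once and then uses ordinary regmap calls; a sketch assuming an invented compatible string and register offset (the lookup helper is the one the syscon driver is expected to provide):

    #include <linux/bitops.h>
    #include <linux/err.h>
    #include <linux/mfd/syscon.h>
    #include <linux/regmap.h>

    static int my_enable_phy(void)
    {
            struct regmap *map;

            map = syscon_regmap_lookup_by_compatible("vendor,my-sysctrl");
            if (IS_ERR(map))
                    return PTR_ERR(map);

            /* Set bit 3 of a hypothetical control register at offset 0x40. */
            return regmap_update_bits(map, 0x40, BIT(3), BIT(3));
    }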
-config HTC_PASIC3 - tristate "HTC PASIC3 LED/DS1WM chip support" +config MFD_LP8788 + bool "TI LP8788 Power Management Unit Driver" + depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE - depends on GENERIC_HARDIRQS - help - This core driver provides register access for the LED/DS1WM - chips labeled "AIC2" and "AIC3", found on HTC Blueangel and - HTC Magician devices, respectively. Actual functionality is - handled by the leds-pasic3 and ds1wm drivers. - -config HTC_I2CPLD - bool "HTC I2C PLD chip support" - depends on I2C=y && GPIOLIB + select REGMAP_I2C + select IRQ_DOMAIN help - If you say yes here you get support for the supposed CPLD - found on omap850 HTC devices like the HTC Wizard and HTC Herald. - This device provides input and output GPIOs through an I2C - interface to one or more sub-chips. + TI LP8788 PMU supports regulators, battery charger, RTC, + ADC, backlight driver and current sinks. -config UCB1400_CORE - tristate "Philips UCB1400 Core driver" - depends on AC97_BUS - depends on GPIOLIB +config MFD_OMAP_USB_HOST + bool "TI OMAP USBHS core and TLL driver" + depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3 + default y help - This enables support for the Philips UCB1400 core functions. - The UCB1400 is an AC97 audio codec. - - To compile this driver as a module, choose M here: the - module will be called ucb1400_core. + This is the core driver for the OMAP EHCI and OHCI drivers. + This MFD driver does the required setup functionalities for + OMAP USB Host drivers. -config MFD_LM3533 - tristate "LM3533 Lighting Power chip" - depends on I2C +config MFD_PALMAS + bool "TI Palmas series chips" select MFD_CORE select REGMAP_I2C - depends on GENERIC_HARDIRQS + select REGMAP_IRQ + depends on I2C=y && GENERIC_HARDIRQS help - Say yes here to enable support for National Semiconductor / TI - LM3533 Lighting Power chips. + If you say yes here you get support for the Palmas + series of PMIC chips from Texas Instruments. - This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the LED, - backlight or ambient-light-sensor functionality of the device. +config MFD_TI_SSP + tristate "TI Sequencer Serial Port support" + depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS + select MFD_CORE + ---help--- + Say Y here if you want support for the Sequencer Serial Port + in a Texas Instruments TNETV107X SoC. + + To compile this driver as a module, choose M here: the + module will be called ti-ssp. config TPS6105X - tristate "TPS61050/61052 Boost Converters" + tristate "TI TPS61050/61052 Boost Converters" depends on I2C select REGULATOR select MFD_CORE @@ -182,7 +734,7 @@ config TPS6105X also contains a GPIO pin. config TPS65010 - tristate "TPS6501x Power Management chips" + tristate "TI TPS6501x Power Management chips" depends on I2C && GPIOLIB default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK help @@ -195,7 +747,7 @@ config TPS65010 will be called tps65010. config TPS6507X - tristate "TPS6507x Power Management / Touch Screen chips" + tristate "TI TPS6507x Power Management / Touch Screen chips" select MFD_CORE depends on I2C && GENERIC_HARDIRQS help @@ -206,8 +758,24 @@ config TPS6507X This driver can also be built as a module. If so, the module will be called tps6507x.
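Chips like the TPS6507x above combine power management and a touch screen behind one I2C address, so the core driver usually exports locked register accessors that both function drivers share. A minimal sketch of that pattern (struct and function names are illustrative, not the tps6507x symbols):

    #include <linux/i2c.h>
    #include <linux/mutex.h>
    #include <linux/types.h>

    struct my_chip {
            struct i2c_client *client;
            struct mutex io_lock;   /* serializes the shared I2C client */
    };

    static int my_chip_read(struct my_chip *chip, u8 reg, u8 *val)
    {
            int ret;

            mutex_lock(&chip->io_lock);
            ret = i2c_smbus_read_byte_data(chip->client, reg);
            mutex_unlock(&chip->io_lock);

            if (ret < 0)
                    return ret;
            *val = ret;
            return 0;
    }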
+config TPS65911_COMPARATOR + tristate + +config MFD_TPS65090 + bool "TI TPS65090 Power Management chips" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ + help + If you say yes here you get support for the TPS65090 series of + Power Management chips. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device. + config MFD_TPS65217 - tristate "TPS65217 Power Management / White LED chips" + tristate "TI TPS65217 Power Management / White LED chips" depends on I2C && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -222,7 +790,7 @@ config MFD_TPS65217 will be called tps65217. config MFD_TPS6586X - bool "TPS6586x Power Management chips" + bool "TI TPS6586x Power Management chips" depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -237,7 +805,7 @@ config MFD_TPS6586X will be called tps6586x. config MFD_TPS65910 - bool "TPS65910 Power Management chip" + bool "TI TPS65910 Power Management chip" depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -248,11 +816,14 @@ config MFD_TPS65910 Power Management chips. config MFD_TPS65912 - bool + bool "TI TPS65912 Power Management chip" depends on GPIOLIB + help + If you say yes here you get support for the TPS65912 series of + PM chips. config MFD_TPS65912_I2C - bool "TPS65912 Power Management chip with I2C" + bool "TI TPS65912 Power Management chip with I2C" select MFD_CORE select MFD_TPS65912 depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS @@ -261,7 +832,7 @@ config MFD_TPS65912_I2C PM chips with I2C interface. config MFD_TPS65912_SPI - bool "TPS65912 Power Management chip with SPI" + bool "TI TPS65912 Power Management chip with SPI" select MFD_CORE select MFD_TPS65912 depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS @@ -283,18 +854,8 @@ config MFD_TPS80031 ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with Power Path from USB, 32K clock generator. -config MENELAUS - bool "Texas Instruments TWL92330/Menelaus PM chip" - depends on I2C=y && ARCH_OMAP2 - help - If you say yes here you get support for the Texas Instruments - TWL92330/Menelaus Power Management chip. This include voltage - regulators, Dual slot memory card transceivers, real-time clock - and other features that are often used in portable devices like - cell phones and PDAs. - config TWL4030_CORE - bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support" + bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support" depends on I2C=y && GENERIC_HARDIRQS select IRQ_DOMAIN select REGMAP_I2C @@ -310,7 +871,7 @@ config TWL4030_CORE versions) and many other features. config TWL4030_MADC - tristate "Texas Instruments TWL4030 MADC" + tristate "TI TWL4030 MADC" depends on TWL4030_CORE help This driver provides support for triton TWL4030-MADC. The @@ -320,7 +881,7 @@ config TWL4030_MADC named twl4030-madc config TWL4030_POWER - bool "Support power resources on TWL4030 family chips" + bool "TI TWL4030 power resources" depends on TWL4030_CORE && ARM help Say yes here if you want to use the power resources on the @@ -333,13 +894,13 @@ config TWL4030_POWER or reset when a sleep, wakeup or warm reset event occurs. 
config MFD_TWL4030_AUDIO - bool + bool "TI TWL4030 Audio" depends on TWL4030_CORE && GENERIC_HARDIRQS select MFD_CORE default n config TWL6040_CORE - bool "Support for TWL6040 audio codec" + bool "TI TWL6040 audio codec" depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -352,48 +913,53 @@ config TWL6040_CORE additional drivers must be enabled in order to use the functionality of the device (audio, vibra). -config MFD_STMPE - bool "Support STMicroelectronics STMPE" - depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS - select MFD_CORE +config MENELAUS + bool "TI TWL92330/Menelaus PM chip" + depends on I2C=y && ARCH_OMAP2 help - Support for the STMPE family of I/O Expanders from - STMicroelectronics. - - Currently supported devices are: - - STMPE811: GPIO, Touchscreen - STMPE1601: GPIO, Keypad - STMPE2401: GPIO, Keypad - STMPE2403: GPIO, Keypad + If you say yes here you get support for the Texas Instruments + TWL92330/Menelaus Power Management chip. This includes voltage + regulators, Dual slot memory card transceivers, real-time clock + and other features that are often used in portable devices like + cell phones and PDAs. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the functionality - of the device. Currently available sub drivers are: +config MFD_WL1273_CORE + tristate "TI WL1273 FM radio" + depends on I2C && GENERIC_HARDIRQS + select MFD_CORE + default n + help + This is the core driver for the TI WL1273 FM radio. This MFD + driver connects the radio-wl1273 V4L2 module and the wl1273 + audio codec. - GPIO: stmpe-gpio - Keypad: stmpe-keypad - Touchscreen: stmpe-ts +config MFD_LM3533 + tristate "TI/National Semiconductor LM3533 Lighting Power chip" + depends on I2C + select MFD_CORE + select REGMAP_I2C + depends on GENERIC_HARDIRQS + help + Say yes here to enable support for National Semiconductor / TI + LM3533 Lighting Power chips. -menu "STMPE Interface Drivers" -depends on MFD_STMPE + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the LED, + backlight or ambient-light-sensor functionality of the device. -config STMPE_I2C - bool "STMPE I2C Inteface" - depends on I2C=y - default y - help - This is used to enable I2C interface of STMPE +config MFD_TIMBERDALE + tristate "Timberdale FPGA" + select MFD_CORE + depends on PCI && GPIOLIB + ---help--- + This is the core driver for the timberdale FPGA. This device is a + multifunction device which exposes numerous platform devices. -config STMPE_SPI - bool "STMPE SPI Inteface" - depends on SPI_MASTER - help - This is used to enable SPI interface of STMPE -endmenu + The timberdale FPGA can be found on the Intel Atom development board + for in-vehicle infotainment, called Russellville. config MFD_TC3589X - bool "Support Toshiba TC35892 and variants" + bool "Toshiba TC35892 and variants" depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE help @@ -408,27 +974,15 @@ config MFD_TMIO default n config MFD_T7L66XB - bool "Support Toshiba T7L66XB" + bool "Toshiba T7L66XB" depends on ARM && HAVE_CLK && GENERIC_HARDIRQS select MFD_CORE select MFD_TMIO help Support for Toshiba Mobile IO Controller T7L66XB -config MFD_SMSC - bool "Support for the SMSC ECE1099 series chips" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - help - If you say yes here you get support for the + ece1099 chips from SMSC.
- - To compile this driver as a module, choose M here: the - module will be called smsc. - config MFD_TC6387XB - bool "Support Toshiba TC6387XB" + bool "Toshiba TC6387XB" depends on ARM && HAVE_CLK select MFD_CORE select MFD_TMIO @@ -436,7 +990,7 @@ config MFD_TC6387XB Support for Toshiba Mobile IO Controller TC6387XB config MFD_TC6393XB - bool "Support Toshiba TC6393XB" + bool "Toshiba TC6393XB" depends on ARM && HAVE_CLK select GPIOLIB select MFD_CORE @@ -444,165 +998,14 @@ config MFD_TC6393XB help Support for Toshiba Mobile IO Controller TC6393XB -config PMIC_DA903X - bool "Dialog Semiconductor DA9030/DA9034 PMIC Support" - depends on I2C=y - help - Say yes here to support for Dialog Semiconductor DA9030 (a.k.a - ARAVA) and DA9034 (a.k.a MICCO), these are Power Management IC - usually found on PXA processors-based platforms. This includes - the I2C driver and the core APIs _only_, you have to select - individual components like LCD backlight, voltage regulators, - LEDs and battery-charger under the corresponding menus. - -config PMIC_DA9052 - bool - select MFD_CORE - -config MFD_DA9052_SPI - bool "Support Dialog Semiconductor DA9052/53 PMIC variants with SPI" - select REGMAP_SPI - select REGMAP_IRQ - select PMIC_DA9052 - depends on SPI_MASTER=y && GENERIC_HARDIRQS - help - Support for the Dialog Semiconductor DA9052 PMIC - when controlled using SPI. This driver provides common support - for accessing the device, additional drivers must be enabled in - order to use the functionality of the device. - -config MFD_DA9052_I2C - bool "Support Dialog Semiconductor DA9052/53 PMIC variants with I2C" - select REGMAP_I2C - select REGMAP_IRQ - select PMIC_DA9052 - depends on I2C=y && GENERIC_HARDIRQS - help - Support for the Dialog Semiconductor DA9052 PMIC - when controlled using I2C. This driver provides common support - for accessing the device, additional drivers must be enabled in - order to use the functionality of the device. - -config MFD_DA9055 - bool "Dialog Semiconductor DA9055 PMIC Support" - select REGMAP_I2C - select REGMAP_IRQ - select PMIC_DA9055 - select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS - help - Say yes here for support of Dialog Semiconductor DA9055. This is - a Power Management IC. This driver provides common support for - accessing the device as well as the I2C interface to the chip itself. - Additional drivers must be enabled in order to use the functionality - of the device. - - This driver can be built as a module. If built as a module it will be - called "da9055" - -config PMIC_ADP5520 - bool "Analog Devices ADP5520/01 MFD PMIC Core Support" - depends on I2C=y - help - Say yes here to add support for Analog Devices AD5520 and ADP5501, - Multifunction Power Management IC. This includes - the I2C driver and the core APIs _only_, you have to select - individual components like LCD backlight, LEDs, GPIOs and Kepad - under the corresponding menus. - -config MFD_LP8788 - bool "Texas Instruments LP8788 Power Management Unit Driver" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select IRQ_DOMAIN - help - TI LP8788 PMU supports regulators, battery charger, RTC, - ADC, backlight driver and current sinks. - -config MFD_MAX77686 - bool "Maxim Semiconductor MAX77686 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select IRQ_DOMAIN - help - Say yes here to support for Maxim Semiconductor MAX77686. - This is a Power Management IC with RTC on chip. 
- This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX77693 - bool "Maxim Semiconductor MAX77693 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - help - Say yes here to support for Maxim Semiconductor MAX77693. - This is a companion Power Management IC with Flash, Haptic, Charger, - and MUIC(Micro USB Interface Controller) controls on chip. - This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX8907 - tristate "Maxim Semiconductor MAX8907 PMIC Support" - select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS - select REGMAP_I2C - select REGMAP_IRQ - help - Say yes here to support for Maxim Semiconductor MAX8907. This is - a Power Management IC. This driver provides common support for - accessing the device; additional drivers must be enabled in order - to use the functionality of the device. - -config MFD_MAX8925 - bool "Maxim Semiconductor MAX8925 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to support for Maxim Semiconductor MAX8925. This is - a Power Management IC. This driver provides common support for - accessing the device, additional drivers must be enabled in order - to use the functionality of the device. - -config MFD_MAX8997 - bool "Maxim Semiconductor MAX8997/8966 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select IRQ_DOMAIN - help - Say yes here to support for Maxim Semiconductor MAX8997/8966. - This is a Power Management IC with RTC, Flash, Fuel Gauge, Haptic, - MUIC controls on chip. - This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX8998 - bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to support for Maxim Semiconductor MAX8998 and - National Semiconductor LP3974. This is a Power Management IC. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_SEC_CORE - bool "SAMSUNG Electronics PMIC Series Support" - depends on I2C=y && GENERIC_HARDIRQS +config MFD_VX855 + tristate "VIA VX855/VX875 integrated south bridge" + depends on PCI && GENERIC_HARDIRQS select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ help - Support for the Samsung Electronics MFD series. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the functionality - of the device + Say yes here to enable support for various functions of the + VIA VX855/VX875 south bridge. You will need to enable the vx855_spi + and/or vx855_gpio drivers for this to do anything useful. config MFD_ARIZONA select REGMAP @@ -611,7 +1014,7 @@ config MFD_ARIZONA bool config MFD_ARIZONA_I2C - tristate "Support Wolfson Microelectronics Arizona platform with I2C" + tristate "Wolfson Microelectronics Arizona platform with I2C" select MFD_ARIZONA select MFD_CORE select REGMAP_I2C @@ -621,7 +1024,7 @@ config MFD_ARIZONA_I2C core functionality controlled via I2C. 
config MFD_ARIZONA_SPI - tristate "Support Wolfson Microelectronics Arizona platform with SPI" + tristate "Wolfson Microelectronics Arizona platform with SPI" select MFD_ARIZONA select MFD_CORE select REGMAP_SPI @@ -631,19 +1034,19 @@ config MFD_ARIZONA_SPI core functionality controlled via SPI. config MFD_WM5102 - bool "Support Wolfson Microelectronics WM5102" + bool "Wolfson Microelectronics WM5102" depends on MFD_ARIZONA help Support for Wolfson Microelectronics WM5102 low power audio SoC config MFD_WM5110 - bool "Support Wolfson Microelectronics WM5110" + bool "Wolfson Microelectronics WM5110" depends on MFD_ARIZONA help Support for Wolfson Microelectronics WM5110 low power audio SoC config MFD_WM8400 - bool "Support Wolfson Microelectronics WM8400" + bool "Wolfson Microelectronics WM8400" select MFD_CORE depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C @@ -658,7 +1061,7 @@ config MFD_WM831X depends on GENERIC_HARDIRQS config MFD_WM831X_I2C - bool "Support Wolfson Microelectronics WM831x/2x PMICs with I2C" + bool "Wolfson Microelectronics WM831x/2x PMICs with I2C" select MFD_CORE select MFD_WM831X select REGMAP_I2C @@ -671,7 +1074,7 @@ config MFD_WM831X_I2C order to use the functionality of the device. config MFD_WM831X_SPI - bool "Support Wolfson Microelectronics WM831x/2x PMICs with SPI" + bool "Wolfson Microelectronics WM831x/2x PMICs with SPI" select MFD_CORE select MFD_WM831X select REGMAP_SPI @@ -687,56 +1090,8 @@ config MFD_WM8350 bool depends on GENERIC_HARDIRQS -config MFD_WM8350_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - config MFD_WM8350_I2C - bool "Support Wolfson Microelectronics WM8350 with I2C" + bool "Wolfson Microelectronics WM8350 with I2C" select MFD_WM8350 depends on I2C=y && GENERIC_HARDIRQS help @@ -747,7 +1102,7 @@ config MFD_WM8350_I2C selected to enable support for the functionality of the chip. config MFD_WM8994 - bool "Support Wolfson Microelectronics WM8994" + bool "Wolfson Microelectronics WM8994" select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -760,365 +1115,6 @@ config MFD_WM8994 core support for the WM8994, in order to use the actual functionality of the device other drivers must be enabled. -config MFD_PCF50633 - tristate "Support for NXP PCF50633" - depends on I2C - select REGMAP_I2C - help - Say yes here if you have NXP PCF50633 chip on your board. - This core driver provides register access and IRQ handling - facilities, and registers devices for the various functions - so that function-specific drivers can bind to them. - -config PCF50633_ADC - tristate "Support for NXP PCF50633 ADC" - depends on MFD_PCF50633 - help - Say yes here if you want to include support for ADC in the - NXP PCF50633 chip.
- -config PCF50633_GPIO - tristate "Support for NXP PCF50633 GPIO" - depends on MFD_PCF50633 - help - Say yes here if you want to include support GPIO for pins on - the PCF50633 chip. - -config MFD_MC13783 - tristate - -config MFD_MC13XXX - tristate - depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS - select MFD_CORE - select MFD_MC13783 - help - Enable support for the Freescale MC13783 and MC13892 PMICs. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_MC13XXX_SPI - tristate "Freescale MC13783 and MC13892 SPI interface" - depends on SPI_MASTER && GENERIC_HARDIRQS - select REGMAP_SPI - select MFD_MC13XXX - help - Select this if your MC13xxx is connected via an SPI bus. - -config MFD_MC13XXX_I2C - tristate "Freescale MC13892 I2C interface" - depends on I2C && GENERIC_HARDIRQS - select REGMAP_I2C - select MFD_MC13XXX - help - Select this if your MC13xxx is connected via an I2C bus. - -config ABX500_CORE - bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" - default y if ARCH_U300 || ARCH_U8500 - help - Say yes here if you have the ABX500 Mixed Signal IC family - chips. This core driver expose register access functions. - Functionality specific drivers using these functions can - remain unchanged when IC changes. Binding of the functions to - actual register access is done by the IC core driver. - -config AB3100_CORE - bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions" - depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS - select MFD_CORE - default y if ARCH_U300 - help - Select this to enable the AB3100 Mixed Signal IC core - functionality. This connects to a AB3100 on the I2C bus - and expose a number of symbols needed for dependent devices - to read and write registers and subscribe to events from - this multi-functional IC. This is needed to use other features - of the AB3100 such as battery-backed RTC, charging control, - LEDs, vibrator, system power and temperature, power management - and ALSA sound. - -config AB3100_OTP - tristate "ST-Ericsson AB3100 OTP functions" - depends on AB3100_CORE - default y if AB3100_CORE - help - Select this to enable the AB3100 Mixed Signal IC OTP (one-time - programmable memory) support. This exposes a sysfs file to read - out OTP values. - -config EZX_PCAP - bool "PCAP Support" - depends on GENERIC_HARDIRQS && SPI_MASTER - help - This enables the PCAP ASIC present on EZX Phones. This is - needed for MMC, TouchScreen, Sound, USB, etc.. - -config AB8500_CORE - bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" - depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU - select POWER_SUPPLY - select MFD_CORE - select IRQ_DOMAIN - help - Select this option to enable access to AB8500 power management - chip. This connects to U8500 either on the SSP/SPI bus (deprecated - since hardware version v1.0) or the I2C bus via PRCMU. It also adds - the irq_chip parts for handling the Mixed Signal chip events. - This chip embeds various other multimedia funtionalities as well. - -config AB8500_DEBUG - bool "Enable debug info via debugfs" - depends on AB8500_CORE && DEBUG_FS - default y if DEBUG_FS - help - Select this option if you want debug information using the debug - filesystem, debugfs. 
- -config AB8500_GPADC - bool "AB8500 GPADC driver" - depends on AB8500_CORE && REGULATOR_AB8500 - default y - help - AB8500 GPADC driver used to convert Acc and battery/ac/usb voltage - -config MFD_DB8500_PRCMU - bool "ST-Ericsson DB8500 Power Reset Control Management Unit" - depends on UX500_SOC_DB8500 - select MFD_CORE - help - Select this option to enable support for the DB8500 Power Reset - and Control Management Unit. This is basically an autonomous - system controller running an XP70 microprocessor, which is accessed - through a register map. - -config MFD_CS5535 - tristate "Support for CS5535 and CS5536 southbridge core functions" - select MFD_CORE - depends on PCI && X86 - ---help--- - This is the core driver for CS5535/CS5536 MFD functions. This is - necessary for using the board's GPIO and MFGPT functionality. - -config MFD_TIMBERDALE - tristate "Support for the Timberdale FPGA" - select MFD_CORE - depends on PCI && GPIOLIB - ---help--- - This is the core driver for the timberdale FPGA. This device is a - multifunction device which exposes numerous platform devices. - - The timberdale FPGA can be found on the Intel Atom development board - for in-vehicle infontainment, called Russellville. - -config LPC_SCH - tristate "Intel SCH LPC" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - LPC bridge function of the Intel SCH provides support for - System Management Bus and General Purpose I/O. - -config LPC_ICH - tristate "Intel ICH LPC" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - The LPC bridge function of the Intel ICH provides support for - many functional units. This driver provides needed support for - other drivers to control these functions, currently GPIO and - watchdog. - -config MFD_RDC321X - tristate "Support for RDC-R321x southbridge" - select MFD_CORE - depends on PCI && GENERIC_HARDIRQS - help - Say yes here if you want to have support for the RDC R-321x SoC - southbridge which provides access to GPIOs and Watchdog using the - southbridge PCI device configuration space. - -config MFD_JANZ_CMODIO - tristate "Support for Janz CMOD-IO PCI MODULbus Carrier Board" - select MFD_CORE - depends on PCI && GENERIC_HARDIRQS - help - This is the core driver for the Janz CMOD-IO PCI MODULbus - carrier board. This device is a PCI to MODULbus bridge which may - host many different types of MODULbus daughterboards, including - CAN and GPIO controllers. - -config MFD_JZ4740_ADC - bool "Support for the JZ4740 SoC ADC core" - select MFD_CORE - select GENERIC_IRQ_CHIP - depends on MACH_JZ4740 - help - Say yes here if you want support for the ADC unit in the JZ4740 SoC. - This driver is necessary for jz4740-battery and jz4740-hwmon driver. - -config MFD_VX855 - tristate "Support for VIA VX855/VX875 integrated south bridge" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to enable support for various functions of the - VIA VX855/VX875 south bridge. You will need to enable the vx855_spi - and/or vx855_gpio drivers for this to do anything useful. - -config MFD_WL1273_CORE - tristate "Support for TI WL1273 FM radio." - depends on I2C && GENERIC_HARDIRQS - select MFD_CORE - default n - help - This is the core driver for the TI WL1273 FM radio. This MFD - driver connects the radio-wl1273 V4L2 module and the wl1273 - audio codec. - -config MFD_OMAP_USB_HOST - bool "Support OMAP USBHS core and TLL driver" - depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3 - default y - help - This is the core driver for the OAMP EHCI and OHCI drivers. 
- This MFD driver does the required setup functionalities for - OMAP USB Host drivers. - -config MFD_PM8XXX - tristate - -config MFD_PM8921_CORE - tristate "Qualcomm PM8921 PMIC chip" - depends on SSBI && BROKEN - select MFD_CORE - select MFD_PM8XXX - help - If you say yes to this option, support will be included for the - built-in PM8921 PMIC chip. - - This is required if your board has a PM8921 and uses its features, - such as: MPPs, GPIOs, regulators, interrupts, and PWM. - - Say M here if you want to include support for PM8921 chip as a module. - This will build a module called "pm8921-core". - -config MFD_PM8XXX_IRQ - bool "Support for Qualcomm PM8xxx IRQ features" - depends on MFD_PM8XXX - default y if MFD_PM8XXX - help - This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. - - This is required to use certain other PM 8xxx features, such as GPIO - and MPP. - -config TPS65911_COMPARATOR - tristate - -config MFD_TPS65090 - bool "TPS65090 Power Management chips" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - help - If you say yes here you get support for the TPS65090 series of - Power Management chips. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_AAT2870_CORE - bool "Support for the AnalogicTech AAT2870" - select MFD_CORE - depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS - help - If you say yes here you get support for the AAT2870. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_INTEL_MSIC - bool "Support for Intel MSIC" - depends on INTEL_SCU_IPC - select MFD_CORE - help - Select this option to enable access to Intel MSIC (Avatele - Passage) chip. This chip embeds audio, battery, GPIO, etc. - devices used in Intel Medfield platforms. - -config MFD_RC5T583 - bool "Ricoh RC5T583 Power Management system device" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - help - Select this option to get support for the RICOH583 Power - Management system device. - This driver provides common support for accessing the device - through i2c interface. The device supports multiple sub-devices - like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey. - Additional drivers must be enabled in order to use the - different functionality of the device. - -config MFD_STA2X11 - bool "STA2X11 multi function device support" - depends on STA2X11 && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_MMIO - -config MFD_SYSCON - bool "System Controller Register R/W Based on Regmap" - depends on OF - select REGMAP_MMIO - help - Select this option to enable accessing system control registers - via regmap. - -config MFD_PALMAS - bool "Support for the TI Palmas series chips" - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS - help - If you say yes here you get support for the Palmas - series of PMIC chips from Texas Instruments. - -config MFD_VIPERBOARD - tristate "Support for Nano River Technologies Viperboard" - select MFD_CORE - depends on USB && GENERIC_HARDIRQS - default n - help - Say yes here if you want support for Nano River Technologies - Viperboard. - There are mfd cell drivers available for i2c master, adc and - both gpios found on the board. The spi part does not yet - have a driver. - You need to select the mfd cell drivers separately. 
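Several of the chips above (TPS65090, RC5T583, PALMAS) also select REGMAP_I2C, so all register access goes through a regmap created once at probe time. A minimal sketch of that idiom, assuming 8-bit register addresses and values (names hypothetical):

    #include <linux/i2c.h>
    #include <linux/regmap.h>

    static const struct regmap_config foo_regmap_config = {
            .reg_bits = 8,  /* 8-bit register addresses */
            .val_bits = 8,  /* 8-bit register values */
    };

    static int foo_i2c_probe(struct i2c_client *client,
                             const struct i2c_device_id *id)
    {
            struct regmap *map;

            /* Device-managed: released automatically on driver unbind */
            map = devm_regmap_init_i2c(client, &foo_regmap_config);
            if (IS_ERR(map))
                    return PTR_ERR(map);

            return 0;
    }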
- The drivers do not support all features the board exposes. - -config MFD_RETU - tristate "Support for Retu multi-function device" - select MFD_CORE - depends on I2C && GENERIC_HARDIRQS - select REGMAP_IRQ - help - Retu is a multi-function device found on Nokia Internet Tablets - (770, N800 and N810). - -config MFD_AS3711 - bool "Support for AS3711" - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS - help - Support for the AS3711 PMIC from AMS - endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index b90409c2366..718e94a2a9a 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -8,8 +8,11 @@ obj-$(CONFIG_MFD_88PM800) += 88pm800.o 88pm80x.o obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +obj-$(CONFIG_MFD_CROS_EC) += cros_ec.o +obj-$(CONFIG_MFD_CROS_EC_I2C) += cros_ec_i2c.o +obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o obj-$(CONFIG_MFD_RTSX_PCI) += rtsx_pci.o obj-$(CONFIG_HTC_EGPIO) += htc-egpio.o @@ -131,6 +134,10 @@ obj-$(CONFIG_MFD_JZ4740_ADC) += jz4740-adc.o obj-$(CONFIG_MFD_TPS6586X) += tps6586x.o obj-$(CONFIG_MFD_VX855) += vx855.o obj-$(CONFIG_MFD_WL1273_CORE) += wl1273-core.o + +si476x-core-y := si476x-cmd.o si476x-prop.o si476x-i2c.o +obj-$(CONFIG_MFD_SI476X_CORE) += si476x-core.o + obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o omap-usb-tll.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c index f1beb4971f8..dfdb0a2b683 100644 --- a/drivers/mfd/aat2870-core.c +++ b/drivers/mfd/aat2870-core.c @@ -367,12 +367,12 @@ static int aat2870_i2c_probe(struct i2c_client *client, int i, j; int ret = 0; - aat2870 = kzalloc(sizeof(struct aat2870_data), GFP_KERNEL); + aat2870 = devm_kzalloc(&client->dev, sizeof(struct aat2870_data), + GFP_KERNEL); if (!aat2870) { dev_err(&client->dev, "Failed to allocate memory for aat2870\n"); - ret = -ENOMEM; - goto out; + return -ENOMEM; } aat2870->dev = &client->dev; @@ -400,12 +400,12 @@ static int aat2870_i2c_probe(struct i2c_client *client, aat2870->init(aat2870); if (aat2870->en_pin >= 0) { - ret = gpio_request_one(aat2870->en_pin, GPIOF_OUT_INIT_HIGH, - "aat2870-en"); + ret = devm_gpio_request_one(&client->dev, aat2870->en_pin, + GPIOF_OUT_INIT_HIGH, "aat2870-en"); if (ret < 0) { dev_err(&client->dev, "Failed to request GPIO %d\n", aat2870->en_pin); - goto out_kfree; + return ret; } } @@ -436,11 +436,6 @@ static int aat2870_i2c_probe(struct i2c_client *client, out_disable: aat2870_disable(aat2870); - if (aat2870->en_pin >= 0) - gpio_free(aat2870->en_pin); -out_kfree: - kfree(aat2870); -out: return ret; } @@ -452,11 +447,8 @@ static int aat2870_i2c_remove(struct i2c_client *client) mfd_remove_devices(aat2870->dev); aat2870_disable(aat2870); - if (aat2870->en_pin >= 0) - gpio_free(aat2870->en_pin); if (aat2870->uninit) aat2870->uninit(aat2870); - kfree(aat2870); return 0; } diff --git a/drivers/mfd/ab3100-otp.c b/drivers/mfd/ab3100-otp.c index 8440010eb2b..d7ce016029f 100644 --- a/drivers/mfd/ab3100-otp.c +++ b/drivers/mfd/ab3100-otp.c @@ -248,19 +248,7 @@ static struct platform_driver ab3100_otp_driver = { .remove = __exit_p(ab3100_otp_remove), }; -static int __init ab3100_otp_init(void) -{ - return 
platform_driver_probe(&ab3100_otp_driver, - ab3100_otp_probe); -} - -static void __exit ab3100_otp_exit(void) -{ - platform_driver_unregister(&ab3100_otp_driver); -} - -module_init(ab3100_otp_init); -module_exit(ab3100_otp_exit); +module_platform_driver_probe(ab3100_otp_driver, ab3100_otp_probe); MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>"); MODULE_DESCRIPTION("AB3100 OTP Readout Driver"); diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index f276352cc9e..8e8a016effe 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -458,22 +458,23 @@ static void update_latch_offset(u8 *offset, int i) static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, int latch_offset, u8 latch_val) { - int int_bit = __ffs(latch_val); - int line, i; + int int_bit, line, i; - do { - int_bit = __ffs(latch_val); + for (i = 0; i < ab8500->mask_size; i++) + if (ab8500->irq_reg_offset[i] == latch_offset) + break; - for (i = 0; i < ab8500->mask_size; i++) - if (ab8500->irq_reg_offset[i] == latch_offset) - break; + if (i >= ab8500->mask_size) { + dev_err(ab8500->dev, "Register offset 0x%2x not declared\n", + latch_offset); + return -ENXIO; + } - if (i >= ab8500->mask_size) { - dev_err(ab8500->dev, "Register offset 0x%2x not declared\n", - latch_offset); - return -ENXIO; - } + /* ignore masked out interrupts */ + latch_val &= ~ab8500->mask[i]; + while (latch_val) { + int_bit = __ffs(latch_val); line = (i << 3) + int_bit; latch_val &= ~(1 << int_bit); @@ -491,7 +492,7 @@ static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, line += 1; handle_nested_irq(ab8500->irq_base + line); - } while (latch_val); + } return 0; } @@ -1107,6 +1108,7 @@ static struct mfd_cell ab8500_devs[] = { }, { .name = "ab8500-usb", + .of_compatible = "stericsson,ab8500-usb", .num_resources = ARRAY_SIZE(ab8500_usb_resources), .resources = ab8500_usb_resources, }, diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index 65f72284185..5e65b28a5d0 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -332,7 +332,7 @@ if (ad_value < 0) { return voltage; } -EXPORT_SYMBOL(ab8500_gpadc_convert); +EXPORT_SYMBOL(ab8500_gpadc_sw_hw_convert); /** * ab8500_gpadc_read_raw() - gpadc read diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index 272479cdb10..fbca1ced49f 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -242,7 +242,7 @@ static int __init ab8500_sysctrl_init(void) { return platform_driver_register(&ab8500_sysctrl_driver); } -subsys_initcall(ab8500_sysctrl_init); +arch_initcall(ab8500_sysctrl_init); MODULE_AUTHOR("Mattias Nilsson <mattias.i.nilsson@stericsson.com"); MODULE_DESCRIPTION("AB8500 system control driver"); diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c index 210dd038bb5..0d2eba02343 100644 --- a/drivers/mfd/adp5520.c +++ b/drivers/mfd/adp5520.c @@ -36,6 +36,7 @@ struct adp5520_chip { struct blocking_notifier_head notifier_list; int irq; unsigned long id; + uint8_t mode; }; static int __adp5520_read(struct i2c_client *client, @@ -326,7 +327,10 @@ static int adp5520_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct adp5520_chip *chip = dev_get_drvdata(&client->dev); - adp5520_clr_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY); + adp5520_read(chip->dev, ADP5520_MODE_STATUS, &chip->mode); + /* All other bits are W1C */ + chip->mode &= ADP5520_BL_EN | ADP5520_DIM_EN | ADP5520_nSTNBY; + adp5520_write(chip->dev, ADP5520_MODE_STATUS, 
0); return 0; } @@ -335,7 +339,7 @@ static int adp5520_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct adp5520_chip *chip = dev_get_drvdata(&client->dev); - adp5520_set_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY); + adp5520_write(chip->dev, ADP5520_MODE_STATUS, chip->mode); return 0; } #endif @@ -360,17 +364,7 @@ static struct i2c_driver adp5520_driver = { .id_table = adp5520_id, }; -static int __init adp5520_init(void) -{ - return i2c_add_driver(&adp5520_driver); -} -module_init(adp5520_init); - -static void __exit adp5520_exit(void) -{ - i2c_del_driver(&adp5520_driver); -} -module_exit(adp5520_exit); +module_i2c_driver(adp5520_driver); MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); MODULE_DESCRIPTION("ADP5520(01) PMIC-MFD Driver"); diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index b562c7bf8a4..6ab03043fd6 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -39,11 +39,21 @@ int arizona_clk32k_enable(struct arizona *arizona) arizona->clk32k_ref++; - if (arizona->clk32k_ref == 1) + if (arizona->clk32k_ref == 1) { + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + ret = pm_runtime_get_sync(arizona->dev); + if (ret != 0) + goto out; + break; + } + ret = regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_ENA, ARIZONA_CLK_32K_ENA); + } +out: if (ret != 0) arizona->clk32k_ref--; @@ -63,10 +73,17 @@ int arizona_clk32k_disable(struct arizona *arizona) arizona->clk32k_ref--; - if (arizona->clk32k_ref == 0) + if (arizona->clk32k_ref == 0) { regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_ENA, 0); + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + pm_runtime_put_sync(arizona->dev); + break; + } + } + mutex_unlock(&arizona->clk_lock); return ret; @@ -179,42 +196,134 @@ static irqreturn_t arizona_overclocked(int irq, void *data) return IRQ_HANDLED; } -static int arizona_wait_for_boot(struct arizona *arizona) +static int arizona_poll_reg(struct arizona *arizona, + int timeout, unsigned int reg, + unsigned int mask, unsigned int target) { - unsigned int reg; + unsigned int val = 0; int ret, i; + for (i = 0; i < timeout; i++) { + ret = regmap_read(arizona->regmap, reg, &val); + if (ret != 0) { + dev_err(arizona->dev, "Failed to read reg %u: %d\n", + reg, ret); + continue; + } + + if ((val & mask) == target) + return 0; + + msleep(1); + } + + dev_err(arizona->dev, "Polling reg %u timed out: %x\n", reg, val); + return -ETIMEDOUT; +} + +static int arizona_wait_for_boot(struct arizona *arizona) +{ + int ret; + /* * We can't use an interrupt as we need to runtime resume to do so, * we won't race with the interrupt handler as it'll be blocked on * runtime resume. 
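 * Instead, arizona_poll_reg() above bounds the wait: it re-reads the
 * register up to 'timeout' times, sleeping about 1ms between reads, so
 * the five-read poll below blocks for at most roughly 5ms before giving
 * up with -ETIMEDOUT.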
*/ - for (i = 0; i < 5; i++) { - msleep(1); + ret = arizona_poll_reg(arizona, 5, ARIZONA_INTERRUPT_RAW_STATUS_5, + ARIZONA_BOOT_DONE_STS, ARIZONA_BOOT_DONE_STS); - ret = regmap_read(arizona->regmap, - ARIZONA_INTERRUPT_RAW_STATUS_5, ®); - if (ret != 0) { - dev_err(arizona->dev, "Failed to read boot state: %d\n", - ret); - continue; - } + if (!ret) + regmap_write(arizona->regmap, ARIZONA_INTERRUPT_STATUS_5, + ARIZONA_BOOT_DONE_STS); - if (reg & ARIZONA_BOOT_DONE_STS) - break; + pm_runtime_mark_last_busy(arizona->dev); + + return ret; +} + +static int arizona_apply_hardware_patch(struct arizona* arizona) +{ + unsigned int fll, sysclk; + int ret, err; + + regcache_cache_bypass(arizona->regmap, true); + + /* Cache existing FLL and SYSCLK settings */ + ret = regmap_read(arizona->regmap, ARIZONA_FLL1_CONTROL_1, &fll); + if (ret != 0) { + dev_err(arizona->dev, "Failed to cache FLL settings: %d\n", + ret); + return ret; + } + ret = regmap_read(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, &sysclk); + if (ret != 0) { + dev_err(arizona->dev, "Failed to cache SYSCLK settings: %d\n", + ret); + return ret; } - if (reg & ARIZONA_BOOT_DONE_STS) { - regmap_write(arizona->regmap, ARIZONA_INTERRUPT_STATUS_5, - ARIZONA_BOOT_DONE_STS); - } else { - dev_err(arizona->dev, "Device boot timed out: %x\n", reg); - return -ETIMEDOUT; + /* Start up SYSCLK using the FLL in free running mode */ + ret = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1, + ARIZONA_FLL1_ENA | ARIZONA_FLL1_FREERUN); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to start FLL in freerunning mode: %d\n", + ret); + return ret; + } + ret = arizona_poll_reg(arizona, 25, ARIZONA_INTERRUPT_RAW_STATUS_5, + ARIZONA_FLL1_CLOCK_OK_STS, + ARIZONA_FLL1_CLOCK_OK_STS); + if (ret != 0) { + ret = -ETIMEDOUT; + goto err_fll; } - pm_runtime_mark_last_busy(arizona->dev); + ret = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, 0x0144); + if (ret != 0) { + dev_err(arizona->dev, "Failed to start SYSCLK: %d\n", ret); + goto err_fll; + } - return 0; + /* Start the write sequencer and wait for it to finish */ + ret = regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0, + ARIZONA_WSEQ_ENA | ARIZONA_WSEQ_START | 160); + if (ret != 0) { + dev_err(arizona->dev, "Failed to start write sequencer: %d\n", + ret); + goto err_sysclk; + } + ret = arizona_poll_reg(arizona, 5, ARIZONA_WRITE_SEQUENCER_CTRL_1, + ARIZONA_WSEQ_BUSY, 0); + if (ret != 0) { + regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0, + ARIZONA_WSEQ_ABORT); + ret = -ETIMEDOUT; + } + +err_sysclk: + err = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, sysclk); + if (err != 0) { + dev_err(arizona->dev, + "Failed to re-apply old SYSCLK settings: %d\n", + err); + } + +err_fll: + err = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1, fll); + if (err != 0) { + dev_err(arizona->dev, + "Failed to re-apply old FLL settings: %d\n", + err); + } + + regcache_cache_bypass(arizona->regmap, false); + + if (ret != 0) + return ret; + else + return err; } #ifdef CONFIG_PM_RUNTIME @@ -233,20 +342,44 @@ static int arizona_runtime_resume(struct device *dev) regcache_cache_only(arizona->regmap, false); - ret = arizona_wait_for_boot(arizona); - if (ret != 0) { - regulator_disable(arizona->dcvdd); - return ret; + switch (arizona->type) { + case WM5102: + ret = wm5102_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, "Failed to apply patch: %d\n", + ret); + goto err; + } + + ret = arizona_apply_hardware_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to apply hardware 
patch: %d\n", + ret); + goto err; + } + break; + default: + ret = arizona_wait_for_boot(arizona); + if (ret != 0) { + goto err; + } + + break; } ret = regcache_sync(arizona->regmap); if (ret != 0) { dev_err(arizona->dev, "Failed to restore register cache\n"); - regulator_disable(arizona->dcvdd); - return ret; + goto err; } return 0; + +err: + regcache_cache_only(arizona->regmap, true); + regulator_disable(arizona->dcvdd); + return ret; } static int arizona_runtime_suspend(struct device *dev) @@ -371,6 +504,17 @@ int arizona_dev_init(struct arizona *arizona) goto err_early; } + if (arizona->pdata.reset) { + /* Start out with /RESET low to put the chip into reset */ + ret = gpio_request_one(arizona->pdata.reset, + GPIOF_DIR_OUT | GPIOF_INIT_LOW, + "arizona /RESET"); + if (ret != 0) { + dev_err(dev, "Failed to request /RESET: %d\n", ret); + goto err_early; + } + } + ret = regulator_bulk_enable(arizona->num_core_supplies, arizona->core_supplies); if (ret != 0) { @@ -386,16 +530,8 @@ int arizona_dev_init(struct arizona *arizona) } if (arizona->pdata.reset) { - /* Start out with /RESET low to put the chip into reset */ - ret = gpio_request_one(arizona->pdata.reset, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, - "arizona /RESET"); - if (ret != 0) { - dev_err(dev, "Failed to request /RESET: %d\n", ret); - goto err_dcvdd; - } - gpio_set_value_cansleep(arizona->pdata.reset, 1); + msleep(1); } regcache_cache_only(arizona->regmap, false); @@ -424,6 +560,7 @@ int arizona_dev_init(struct arizona *arizona) arizona->type = WM5102; } apply_patch = wm5102_patch; + arizona->rev &= 0x7; break; #endif #ifdef CONFIG_MFD_WM5110 @@ -454,6 +591,8 @@ int arizona_dev_init(struct arizona *arizona) goto err_reset; } + msleep(1); + ret = regcache_sync(arizona->regmap); if (ret != 0) { dev_err(dev, "Failed to sync device: %d\n", ret); @@ -461,10 +600,24 @@ int arizona_dev_init(struct arizona *arizona) } } - ret = arizona_wait_for_boot(arizona); - if (ret != 0) { - dev_err(arizona->dev, "Device failed initial boot: %d\n", ret); - goto err_reset; + switch (arizona->type) { + case WM5102: + ret = regmap_read(arizona->regmap, 0x19, &val); + if (ret != 0) + dev_err(dev, + "Failed to check write sequencer state: %d\n", + ret); + else if (val & 0x01) + break; + /* Fall through */ + default: + ret = arizona_wait_for_boot(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Device failed initial boot: %d\n", ret); + goto err_reset; + } + break; } if (apply_patch) { @@ -474,6 +627,20 @@ int arizona_dev_init(struct arizona *arizona) ret); goto err_reset; } + + switch (arizona->type) { + case WM5102: + ret = arizona_apply_hardware_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to apply hardware patch: %d\n", + ret); + goto err_reset; + } + break; + default: + break; + } } for (i = 0; i < ARRAY_SIZE(arizona->pdata.gpio_defaults); i++) { @@ -498,6 +665,7 @@ int arizona_dev_init(struct arizona *arizona) regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_SRC_MASK, arizona->pdata.clk32k_src - 1); + arizona_clk32k_enable(arizona); break; case ARIZONA_32KZ_NONE: regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, @@ -511,10 +679,16 @@ int arizona_dev_init(struct arizona *arizona) } for (i = 0; i < ARIZONA_MAX_MICBIAS; i++) { - if (!arizona->pdata.micbias[i].mV) + if (!arizona->pdata.micbias[i].mV && + !arizona->pdata.micbias[i].bypass) continue; + /* Apply default for bypass mode */ + if (!arizona->pdata.micbias[i].mV) + arizona->pdata.micbias[i].mV = 2800; + val = (arizona->pdata.micbias[i].mV - 
1500) / 100; + val <<= ARIZONA_MICB1_LVL_SHIFT; if (arizona->pdata.micbias[i].ext_cap) @@ -526,10 +700,14 @@ int arizona_dev_init(struct arizona *arizona) if (arizona->pdata.micbias[i].fast_start) val |= ARIZONA_MICB1_RATE; + if (arizona->pdata.micbias[i].bypass) + val |= ARIZONA_MICB1_BYPASS; + regmap_update_bits(arizona->regmap, ARIZONA_MIC_BIAS_CTRL_1 + i, ARIZONA_MICB1_LVL_MASK | ARIZONA_MICB1_DISCH | + ARIZONA_MICB1_BYPASS | ARIZONA_MICB1_RATE, val); } @@ -610,10 +788,9 @@ err_irq: arizona_irq_exit(arizona); err_reset: if (arizona->pdata.reset) { - gpio_set_value_cansleep(arizona->pdata.reset, 1); + gpio_set_value_cansleep(arizona->pdata.reset, 0); gpio_free(arizona->pdata.reset); } -err_dcvdd: regulator_disable(arizona->dcvdd); err_enable: regulator_bulk_disable(arizona->num_core_supplies, diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 2bec5f0db3e..64cd9b6dac9 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -94,6 +94,7 @@ static irqreturn_t arizona_ctrlif_err(int irq, void *data) static irqreturn_t arizona_irq_thread(int irq, void *data) { struct arizona *arizona = data; + bool poll; unsigned int val; int ret; @@ -103,20 +104,39 @@ static irqreturn_t arizona_irq_thread(int irq, void *data) return IRQ_NONE; } - /* Always handle the AoD domain */ - handle_nested_irq(irq_find_mapping(arizona->virq, 0)); + do { + poll = false; + + /* Always handle the AoD domain */ + handle_nested_irq(irq_find_mapping(arizona->virq, 0)); + + /* + * Check if one of the main interrupts is asserted and only + * check that domain if it is. + */ + ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS, + &val); + if (ret == 0 && val & ARIZONA_IRQ1_STS) { + handle_nested_irq(irq_find_mapping(arizona->virq, 1)); + } else if (ret != 0) { + dev_err(arizona->dev, + "Failed to read main IRQ status: %d\n", ret); + } - /* - * Check if one of the main interrupts is asserted and only - * check that domain if it is. - */ - ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS, &val); - if (ret == 0 && val & ARIZONA_IRQ1_STS) { - handle_nested_irq(irq_find_mapping(arizona->virq, 1)); - } else if (ret != 0) { - dev_err(arizona->dev, "Failed to read main IRQ status: %d\n", - ret); - } + /* + * Poll the IRQ pin status to see if we're really done + * if the interrupt controller can't do it for us. 
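 * (With an edge-triggered parent IRQ, an event that arrives while this
 * thread is still running produces no new edge, so a level check of the
 * GPIO is the only way to notice it.)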
+ */ + if (!arizona->pdata.irq_gpio) { + break; + } else if (arizona->pdata.irq_flags & IRQF_TRIGGER_RISING && + gpio_get_value_cansleep(arizona->pdata.irq_gpio)) { + poll = true; + } else if (arizona->pdata.irq_flags & IRQF_TRIGGER_FALLING && + !gpio_get_value_cansleep(arizona->pdata.irq_gpio)) { + poll = true; + } + } while (poll); pm_runtime_mark_last_busy(arizona->dev); pm_runtime_put_autosuspend(arizona->dev); @@ -169,6 +189,7 @@ int arizona_irq_init(struct arizona *arizona) int ret, i; const struct regmap_irq_chip *aod, *irq; bool ctrlif_error = true; + struct irq_data *irq_data; switch (arizona->type) { #ifdef CONFIG_MFD_WM5102 @@ -192,7 +213,36 @@ int arizona_irq_init(struct arizona *arizona) return -EINVAL; } - if (arizona->pdata.irq_active_high) { + /* Disable all wake sources by default */ + regmap_write(arizona->regmap, ARIZONA_WAKE_CONTROL, 0); + + /* Read the flags from the interrupt controller if not specified */ + if (!arizona->pdata.irq_flags) { + irq_data = irq_get_irq_data(arizona->irq); + if (!irq_data) { + dev_err(arizona->dev, "Invalid IRQ: %d\n", + arizona->irq); + return -EINVAL; + } + + arizona->pdata.irq_flags = irqd_get_trigger_type(irq_data); + switch (arizona->pdata.irq_flags) { + case IRQF_TRIGGER_LOW: + case IRQF_TRIGGER_HIGH: + case IRQF_TRIGGER_RISING: + case IRQF_TRIGGER_FALLING: + break; + + case IRQ_TYPE_NONE: + default: + /* Device default */ + arizona->pdata.irq_flags = IRQF_TRIGGER_LOW; + break; + } + } + + if (arizona->pdata.irq_flags & (IRQF_TRIGGER_HIGH | + IRQF_TRIGGER_RISING)) { ret = regmap_update_bits(arizona->regmap, ARIZONA_IRQ_CTRL_1, ARIZONA_IRQ_POL, 0); if (ret != 0) { @@ -200,12 +250,10 @@ int arizona_irq_init(struct arizona *arizona) ret); goto err; } - - flags |= IRQF_TRIGGER_HIGH; - } else { - flags |= IRQF_TRIGGER_LOW; } + flags |= arizona->pdata.irq_flags; + /* Allocate a virtual IRQ domain to distribute to the regmap domains */ arizona->virq = irq_domain_add_linear(NULL, 2, &arizona_domain_ops, arizona); @@ -257,11 +305,31 @@ int arizona_irq_init(struct arizona *arizona) } } + /* Used to emulate edge trigger and to work around broken pinmux */ + if (arizona->pdata.irq_gpio) { + if (gpio_to_irq(arizona->pdata.irq_gpio) != arizona->irq) { + dev_warn(arizona->dev, "IRQ %d is not GPIO %d (%d)\n", + arizona->irq, arizona->pdata.irq_gpio, + gpio_to_irq(arizona->pdata.irq_gpio)); + arizona->irq = gpio_to_irq(arizona->pdata.irq_gpio); + } + + ret = devm_gpio_request_one(arizona->dev, + arizona->pdata.irq_gpio, + GPIOF_IN, "arizona IRQ"); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to request IRQ GPIO %d:: %d\n", + arizona->pdata.irq_gpio, ret); + arizona->pdata.irq_gpio = 0; + } + } + ret = request_threaded_irq(arizona->irq, NULL, arizona_irq_thread, flags, "arizona", arizona); if (ret != 0) { - dev_err(arizona->dev, "Failed to request IRQ %d: %d\n", + dev_err(arizona->dev, "Failed to request primary IRQ %d: %d\n", arizona->irq, ret); goto err_main_irq; } diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c index 1b9fdd698b0..b57e642d2b4 100644 --- a/drivers/mfd/arizona-spi.c +++ b/drivers/mfd/arizona-spi.c @@ -67,7 +67,7 @@ static int arizona_spi_probe(struct spi_device *spi) static int arizona_spi_remove(struct spi_device *spi) { - struct arizona *arizona = dev_get_drvdata(&spi->dev); + struct arizona *arizona = spi_get_drvdata(spi); arizona_dev_exit(arizona); return 0; } diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c index e994c969112..01e41416270 100644 --- a/drivers/mfd/as3711.c +++ 
b/drivers/mfd/as3711.c @@ -112,16 +112,34 @@ static const struct regmap_config as3711_regmap_config = { .cache_type = REGCACHE_RBTREE, }; +#ifdef CONFIG_OF +static struct of_device_id as3711_of_match[] = { + {.compatible = "ams,as3711",}, + {} +}; +MODULE_DEVICE_TABLE(of, as3711_of_match); +#endif + static int as3711_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct as3711 *as3711; - struct as3711_platform_data *pdata = client->dev.platform_data; + struct as3711_platform_data *pdata; unsigned int id1, id2; int ret; - if (!pdata) - dev_dbg(&client->dev, "Platform data not found\n"); + if (!client->dev.of_node) { + pdata = client->dev.platform_data; + if (!pdata) + dev_dbg(&client->dev, "Platform data not found\n"); + } else { + pdata = devm_kzalloc(&client->dev, + sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(&client->dev, "Failed to allocate pdata\n"); + return -ENOMEM; + } + } as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL); if (!as3711) { @@ -193,7 +211,8 @@ static struct i2c_driver as3711_i2c_driver = { .driver = { .name = "as3711", .owner = THIS_MODULE, - }, + .of_match_table = of_match_ptr(as3711_of_match), + }, .probe = as3711_i2c_probe, .remove = as3711_i2c_remove, .id_table = as3711_i2c_id, diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c new file mode 100644 index 00000000000..10cd14e35eb --- /dev/null +++ b/drivers/mfd/cros_ec.c @@ -0,0 +1,196 @@ +/* + * ChromeOS EC multi-function device + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * The ChromeOS EC multi function device is used to mux all the requests + * to the EC device for its multiple features: keyboard controller, + * battery charging and regulator control, firmware update. 
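 * The transmit framing built by cros_ec_prepare_tx() below is three
 * header bytes (version, command, payload length), the payload itself,
 * then an 8-bit checksum that is the byte sum of everything before it.
 * For example, command 0x26 with payload {0xaa, 0x55} would go out as
 * {0xdc, 0x26, 0x02, 0xaa, 0x55, 0x03}, assuming EC_CMD_VERSION0 is
 * 0xdc (illustrative values, not taken from this patch).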
+ */ + +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/mfd/core.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> + +int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg) +{ + uint8_t *out; + int csum, i; + + BUG_ON(msg->out_len > EC_HOST_PARAM_SIZE); + out = ec_dev->dout; + out[0] = EC_CMD_VERSION0 + msg->version; + out[1] = msg->cmd; + out[2] = msg->out_len; + csum = out[0] + out[1] + out[2]; + for (i = 0; i < msg->out_len; i++) + csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i]; + out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff); + + return EC_MSG_TX_PROTO_BYTES + msg->out_len; +} +EXPORT_SYMBOL(cros_ec_prepare_tx); + +static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len) +{ + struct cros_ec_msg msg; + + msg.version = cmd >> 8; + msg.cmd = cmd & 0xff; + msg.out_buf = out_buf; + msg.out_len = out_len; + msg.in_buf = in_buf; + msg.in_len = in_len; + + return ec_dev->command_xfer(ec_dev, &msg); +} + +static int cros_ec_command_recv(struct cros_ec_device *ec_dev, + uint16_t cmd, void *buf, int buf_len) +{ + return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len); +} + +static int cros_ec_command_send(struct cros_ec_device *ec_dev, + uint16_t cmd, void *buf, int buf_len) +{ + return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0); +} + +static irqreturn_t ec_irq_thread(int irq, void *data) +{ + struct cros_ec_device *ec_dev = data; + + if (device_may_wakeup(ec_dev->dev)) + pm_wakeup_event(ec_dev->dev, 0); + + blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev); + + return IRQ_HANDLED; +} + +static struct mfd_cell cros_devs[] = { + { + .name = "cros-ec-keyb", + .id = 1, + .of_compatible = "google,cros-ec-keyb", + }, +}; + +int cros_ec_register(struct cros_ec_device *ec_dev) +{ + struct device *dev = ec_dev->dev; + int err = 0; + + BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); + + ec_dev->command_send = cros_ec_command_send; + ec_dev->command_recv = cros_ec_command_recv; + ec_dev->command_sendrecv = cros_ec_command_sendrecv; + + if (ec_dev->din_size) { + ec_dev->din = kmalloc(ec_dev->din_size, GFP_KERNEL); + if (!ec_dev->din) { + err = -ENOMEM; + goto fail_din; + } + } + if (ec_dev->dout_size) { + ec_dev->dout = kmalloc(ec_dev->dout_size, GFP_KERNEL); + if (!ec_dev->dout) { + err = -ENOMEM; + goto fail_dout; + } + } + + if (!ec_dev->irq) { + dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq); + goto fail_irq; + } + + err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "chromeos-ec", ec_dev); + if (err) { + dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err); + goto fail_irq; + } + + err = mfd_add_devices(dev, 0, cros_devs, + ARRAY_SIZE(cros_devs), + NULL, ec_dev->irq, NULL); + if (err) { + dev_err(dev, "failed to add mfd devices\n"); + goto fail_mfd; + } + + dev_info(dev, "Chrome EC (%s)\n", ec_dev->name); + + return 0; + +fail_mfd: + free_irq(ec_dev->irq, ec_dev); +fail_irq: + kfree(ec_dev->dout); +fail_dout: + kfree(ec_dev->din); +fail_din: + return err; +} +EXPORT_SYMBOL(cros_ec_register); + +int cros_ec_remove(struct cros_ec_device *ec_dev) +{ + mfd_remove_devices(ec_dev->dev); + free_irq(ec_dev->irq, ec_dev); + kfree(ec_dev->dout); + kfree(ec_dev->din); + + return 0; +} +EXPORT_SYMBOL(cros_ec_remove); + +#ifdef CONFIG_PM_SLEEP +int cros_ec_suspend(struct cros_ec_device *ec_dev) 
+{ + struct device *dev = ec_dev->dev; + + if (device_may_wakeup(dev)) + ec_dev->wake_enabled = !enable_irq_wake(ec_dev->irq); + + disable_irq(ec_dev->irq); + ec_dev->was_wake_device = ec_dev->wake_enabled; + + return 0; +} +EXPORT_SYMBOL(cros_ec_suspend); + +int cros_ec_resume(struct cros_ec_device *ec_dev) +{ + enable_irq(ec_dev->irq); + + if (ec_dev->wake_enabled) { + disable_irq_wake(ec_dev->irq); + ec_dev->wake_enabled = 0; + } + + return 0; +} +EXPORT_SYMBOL(cros_ec_resume); + +#endif diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c new file mode 100644 index 00000000000..123044608b6 --- /dev/null +++ b/drivers/mfd/cros_ec_i2c.c @@ -0,0 +1,201 @@ +/* + * ChromeOS EC multi-function device (I2C) + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +static inline struct cros_ec_device *to_ec_dev(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + return i2c_get_clientdata(client); +} + +static int cros_ec_command_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg) +{ + struct i2c_client *client = ec_dev->priv; + int ret = -ENOMEM; + int i; + int packet_len; + u8 *out_buf = NULL; + u8 *in_buf = NULL; + u8 sum; + struct i2c_msg i2c_msg[2]; + + i2c_msg[0].addr = client->addr; + i2c_msg[0].flags = 0; + i2c_msg[1].addr = client->addr; + i2c_msg[1].flags = I2C_M_RD; + + /* + * allocate larger packet (one byte for checksum, one byte for + * length, and one for result code) + */ + packet_len = msg->in_len + 3; + in_buf = kzalloc(packet_len, GFP_KERNEL); + if (!in_buf) + goto done; + i2c_msg[1].len = packet_len; + i2c_msg[1].buf = (char *)in_buf; + + /* + * allocate larger packet (one byte for checksum, one for + * command code, one for length, and one for command version) + */ + packet_len = msg->out_len + 4; + out_buf = kzalloc(packet_len, GFP_KERNEL); + if (!out_buf) + goto done; + i2c_msg[0].len = packet_len; + i2c_msg[0].buf = (char *)out_buf; + + out_buf[0] = EC_CMD_VERSION0 + msg->version; + out_buf[1] = msg->cmd; + out_buf[2] = msg->out_len; + + /* copy message payload and compute checksum */ + sum = out_buf[0] + out_buf[1] + out_buf[2]; + for (i = 0; i < msg->out_len; i++) { + out_buf[3 + i] = msg->out_buf[i]; + sum += out_buf[3 + i]; + } + out_buf[3 + msg->out_len] = sum; + + /* send command to EC and read answer */ + ret = i2c_transfer(client->adapter, i2c_msg, 2); + if (ret < 0) { + dev_err(ec_dev->dev, "i2c transfer failed: %d\n", ret); + goto done; + } else if (ret != 2) { + dev_err(ec_dev->dev, "failed to get response: %d\n", ret); + ret = -EIO; + goto done; + } + + /* check response error code */ + if (i2c_msg[1].buf[0]) { + dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", + msg->cmd, i2c_msg[1].buf[0]); + ret = -EINVAL; + goto done; + } + + /* copy response packet payload and compute checksum */ + sum = in_buf[0] + 
in_buf[1]; + for (i = 0; i < msg->in_len; i++) { + msg->in_buf[i] = in_buf[2 + i]; + sum += in_buf[2 + i]; + } + dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n", + i2c_msg[1].len, in_buf, sum); + if (sum != in_buf[2 + msg->in_len]) { + dev_err(ec_dev->dev, "bad packet checksum\n"); + ret = -EBADMSG; + goto done; + } + + ret = 0; + done: + kfree(in_buf); + kfree(out_buf); + return ret; +} + +static int cros_ec_probe_i2c(struct i2c_client *client, + const struct i2c_device_id *dev_id) +{ + struct device *dev = &client->dev; + struct cros_ec_device *ec_dev = NULL; + int err; + + ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL); + if (!ec_dev) + return -ENOMEM; + + i2c_set_clientdata(client, ec_dev); + ec_dev->name = "I2C"; + ec_dev->dev = dev; + ec_dev->priv = client; + ec_dev->irq = client->irq; + ec_dev->command_xfer = cros_ec_command_xfer; + ec_dev->ec_name = client->name; + ec_dev->phys_name = client->adapter->name; + ec_dev->parent = &client->dev; + + err = cros_ec_register(ec_dev); + if (err) { + dev_err(dev, "cannot register EC\n"); + return err; + } + + return 0; +} + +static int cros_ec_remove_i2c(struct i2c_client *client) +{ + struct cros_ec_device *ec_dev = i2c_get_clientdata(client); + + cros_ec_remove(ec_dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_i2c_suspend(struct device *dev) +{ + struct cros_ec_device *ec_dev = to_ec_dev(dev); + + return cros_ec_suspend(ec_dev); +} + +static int cros_ec_i2c_resume(struct device *dev) +{ + struct cros_ec_device *ec_dev = to_ec_dev(dev); + + return cros_ec_resume(ec_dev); +} +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_i2c_pm_ops, cros_ec_i2c_suspend, + cros_ec_i2c_resume); + +static const struct i2c_device_id cros_ec_i2c_id[] = { + { "cros-ec-i2c", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, cros_ec_i2c_id); + +static struct i2c_driver cros_ec_driver = { + .driver = { + .name = "cros-ec-i2c", + .owner = THIS_MODULE, + .pm = &cros_ec_i2c_pm_ops, + }, + .probe = cros_ec_probe_i2c, + .remove = cros_ec_remove_i2c, + .id_table = cros_ec_i2c_id, +}; + +module_i2c_driver(cros_ec_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC multi function device"); diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c new file mode 100644 index 00000000000..19193cf1e7a --- /dev/null +++ b/drivers/mfd/cros_ec_spi.c @@ -0,0 +1,375 @@ +/* + * ChromeOS EC multi-function device (SPI) + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spi/spi.h> + + +/* The header byte, which follows the preamble */ +#define EC_MSG_HEADER 0xec + +/* + * Number of EC preamble bytes we read at a time. Since it takes + * about 400-500us for the EC to respond there is not a lot of + * point in tuning this. If the EC could respond faster then + * we could increase this so that might expect the preamble and + * message to occur in a single transaction. 
However, the maximum + * SPI transfer size is 256 bytes, so at 5MHz we need a response + * time of perhaps <320us (200 bytes / 1600 bits). + */ +#define EC_MSG_PREAMBLE_COUNT 32 + +/* + * We must get a response from the EC in 5ms. This is a very long + * time, but the flash write command can take 2-3ms. The EC command + * processing is currently not very fast (about 500us). We could + * look at speeding this up and making the flash write command a + * 'slow' command, requiring a GET_STATUS wait loop, like flash + * erase. + */ +#define EC_MSG_DEADLINE_MS 5 + +/* + * Time between raising the SPI chip select (for the end of a + * transaction) and dropping it again (for the next transaction). + * If we go too fast, the EC will miss the transaction. It seems + * that 50us is enough with the 16MHz STM32 EC. + */ +#define EC_SPI_RECOVERY_TIME_NS (50 * 1000) + +/** + * struct cros_ec_spi - information about a SPI-connected EC + * + * @spi: SPI device we are connected to + * @last_transfer_ns: time that we last finished a transfer, or 0 if there + * is no record + */ +struct cros_ec_spi { + struct spi_device *spi; + s64 last_transfer_ns; +}; + +static void debug_packet(struct device *dev, const char *name, u8 *ptr, + int len) +{ +#ifdef DEBUG + int i; + + dev_dbg(dev, "%s: ", name); + for (i = 0; i < len; i++) + pr_cont(" %02x", ptr[i]); +#endif +} + +/** + * cros_ec_spi_receive_response - Receive a response from the EC. + * + * This function has two phases: reading the preamble bytes (since if we read + * data from the EC before it is ready to send, we just get preamble) and + * reading the actual message. + * + * The received data is placed into ec_dev->din. + * + * @ec_dev: ChromeOS EC device + * @need_len: Number of message bytes we need to read + */ +static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, + int need_len) +{ + struct cros_ec_spi *ec_spi = ec_dev->priv; + struct spi_transfer trans; + struct spi_message msg; + u8 *ptr, *end; + int ret; + unsigned long deadline; + int todo; + + /* Receive data until we see the header byte */ + deadline = jiffies + msecs_to_jiffies(EC_MSG_DEADLINE_MS); + do { + memset(&trans, '\0', sizeof(trans)); + trans.cs_change = 1; + trans.rx_buf = ptr = ec_dev->din; + trans.len = EC_MSG_PREAMBLE_COUNT; + + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + ret = spi_sync(ec_spi->spi, &msg); + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + for (end = ptr + EC_MSG_PREAMBLE_COUNT; ptr != end; ptr++) { + if (*ptr == EC_MSG_HEADER) { + dev_dbg(ec_dev->dev, "msg found at %ld\n", + ptr - ec_dev->din); + break; + } + } + + if (time_after(jiffies, deadline)) { + dev_warn(ec_dev->dev, "EC failed to respond in time\n"); + return -ETIMEDOUT; + } + } while (ptr == end); + + /* + * ptr now points to the header byte. Copy any valid data to the + * start of our buffer + */ + todo = end - ++ptr; + BUG_ON(todo < 0 || todo > ec_dev->din_size); + todo = min(todo, need_len); + memmove(ec_dev->din, ptr, todo); + ptr = ec_dev->din + todo; + dev_dbg(ec_dev->dev, "need %d, got %d bytes from preamble\n", + need_len, todo); + need_len -= todo; + + /* Receive data until we have it all */ + while (need_len > 0) { + /* + * We can't support transfers larger than the SPI FIFO size + * unless we have DMA. We don't have DMA on the ISP SPI ports + * for Exynos. We need a way of asking SPI driver for + * maximum-supported transfer size.
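 * For example, with need_len = 300 the loop below would issue one
 * 256-byte transfer followed by one 44-byte transfer.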
+ */ + todo = min(need_len, 256); + dev_dbg(ec_dev->dev, "loop, todo=%d, need_len=%d, ptr=%ld\n", + todo, need_len, ptr - ec_dev->din); + + memset(&trans, '\0', sizeof(trans)); + trans.cs_change = 1; + trans.rx_buf = ptr; + trans.len = todo; + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + + /* send command to EC and read answer */ + BUG_ON((u8 *)trans.rx_buf - ec_dev->din + todo > + ec_dev->din_size); + ret = spi_sync(ec_spi->spi, &msg); + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + debug_packet(ec_dev->dev, "interim", ptr, todo); + ptr += todo; + need_len -= todo; + } + + dev_dbg(ec_dev->dev, "loop done, ptr=%ld\n", ptr - ec_dev->din); + + return 0; +} + +/** + * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply + * + * @ec_dev: ChromeOS EC device + * @ec_msg: Message to transfer + */ +static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_msg *ec_msg) +{ + struct cros_ec_spi *ec_spi = ec_dev->priv; + struct spi_transfer trans; + struct spi_message msg; + int i, len; + u8 *ptr; + int sum; + int ret = 0, final_ret; + struct timespec ts; + + len = cros_ec_prepare_tx(ec_dev, ec_msg); + dev_dbg(ec_dev->dev, "prepared, len=%d\n", len); + + /* If it's too soon to do another transaction, wait */ + if (ec_spi->last_transfer_ns) { + struct timespec ts; + unsigned long delay; /* The delay completed so far */ + + ktime_get_ts(&ts); + delay = timespec_to_ns(&ts) - ec_spi->last_transfer_ns; + if (delay < EC_SPI_RECOVERY_TIME_NS) + ndelay(EC_SPI_RECOVERY_TIME_NS - delay); + } + + /* Transmit phase - send our message */ + debug_packet(ec_dev->dev, "out", ec_dev->dout, len); + memset(&trans, '\0', sizeof(trans)); + trans.tx_buf = ec_dev->dout; + trans.len = len; + trans.cs_change = 1; + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + ret = spi_sync(ec_spi->spi, &msg); + + /* Get the response */ + if (!ret) { + ret = cros_ec_spi_receive_response(ec_dev, + ec_msg->in_len + EC_MSG_TX_PROTO_BYTES); + } else { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + } + + /* turn off CS */ + spi_message_init(&msg); + final_ret = spi_sync(ec_spi->spi, &msg); + ktime_get_ts(&ts); + ec_spi->last_transfer_ns = timespec_to_ns(&ts); + if (!ret) + ret = final_ret; + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + /* check response error code */ + ptr = ec_dev->din; + if (ptr[0]) { + dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", + ec_msg->cmd, ptr[0]); + debug_packet(ec_dev->dev, "in_err", ptr, len); + return -EINVAL; + } + len = ptr[1]; + sum = ptr[0] + ptr[1]; + if (len > ec_msg->in_len) { + dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)\n", + len, ec_msg->in_len); + return -ENOSPC; + } + + /* copy response packet payload and compute checksum */ + for (i = 0; i < len; i++) { + sum += ptr[i + 2]; + if (ec_msg->in_len) + ec_msg->in_buf[i] = ptr[i + 2]; + } + sum &= 0xff; + + debug_packet(ec_dev->dev, "in", ptr, len + 3); + + if (sum != ptr[len + 2]) { + dev_err(ec_dev->dev, + "bad packet checksum, expected %02x, got %02x\n", + sum, ptr[len + 2]); + return -EBADMSG; + } + + return 0; +} + +static int cros_ec_probe_spi(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct cros_ec_device *ec_dev; + struct cros_ec_spi *ec_spi; + int err; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + err = spi_setup(spi); + if (err < 0) + return err; + + ec_spi = devm_kzalloc(dev, sizeof(*ec_spi), GFP_KERNEL); + if
(ec_spi == NULL) + return -ENOMEM; + ec_spi->spi = spi; + ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL); + if (!ec_dev) + return -ENOMEM; + + spi_set_drvdata(spi, ec_dev); + ec_dev->name = "SPI"; + ec_dev->dev = dev; + ec_dev->priv = ec_spi; + ec_dev->irq = spi->irq; + ec_dev->command_xfer = cros_ec_command_spi_xfer; + ec_dev->ec_name = ec_spi->spi->modalias; + ec_dev->phys_name = dev_name(&ec_spi->spi->dev); + ec_dev->parent = &ec_spi->spi->dev; + ec_dev->din_size = EC_MSG_BYTES + EC_MSG_PREAMBLE_COUNT; + ec_dev->dout_size = EC_MSG_BYTES; + + err = cros_ec_register(ec_dev); + if (err) { + dev_err(dev, "cannot register EC\n"); + return err; + } + + return 0; +} + +static int cros_ec_remove_spi(struct spi_device *spi) +{ + struct cros_ec_device *ec_dev; + + ec_dev = spi_get_drvdata(spi); + cros_ec_remove(ec_dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_spi_suspend(struct device *dev) +{ + struct cros_ec_device *ec_dev = dev_get_drvdata(dev); + + return cros_ec_suspend(ec_dev); +} + +static int cros_ec_spi_resume(struct device *dev) +{ + struct cros_ec_device *ec_dev = dev_get_drvdata(dev); + + return cros_ec_resume(ec_dev); +} +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_spi_pm_ops, cros_ec_spi_suspend, + cros_ec_spi_resume); + +static const struct spi_device_id cros_ec_spi_id[] = { + { "cros-ec-spi", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, cros_ec_spi_id); + +static struct spi_driver cros_ec_driver_spi = { + .driver = { + .name = "cros-ec-spi", + .owner = THIS_MODULE, + .pm = &cros_ec_spi_pm_ops, + }, + .probe = cros_ec_probe_spi, + .remove = cros_ec_remove_spi, + .id_table = cros_ec_spi_id, +}; + +module_spi_driver(cros_ec_driver_spi); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC multi function device (SPI)"); diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c index 05176cd2862..f1a316e0d6a 100644 --- a/drivers/mfd/da903x.c +++ b/drivers/mfd/da903x.c @@ -499,7 +499,8 @@ static int da903x_probe(struct i2c_client *client, unsigned int tmp; int ret; - chip = kzalloc(sizeof(struct da903x_chip), GFP_KERNEL); + chip = devm_kzalloc(&client->dev, sizeof(struct da903x_chip), + GFP_KERNEL); if (chip == NULL) return -ENOMEM; @@ -515,33 +516,27 @@ static int da903x_probe(struct i2c_client *client, ret = chip->ops->init_chip(chip); if (ret) - goto out_free_chip; + return ret; /* mask and clear all IRQs */ chip->events_mask = 0xffffffff; chip->ops->mask_events(chip, chip->events_mask); chip->ops->read_events(chip, &tmp); - ret = request_irq(client->irq, da903x_irq_handler, + ret = devm_request_irq(&client->dev, client->irq, da903x_irq_handler, IRQF_TRIGGER_FALLING, "da903x", chip); if (ret) { dev_err(&client->dev, "failed to request irq %d\n", client->irq); - goto out_free_chip; + return ret; } ret = da903x_add_subdevs(chip, pdata); if (ret) - goto out_free_irq; + return ret; return 0; - -out_free_irq: - free_irq(client->irq, chip); -out_free_chip: - kfree(chip); - return ret; } static int da903x_remove(struct i2c_client *client) @@ -549,8 +544,6 @@ static int da903x_remove(struct i2c_client *client) struct da903x_chip *chip = i2c_get_clientdata(client); da903x_remove_subdevs(chip); - free_irq(client->irq, chip); - kfree(chip); return 0; } diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c index 61d63b93576..0680bcbc53d 100644 --- a/drivers/mfd/da9052-spi.c +++ b/drivers/mfd/da9052-spi.c @@ -38,7 +38,7 @@ static int da9052_spi_probe(struct spi_device *spi) da9052->dev = &spi->dev; da9052->chip_irq = spi->irq; - 
dev_set_drvdata(&spi->dev, da9052); + spi_set_drvdata(spi, da9052); da9052_regmap_config.read_flag_mask = 1; da9052_regmap_config.write_flag_mask = 0; @@ -60,7 +60,7 @@ static int da9052_spi_probe(struct spi_device *spi) static int da9052_spi_remove(struct spi_device *spi) { - struct da9052 *da9052 = dev_get_drvdata(&spi->dev); + struct da9052 *da9052 = spi_get_drvdata(spi); da9052_device_exit(da9052); return 0; diff --git a/drivers/mfd/da9055-core.c b/drivers/mfd/da9055-core.c index f56a1a9f777..49cb23d3746 100644 --- a/drivers/mfd/da9055-core.c +++ b/drivers/mfd/da9055-core.c @@ -391,7 +391,7 @@ int da9055_device_init(struct da9055 *da9055) da9055->irq_base = pdata->irq_base; ret = regmap_add_irq_chip(da9055->regmap, da9055->chip_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, da9055->irq_base, &da9055_regmap_irq_chip, &da9055->irq_data); if (ret < 0) diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c index c0bcc872af4..c60ab0c3c4d 100644 --- a/drivers/mfd/davinci_voicecodec.c +++ b/drivers/mfd/davinci_voicecodec.c @@ -177,17 +177,7 @@ static struct platform_driver davinci_vc_driver = { .remove = davinci_vc_remove, }; -static int __init davinci_vc_init(void) -{ - return platform_driver_probe(&davinci_vc_driver, davinci_vc_probe); -} -module_init(davinci_vc_init); - -static void __exit davinci_vc_exit(void) -{ - platform_driver_unregister(&davinci_vc_driver); -} -module_exit(davinci_vc_exit); +module_platform_driver_probe(davinci_vc_driver, davinci_vc_probe); MODULE_AUTHOR("Miguel Aguilar"); MODULE_DESCRIPTION("Texas Instruments DaVinci Voice Codec Core Interface"); diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 21434beb420..319b8abe742 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -24,6 +24,7 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/fs.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/uaccess.h> #include <linux/mfd/core.h> @@ -2704,6 +2705,7 @@ static void dbx500_fw_version_init(struct platform_device *pdev, { struct resource *res; void __iomem *tcpm_base; + u32 version; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "prcmu-tcpm"); @@ -2713,26 +2715,27 @@ static void dbx500_fw_version_init(struct platform_device *pdev, return; } tcpm_base = ioremap(res->start, resource_size(res)); - if (tcpm_base != NULL) { - u32 version; - - version = readl(tcpm_base + version_offset); - fw_info.version.project = (version & 0xFF); - fw_info.version.api_version = (version >> 8) & 0xFF; - fw_info.version.func_version = (version >> 16) & 0xFF; - fw_info.version.errata = (version >> 24) & 0xFF; - strncpy(fw_info.version.project_name, - fw_project_name(fw_info.version.project), - PRCMU_FW_PROJECT_NAME_LEN); - fw_info.valid = true; - pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n", - fw_info.version.project_name, - fw_info.version.project, - fw_info.version.api_version, - fw_info.version.func_version, - fw_info.version.errata); - iounmap(tcpm_base); + if (!tcpm_base) { + dev_err(&pdev->dev, "no prcmu tcpm mem region provided\n"); + return; } + + version = readl(tcpm_base + version_offset); + fw_info.version.project = (version & 0xFF); + fw_info.version.api_version = (version >> 8) & 0xFF; + fw_info.version.func_version = (version >> 16) & 0xFF; + fw_info.version.errata = (version >> 24) & 0xFF; + strncpy(fw_info.version.project_name, + fw_project_name(fw_info.version.project), + PRCMU_FW_PROJECT_NAME_LEN); + fw_info.valid = 
true; + pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n", + fw_info.version.project_name, + fw_info.version.project, + fw_info.version.api_version, + fw_info.version.func_version, + fw_info.version.errata); + iounmap(tcpm_base); } void __init db8500_prcmu_early_init(u32 phy_base, u32 size) @@ -3065,6 +3068,15 @@ static struct db8500_thsens_platform_data db8500_thsens_data = { .num_trips = 4, }; +static struct mfd_cell common_prcmu_devs[] = { + { + .name = "ux500_wdt", + .platform_data = &db8500_wdt_pdata, + .pdata_size = sizeof(db8500_wdt_pdata), + .id = -1, + }, +}; + static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", @@ -3079,12 +3091,6 @@ static struct mfd_cell db8500_prcmu_devs[] = { .pdata_size = sizeof(db8500_cpufreq_table), }, { - .name = "ux500_wdt", - .platform_data = &db8500_wdt_pdata, - .pdata_size = sizeof(db8500_wdt_pdata), - .id = -1, - }, - { .name = "db8500-thermal", .num_resources = ARRAY_SIZE(db8500_thsens_resources), .resources = db8500_thsens_resources, @@ -3173,13 +3179,25 @@ static int db8500_prcmu_probe(struct platform_device *pdev) db8500_prcmu_update_cpufreq(); - err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, - ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, db8500_irq_domain); + err = mfd_add_devices(&pdev->dev, 0, common_prcmu_devs, + ARRAY_SIZE(common_prcmu_devs), NULL, 0, db8500_irq_domain); if (err) { pr_err("prcmu: Failed to add subdevices\n"); return err; } + /* TODO: Remove restriction when clk definitions are available. */ + if (!of_machine_is_compatible("st-ericsson,u8540")) { + err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, + ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, + db8500_irq_domain); + if (err) { + mfd_remove_devices(&pdev->dev); + pr_err("prcmu: Failed to add subdevices\n"); + goto no_irq_return; + } + } + err = db8500_prcmu_register_ab8500(&pdev->dev, pdata->ab_platdata, pdata->ab_irq); if (err) { diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index b7a61f0f27a..5502106ad51 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -393,7 +393,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap, static int ezx_pcap_remove(struct spi_device *spi) { - struct pcap_chip *pcap = dev_get_drvdata(&spi->dev); + struct pcap_chip *pcap = spi_get_drvdata(spi); struct pcap_platform_data *pdata = spi->dev.platform_data; int i, adc_irq; @@ -403,7 +403,7 @@ static int ezx_pcap_remove(struct spi_device *spi) /* cleanup ADC */ adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ? PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE); - free_irq(adc_irq, pcap); + devm_free_irq(&spi->dev, adc_irq, pcap); mutex_lock(&pcap->adc_mutex); for (i = 0; i < PCAP_ADC_MAXQ; i++) kfree(pcap->adc_queue[i]); @@ -415,8 +415,6 @@ static int ezx_pcap_remove(struct spi_device *spi) destroy_workqueue(pcap->workqueue); - kfree(pcap); - return 0; } @@ -431,7 +429,7 @@ static int ezx_pcap_probe(struct spi_device *spi) if (!pdata) goto ret; - pcap = kzalloc(sizeof(*pcap), GFP_KERNEL); + pcap = devm_kzalloc(&spi->dev, sizeof(*pcap), GFP_KERNEL); if (!pcap) { ret = -ENOMEM; goto ret; @@ -441,14 +439,14 @@ static int ezx_pcap_probe(struct spi_device *spi) mutex_init(&pcap->adc_mutex); INIT_WORK(&pcap->isr_work, pcap_isr_work); INIT_WORK(&pcap->msr_work, pcap_msr_work); - dev_set_drvdata(&spi->dev, pcap); + spi_set_drvdata(spi, pcap); /* setup spi */ spi->bits_per_word = 32; spi->mode = SPI_MODE_0 | (pdata->config & PCAP_CS_AH ? 
SPI_CS_HIGH : 0); ret = spi_setup(spi); if (ret) - goto free_pcap; + goto ret; pcap->spi = spi; @@ -458,7 +456,7 @@ static int ezx_pcap_probe(struct spi_device *spi) if (!pcap->workqueue) { ret = -ENOMEM; dev_err(&spi->dev, "can't create pcap thread\n"); - goto free_pcap; + goto ret; } /* redirect interrupts to AP, except adcdone2 */ @@ -491,7 +489,8 @@ static int ezx_pcap_probe(struct spi_device *spi) adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ? PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE); - ret = request_irq(adc_irq, pcap_adc_irq, 0, "ADC", pcap); + ret = devm_request_irq(&spi->dev, adc_irq, pcap_adc_irq, 0, "ADC", + pcap); if (ret) goto free_irqchip; @@ -511,14 +510,12 @@ static int ezx_pcap_probe(struct spi_device *spi) remove_subdevs: device_for_each_child(&spi->dev, NULL, pcap_remove_subdev); /* free_adc: */ - free_irq(adc_irq, pcap); + devm_free_irq(&spi->dev, adc_irq, pcap); free_irqchip: for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) irq_set_chip_and_handler(i, NULL, NULL); /* destroy_workqueue: */ destroy_workqueue(pcap->workqueue); -free_pcap: - kfree(pcap); ret: return ret; } diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c index 9e5453d21a6..0285fceb99a 100644 --- a/drivers/mfd/htc-pasic3.c +++ b/drivers/mfd/htc-pasic3.c @@ -208,18 +208,7 @@ static struct platform_driver pasic3_driver = { .remove = pasic3_remove, }; -static int __init pasic3_base_init(void) -{ - return platform_driver_probe(&pasic3_driver, pasic3_probe); -} - -static void __exit pasic3_base_exit(void) -{ - platform_driver_unregister(&pasic3_driver); -} - -module_init(pasic3_base_init); -module_exit(pasic3_base_exit); +module_platform_driver_probe(pasic3_driver, pasic3_probe); MODULE_AUTHOR("Philipp Zabel <philipp.zabel@gmail.com>"); MODULE_DESCRIPTION("Core driver for HTC PASIC3"); diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c index 1804331bd52..5be3b5e1385 100644 --- a/drivers/mfd/intel_msic.c +++ b/drivers/mfd/intel_msic.c @@ -323,7 +323,8 @@ static int intel_msic_init_devices(struct intel_msic *msic) if (pdata->ocd) { unsigned gpio = pdata->ocd->gpio; - ret = gpio_request_one(gpio, GPIOF_IN, "ocd_gpio"); + ret = devm_gpio_request_one(&pdev->dev, gpio, + GPIOF_IN, "ocd_gpio"); if (ret) { dev_err(&pdev->dev, "failed to register OCD GPIO\n"); return ret; @@ -332,7 +333,6 @@ static int intel_msic_init_devices(struct intel_msic *msic) ret = gpio_to_irq(gpio); if (ret < 0) { dev_err(&pdev->dev, "no IRQ number for OCD GPIO\n"); - gpio_free(gpio); return ret; } @@ -359,8 +359,6 @@ static int intel_msic_init_devices(struct intel_msic *msic) fail: mfd_remove_devices(&pdev->dev); - if (pdata->ocd) - gpio_free(pdata->ocd->gpio); return ret; } @@ -368,12 +366,8 @@ fail: static void intel_msic_remove_devices(struct intel_msic *msic) { struct platform_device *pdev = msic->pdev; - struct intel_msic_platform_data *pdata = pdev->dev.platform_data; mfd_remove_devices(&pdev->dev); - - if (pdata->ocd) - gpio_free(pdata->ocd->gpio); } static int intel_msic_probe(struct platform_device *pdev) diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c index ceebf2c1ea9..4b7e6dac1de 100644 --- a/drivers/mfd/lm3533-core.c +++ b/drivers/mfd/lm3533-core.c @@ -496,8 +496,8 @@ static int lm3533_device_init(struct lm3533 *lm3533) dev_set_drvdata(lm3533->dev, lm3533); if (gpio_is_valid(lm3533->gpio_hwen)) { - ret = gpio_request_one(lm3533->gpio_hwen, GPIOF_OUT_INIT_LOW, - "lm3533-hwen"); + ret = devm_gpio_request_one(lm3533->dev, lm3533->gpio_hwen, + 
GPIOF_OUT_INIT_LOW, "lm3533-hwen"); if (ret < 0) { dev_err(lm3533->dev, "failed to request HWEN GPIO %d\n", @@ -528,8 +528,6 @@ err_unregister: mfd_remove_devices(lm3533->dev); err_disable: lm3533_disable(lm3533); - if (gpio_is_valid(lm3533->gpio_hwen)) - gpio_free(lm3533->gpio_hwen); return ret; } @@ -542,8 +540,6 @@ static void lm3533_device_exit(struct lm3533 *lm3533) mfd_remove_devices(lm3533->dev); lm3533_disable(lm3533); - if (gpio_is_valid(lm3533->gpio_hwen)) - gpio_free(lm3533->gpio_hwen); } static bool lm3533_readable_register(struct device *dev, unsigned int reg) diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index 4d73963cd8f..1cbb17609c8 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -46,7 +46,7 @@ static struct regmap_config max77686_regmap_config = { #ifdef CONFIG_OF static struct of_device_id max77686_pmic_dt_match[] = { - {.compatible = "maxim,max77686", .data = 0}, + {.compatible = "maxim,max77686", .data = NULL}, {}, }; diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c index 3032bae20b6..77189daadf1 100644 --- a/drivers/mfd/mc13xxx-spi.c +++ b/drivers/mfd/mc13xxx-spi.c @@ -131,7 +131,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) if (!mc13xxx) return -ENOMEM; - dev_set_drvdata(&spi->dev, mc13xxx); + spi_set_drvdata(spi, mc13xxx); spi->mode = SPI_MODE_0 | SPI_CS_HIGH; mc13xxx->dev = &spi->dev; @@ -144,7 +144,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) ret = PTR_ERR(mc13xxx->regmap); dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", ret); - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); return ret; } @@ -164,7 +164,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) static int mc13xxx_spi_remove(struct spi_device *spi) { - struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); + struct mc13xxx *mc13xxx = spi_get_drvdata(spi); mc13xxx_common_cleanup(mc13xxx); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 4febc5c7fde..759fae3ca7f 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -1,8 +1,9 @@ /** * omap-usb-host.c - The USBHS core driver for OMAP EHCI & OHCI * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2011-2013 Texas Instruments Incorporated - http://www.ti.com * Author: Keshava Munegowda <keshava_mgowda@ti.com> + * Author: Roger Quadros <rogerq@ti.com> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 of @@ -27,6 +28,9 @@ #include <linux/platform_device.h> #include <linux/platform_data/usb-omap.h> #include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/err.h> #include "omap-usb.h" @@ -137,6 +141,49 @@ static inline u8 usbhs_readb(void __iomem *base, u8 reg) /*-------------------------------------------------------------------------*/ +/** + * Map 'enum usbhs_omap_port_mode' found in <linux/platform_data/usb-omap.h> + * to the device tree binding portN-mode found in + * 'Documentation/devicetree/bindings/mfd/omap-usb-host.txt' + */ +static const char * const port_modes[] = { + [OMAP_USBHS_PORT_MODE_UNUSED] = "", + [OMAP_EHCI_PORT_MODE_PHY] = "ehci-phy", + [OMAP_EHCI_PORT_MODE_TLL] = "ehci-tll", + [OMAP_EHCI_PORT_MODE_HSIC] = "ehci-hsic", + [OMAP_OHCI_PORT_MODE_PHY_6PIN_DATSE0] = "ohci-phy-6pin-datse0", + [OMAP_OHCI_PORT_MODE_PHY_6PIN_DPDM] = "ohci-phy-6pin-dpdm", + [OMAP_OHCI_PORT_MODE_PHY_3PIN_DATSE0] = 
"ohci-phy-3pin-datse0", + [OMAP_OHCI_PORT_MODE_PHY_4PIN_DPDM] = "ohci-phy-4pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_6PIN_DATSE0] = "ohci-tll-6pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_6PIN_DPDM] = "ohci-tll-6pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_3PIN_DATSE0] = "ohci-tll-3pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_4PIN_DPDM] = "ohci-tll-4pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_2PIN_DATSE0] = "ohci-tll-2pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_2PIN_DPDM] = "ohci-tll-2pin-dpdm", +}; + +/** + * omap_usbhs_get_dt_port_mode - Get the 'enum usbhs_omap_port_mode' + * from the port mode string. + * @mode: The port mode string, usually obtained from device tree. + * + * The function returns the 'enum usbhs_omap_port_mode' that matches the + * provided port mode string as per the port_modes table. + * If no match is found it returns -ENODEV + */ +static const int omap_usbhs_get_dt_port_mode(const char *mode) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(port_modes); i++) { + if (!strcmp(mode, port_modes[i])) + return i; + } + + return -ENODEV; +} + static struct platform_device *omap_usbhs_alloc_child(const char *name, struct resource *res, int num_resources, void *pdata, size_t pdata_size, struct device *dev) @@ -278,7 +325,7 @@ static int usbhs_runtime_resume(struct device *dev) dev_dbg(dev, "usbhs_runtime_resume\n"); - omap_tll_enable(); + omap_tll_enable(pdata); if (!IS_ERR(omap->ehci_logic_fck)) clk_enable(omap->ehci_logic_fck); @@ -353,7 +400,7 @@ static int usbhs_runtime_suspend(struct device *dev) if (!IS_ERR(omap->ehci_logic_fck)) clk_disable(omap->ehci_logic_fck); - omap_tll_disable(); + omap_tll_disable(pdata); return 0; } @@ -430,24 +477,10 @@ static unsigned omap_usbhs_rev2_hostconfig(struct usbhs_hcd_omap *omap, static void omap_usbhs_init(struct device *dev) { struct usbhs_hcd_omap *omap = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = omap->pdata; unsigned reg; dev_dbg(dev, "starting TI HSUSB Controller\n"); - if (pdata->phy_reset) { - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_request_one(pdata->reset_gpio_port[0], - GPIOF_OUT_INIT_LOW, "USB1 PHY reset"); - - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_request_one(pdata->reset_gpio_port[1], - GPIOF_OUT_INIT_LOW, "USB2 PHY reset"); - - /* Hold the PHY in RESET for enough time till DIR is high */ - udelay(10); - } - pm_runtime_get_sync(dev); reg = usbhs_read(omap->uhh_base, OMAP_UHH_HOSTCONFIG); @@ -476,36 +509,59 @@ static void omap_usbhs_init(struct device *dev) dev_dbg(dev, "UHH setup done, uhh_hostconfig=%x\n", reg); pm_runtime_put_sync(dev); - if (pdata->phy_reset) { - /* Hold the PHY in RESET for enough time till - * PHY is settled and ready - */ - udelay(10); +} + +static int usbhs_omap_get_dt_pdata(struct device *dev, + struct usbhs_omap_platform_data *pdata) +{ + int ret, i; + struct device_node *node = dev->of_node; - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_set_value_cansleep - (pdata->reset_gpio_port[0], 1); + ret = of_property_read_u32(node, "num-ports", &pdata->nports); + if (ret) + pdata->nports = 0; - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_set_value_cansleep - (pdata->reset_gpio_port[1], 1); + if (pdata->nports > OMAP3_HS_USB_PORTS) { + dev_warn(dev, "Too many num_ports <%d> in device tree. 
Max %d\n", + pdata->nports, OMAP3_HS_USB_PORTS); + return -ENODEV; } -} -static void omap_usbhs_deinit(struct device *dev) -{ - struct usbhs_hcd_omap *omap = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = omap->pdata; + /* get port modes */ + for (i = 0; i < OMAP3_HS_USB_PORTS; i++) { + char prop[11]; + const char *mode; - if (pdata->phy_reset) { - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_free(pdata->reset_gpio_port[0]); + pdata->port_mode[i] = OMAP_USBHS_PORT_MODE_UNUSED; + + snprintf(prop, sizeof(prop), "port%d-mode", i + 1); + ret = of_property_read_string(node, prop, &mode); + if (ret < 0) + continue; + + ret = omap_usbhs_get_dt_port_mode(mode); + if (ret < 0) { + dev_warn(dev, "Invalid port%d-mode \"%s\" in device tree\n", + i, mode); + return -ENODEV; + } - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_free(pdata->reset_gpio_port[1]); + dev_dbg(dev, "port%d-mode: %s -> %d\n", i, mode, ret); + pdata->port_mode[i] = ret; } + + /* get flags */ + pdata->single_ulpi_bypass = of_property_read_bool(node, + "single-ulpi-bypass"); + + return 0; } +static struct of_device_id usbhs_child_match_table[] = { + { .compatible = "ti,omap-ehci", }, + { .compatible = "ti,omap-ohci", }, + { } +}; /** * usbhs_omap_probe - initialize TI-based HCDs @@ -522,26 +578,46 @@ static int usbhs_omap_probe(struct platform_device *pdev) int i; bool need_logic_fck; + if (dev->of_node) { + /* For DT boot we populate platform data from OF node */ + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + ret = usbhs_omap_get_dt_pdata(dev, pdata); + if (ret) + return ret; + + dev->platform_data = pdata; + } + if (!pdata) { dev_err(dev, "Missing platform data\n"); return -ENODEV; } + if (pdata->nports > OMAP3_HS_USB_PORTS) { + dev_info(dev, "Too many num_ports <%d> in platform_data. 
Max %d\n", + pdata->nports, OMAP3_HS_USB_PORTS); + return -ENODEV; + } + omap = devm_kzalloc(dev, sizeof(*omap), GFP_KERNEL); if (!omap) { dev_err(dev, "Memory allocation failed\n"); return -ENOMEM; } - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "uhh"); - omap->uhh_base = devm_request_and_ioremap(dev, res); - if (!omap->uhh_base) { - dev_err(dev, "Resource request/ioremap failed\n"); - return -EADDRNOTAVAIL; - } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + omap->uhh_base = devm_ioremap_resource(dev, res); + if (IS_ERR(omap->uhh_base)) + return PTR_ERR(omap->uhh_base); omap->pdata = pdata; + /* Initialize the TLL subsystem */ + omap_tll_init(pdata); + pm_runtime_enable(dev); platform_set_drvdata(pdev, omap); @@ -575,6 +651,7 @@ static int usbhs_omap_probe(struct platform_device *pdev) omap->usbhs_rev, omap->nports); break; } + pdata->nports = omap->nports; } i = sizeof(struct clk *) * omap->nports; @@ -700,17 +777,28 @@ static int usbhs_omap_probe(struct platform_device *pdev) } omap_usbhs_init(dev); - ret = omap_usbhs_alloc_children(pdev); - if (ret) { - dev_err(dev, "omap_usbhs_alloc_children failed\n"); - goto err_alloc; + + if (dev->of_node) { + ret = of_platform_populate(dev->of_node, + usbhs_child_match_table, NULL, dev); + + if (ret) { + dev_err(dev, "Failed to create DT children: %d\n", ret); + goto err_alloc; + } + + } else { + ret = omap_usbhs_alloc_children(pdev); + if (ret) { + dev_err(dev, "omap_usbhs_alloc_children failed: %d\n", + ret); + goto err_alloc; + } } return 0; err_alloc: - omap_usbhs_deinit(&pdev->dev); - for (i = 0; i < omap->nports; i++) { if (!IS_ERR(omap->utmi_clk[i])) clk_put(omap->utmi_clk[i]); @@ -744,6 +832,13 @@ err_mem: return ret; } +static int usbhs_omap_remove_child(struct device *dev, void *data) +{ + dev_info(dev, "unregistering\n"); + platform_device_unregister(to_platform_device(dev)); + return 0; +} + /** * usbhs_omap_remove - shutdown processing for UHH & TLL HCDs * @pdev: USB Host Controller being removed @@ -755,8 +850,6 @@ static int usbhs_omap_remove(struct platform_device *pdev) struct usbhs_hcd_omap *omap = platform_get_drvdata(pdev); int i; - omap_usbhs_deinit(&pdev->dev); - for (i = 0; i < omap->nports; i++) { if (!IS_ERR(omap->utmi_clk[i])) clk_put(omap->utmi_clk[i]); @@ -777,6 +870,8 @@ static int usbhs_omap_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); + /* remove children */ + device_for_each_child(&pdev->dev, NULL, usbhs_omap_remove_child); return 0; } @@ -785,16 +880,26 @@ static const struct dev_pm_ops usbhsomap_dev_pm_ops = { .runtime_resume = usbhs_runtime_resume, }; +static const struct of_device_id usbhs_omap_dt_ids[] = { + { .compatible = "ti,usbhs-host" }, + { } +}; + +MODULE_DEVICE_TABLE(of, usbhs_omap_dt_ids); + + static struct platform_driver usbhs_omap_driver = { .driver = { .name = (char *)usbhs_driver_name, .owner = THIS_MODULE, .pm = &usbhsomap_dev_pm_ops, + .of_match_table = of_match_ptr(usbhs_omap_dt_ids), }, .remove = usbhs_omap_remove, }; MODULE_AUTHOR("Keshava Munegowda <keshava_mgowda@ti.com>"); +MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>"); MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c index 0aef1a76888..e59ac4cbac9 100644 --- a/drivers/mfd/omap-usb-tll.c +++ b/drivers/mfd/omap-usb-tll.c @@ -1,8 +1,9 @@ /** * omap-usb-tll.c - The USB TLL driver for OMAP EHCI & OHCI * - * Copyright (C) 2012 Texas 
Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012-2013 Texas Instruments Incorporated - http://www.ti.com * Author: Keshava Munegowda <keshava_mgowda@ti.com> + * Author: Roger Quadros <rogerq@ti.com> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 of @@ -27,6 +28,7 @@ #include <linux/err.h> #include <linux/pm_runtime.h> #include <linux/platform_data/usb-omap.h> +#include <linux/of.h> #define USBTLL_DRIVER_NAME "usbhs_tll" @@ -105,8 +107,8 @@ struct usbtll_omap { int nch; /* num. of channels */ - struct usbhs_omap_platform_data *pdata; struct clk **ch_clk; + void __iomem *base; }; /*-------------------------------------------------------------------------*/ @@ -210,14 +212,10 @@ static unsigned ohci_omap3_fslsmode(enum usbhs_omap_port_mode mode) static int usbtll_omap_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct usbhs_omap_platform_data *pdata = dev->platform_data; - void __iomem *base; struct resource *res; struct usbtll_omap *tll; - unsigned reg; int ret = 0; int i, ver; - bool needs_tll; dev_dbg(dev, "starting TI HSUSB TLL Controller\n"); @@ -227,26 +225,16 @@ static int usbtll_omap_probe(struct platform_device *pdev) return -ENOMEM; } - if (!pdata) { - dev_err(dev, "Platform data missing\n"); - return -ENODEV; - } - - tll->pdata = pdata; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_request_and_ioremap(dev, res); - if (!base) { - ret = -EADDRNOTAVAIL; - dev_err(dev, "Resource request/ioremap failed:%d\n", ret); - return ret; - } + tll->base = devm_ioremap_resource(dev, res); + if (IS_ERR(tll->base)) + return PTR_ERR(tll->base); platform_set_drvdata(pdev, tll); pm_runtime_enable(dev); pm_runtime_get_sync(dev); - ver = usbtll_read(base, OMAP_USBTLL_REVISION); + ver = usbtll_read(tll->base, OMAP_USBTLL_REVISION); switch (ver) { case OMAP_USBTLL_REV1: case OMAP_USBTLL_REV4: @@ -283,11 +271,85 @@ static int usbtll_omap_probe(struct platform_device *pdev) dev_dbg(dev, "can't get clock : %s\n", clkname); } + pm_runtime_put_sync(dev); + /* only after this can omap_tll_enable/disable work */ + spin_lock(&tll_lock); + tll_dev = dev; + spin_unlock(&tll_lock); + + return 0; + +err_clk_alloc: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + + return ret; +} + +/** + * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs + * @pdev: USB Host Controller being removed + * + * Reverses the effect of usbtll_omap_probe(). 
+ */ +static int usbtll_omap_remove(struct platform_device *pdev) +{ + struct usbtll_omap *tll = platform_get_drvdata(pdev); + int i; + + spin_lock(&tll_lock); + tll_dev = NULL; + spin_unlock(&tll_lock); + + for (i = 0; i < tll->nch; i++) + if (!IS_ERR(tll->ch_clk[i])) + clk_put(tll->ch_clk[i]); + + pm_runtime_disable(&pdev->dev); + return 0; +} + +static const struct of_device_id usbtll_omap_dt_ids[] = { + { .compatible = "ti,usbhs-tll" }, + { } +}; + +MODULE_DEVICE_TABLE(of, usbtll_omap_dt_ids); + +static struct platform_driver usbtll_omap_driver = { + .driver = { + .name = (char *)usbtll_driver_name, + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(usbtll_omap_dt_ids), + }, + .probe = usbtll_omap_probe, + .remove = usbtll_omap_remove, +}; + +int omap_tll_init(struct usbhs_omap_platform_data *pdata) +{ + int i; + bool needs_tll; + unsigned reg; + struct usbtll_omap *tll; + + spin_lock(&tll_lock); + + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } + + tll = dev_get_drvdata(tll_dev); + needs_tll = false; for (i = 0; i < tll->nch; i++) needs_tll |= omap_usb_mode_needs_tll(pdata->port_mode[i]); + pm_runtime_get_sync(tll_dev); + if (needs_tll) { + void __iomem *base = tll->base; /* Program Common TLL register */ reg = usbtll_read(base, OMAP_TLL_SHARED_CONF); @@ -336,51 +398,29 @@ static int usbtll_omap_probe(struct platform_device *pdev) } } - pm_runtime_put_sync(dev); - /* only after this can omap_tll_enable/disable work */ - spin_lock(&tll_lock); - tll_dev = dev; + pm_runtime_put_sync(tll_dev); + spin_unlock(&tll_lock); return 0; - -err_clk_alloc: - pm_runtime_put_sync(dev); - pm_runtime_disable(dev); - - return ret; } +EXPORT_SYMBOL_GPL(omap_tll_init); -/** - * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs - * @pdev: USB Host Controller being removed - * - * Reverses the effect of usbtll_omap_probe(). 
- */ -static int usbtll_omap_remove(struct platform_device *pdev) +int omap_tll_enable(struct usbhs_omap_platform_data *pdata) { - struct usbtll_omap *tll = platform_get_drvdata(pdev); int i; + struct usbtll_omap *tll; spin_lock(&tll_lock); - tll_dev = NULL; - spin_unlock(&tll_lock); - for (i = 0; i < tll->nch; i++) - if (!IS_ERR(tll->ch_clk[i])) - clk_put(tll->ch_clk[i]); - - pm_runtime_disable(&pdev->dev); - return 0; -} + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } -static int usbtll_runtime_resume(struct device *dev) -{ - struct usbtll_omap *tll = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = tll->pdata; - int i; + tll = dev_get_drvdata(tll_dev); - dev_dbg(dev, "usbtll_runtime_resume\n"); + pm_runtime_get_sync(tll_dev); for (i = 0; i < tll->nch; i++) { if (omap_usb_mode_needs_tll(pdata->port_mode[i])) { @@ -391,22 +431,31 @@ static int usbtll_runtime_resume(struct device *dev) r = clk_enable(tll->ch_clk[i]); if (r) { - dev_err(dev, + dev_err(tll_dev, "Error enabling ch %d clock: %d\n", i, r); } } } + spin_unlock(&tll_lock); + return 0; } +EXPORT_SYMBOL_GPL(omap_tll_enable); -static int usbtll_runtime_suspend(struct device *dev) +int omap_tll_disable(struct usbhs_omap_platform_data *pdata) { - struct usbtll_omap *tll = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = tll->pdata; int i; + struct usbtll_omap *tll; - dev_dbg(dev, "usbtll_runtime_suspend\n"); + spin_lock(&tll_lock); + + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } + + tll = dev_get_drvdata(tll_dev); for (i = 0; i < tll->nch; i++) { if (omap_usb_mode_needs_tll(pdata->port_mode[i])) { @@ -415,64 +464,16 @@ static int usbtll_runtime_suspend(struct device *dev) } } - return 0; -} - -static const struct dev_pm_ops usbtllomap_dev_pm_ops = { - SET_RUNTIME_PM_OPS(usbtll_runtime_suspend, - usbtll_runtime_resume, - NULL) -}; - -static struct platform_driver usbtll_omap_driver = { - .driver = { - .name = (char *)usbtll_driver_name, - .owner = THIS_MODULE, - .pm = &usbtllomap_dev_pm_ops, - }, - .probe = usbtll_omap_probe, - .remove = usbtll_omap_remove, -}; - -int omap_tll_enable(void) -{ - int ret; - - spin_lock(&tll_lock); - - if (!tll_dev) { - pr_err("%s: OMAP USB TLL not initialized\n", __func__); - ret = -ENODEV; - } else { - ret = pm_runtime_get_sync(tll_dev); - } - - spin_unlock(&tll_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(omap_tll_enable); - -int omap_tll_disable(void) -{ - int ret; - - spin_lock(&tll_lock); - - if (!tll_dev) { - pr_err("%s: OMAP USB TLL not initialized\n", __func__); - ret = -ENODEV; - } else { - ret = pm_runtime_put_sync(tll_dev); - } + pm_runtime_put_sync(tll_dev); spin_unlock(&tll_lock); - return ret; + return 0; } EXPORT_SYMBOL_GPL(omap_tll_disable); MODULE_AUTHOR("Keshava Munegowda <keshava_mgowda@ti.com>"); +MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>"); MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb tll driver for TI OMAP EHCI and OHCI controllers"); diff --git a/drivers/mfd/omap-usb.h b/drivers/mfd/omap-usb.h index 972aa961b06..2a508b6aeac 100644 --- a/drivers/mfd/omap-usb.h +++ b/drivers/mfd/omap-usb.h @@ -1,2 +1,3 @@ -extern int omap_tll_enable(void); -extern int omap_tll_disable(void); +extern int omap_tll_init(struct usbhs_omap_platform_data *pdata); +extern int omap_tll_enable(struct usbhs_omap_platform_data *pdata); +extern int omap_tll_disable(struct usbhs_omap_platform_data *pdata); diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 73bf76df104..53e9fe638d3 
100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -278,20 +278,20 @@ static void palmas_dt_to_pdata(struct i2c_client *i2c, int ret; u32 prop; - ret = of_property_read_u32(node, "ti,mux_pad1", &prop); + ret = of_property_read_u32(node, "ti,mux-pad1", &prop); if (!ret) { pdata->mux_from_pdata = 1; pdata->pad1 = prop; } - ret = of_property_read_u32(node, "ti,mux_pad2", &prop); + ret = of_property_read_u32(node, "ti,mux-pad2", &prop); if (!ret) { pdata->mux_from_pdata = 1; pdata->pad2 = prop; } /* The default for this register is all masked */ - ret = of_property_read_u32(node, "ti,power_ctrl", &prop); + ret = of_property_read_u32(node, "ti,power-ctrl", &prop); if (!ret) pdata->power_ctrl = prop; else @@ -349,6 +349,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c, ret = -ENOMEM; goto err; } + palmas->i2c_clients[i]->dev.of_node = of_node_get(node); } palmas->regmap[i] = devm_regmap_init_i2c(palmas->i2c_clients[i], &palmas_regmap_config[i]); diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c index 3ba048655bf..a1830986eeb 100644 --- a/drivers/mfd/retu-mfd.c +++ b/drivers/mfd/retu-mfd.c @@ -1,5 +1,5 @@ /* - * Retu MFD driver + * Retu/Tahvo MFD driver * * Copyright (C) 2004, 2005 Nokia Corporation * @@ -33,7 +33,8 @@ #define RETU_REG_ASICR 0x00 /* ASIC ID and revision */ #define RETU_REG_ASICR_VILMA (1 << 7) /* Bit indicating Vilma */ #define RETU_REG_IDR 0x01 /* Interrupt ID */ -#define RETU_REG_IMR 0x02 /* Interrupt mask */ +#define RETU_REG_IMR 0x02 /* Interrupt mask (Retu) */ +#define TAHVO_REG_IMR 0x03 /* Interrupt mask (Tahvo) */ /* Interrupt sources */ #define RETU_INT_PWR 0 /* Power button */ @@ -84,6 +85,62 @@ static struct regmap_irq_chip retu_irq_chip = { /* Retu device registered for the power off. */ static struct retu_dev *retu_pm_power_off; +static struct resource tahvo_usb_res[] = { + { + .name = "tahvo-usb", + .start = TAHVO_INT_VBUS, + .end = TAHVO_INT_VBUS, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell tahvo_devs[] = { + { + .name = "tahvo-usb", + .resources = tahvo_usb_res, + .num_resources = ARRAY_SIZE(tahvo_usb_res), + }, +}; + +static struct regmap_irq tahvo_irqs[] = { + [TAHVO_INT_VBUS] = { + .mask = 1 << TAHVO_INT_VBUS, + } +}; + +static struct regmap_irq_chip tahvo_irq_chip = { + .name = "TAHVO", + .irqs = tahvo_irqs, + .num_irqs = ARRAY_SIZE(tahvo_irqs), + .num_regs = 1, + .status_base = RETU_REG_IDR, + .mask_base = TAHVO_REG_IMR, + .ack_base = RETU_REG_IDR, +}; + +static const struct retu_data { + char *chip_name; + char *companion_name; + struct regmap_irq_chip *irq_chip; + struct mfd_cell *children; + int nchildren; +} retu_data[] = { + [0] = { + .chip_name = "Retu", + .companion_name = "Vilma", + .irq_chip = &retu_irq_chip, + .children = retu_devs, + .nchildren = ARRAY_SIZE(retu_devs), + }, + [1] = { + .chip_name = "Tahvo", + .companion_name = "Betty", + .irq_chip = &tahvo_irq_chip, + .children = tahvo_devs, + .nchildren = ARRAY_SIZE(tahvo_devs), + } +}; + int retu_read(struct retu_dev *rdev, u8 reg) { int ret; @@ -173,9 +230,14 @@ static struct regmap_config retu_config = { static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + struct retu_data const *rdat; struct retu_dev *rdev; int ret; + if (i2c->addr < 1 || i2c->addr > ARRAY_SIZE(retu_data)) + return -ENODEV; + rdat = &retu_data[i2c->addr - 1]; + rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL); if (rdev == NULL) return -ENOMEM; @@ -190,25 +252,27 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) ret = 
retu_read(rdev, RETU_REG_ASICR); if (ret < 0) { - dev_err(rdev->dev, "could not read Retu revision: %d\n", ret); + dev_err(rdev->dev, "could not read %s revision: %d\n", + rdat->chip_name, ret); return ret; } - dev_info(rdev->dev, "Retu%s v%d.%d found\n", - (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "", + dev_info(rdev->dev, "%s%s%s v%d.%d found\n", rdat->chip_name, + (ret & RETU_REG_ASICR_VILMA) ? " & " : "", + (ret & RETU_REG_ASICR_VILMA) ? rdat->companion_name : "", (ret >> 4) & 0x7, ret & 0xf); - /* Mask all RETU interrupts. */ - ret = retu_write(rdev, RETU_REG_IMR, 0xffff); + /* Mask all interrupts. */ + ret = retu_write(rdev, rdat->irq_chip->mask_base, 0xffff); if (ret < 0) return ret; ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1, - &retu_irq_chip, &rdev->irq_data); + rdat->irq_chip, &rdev->irq_data); if (ret < 0) return ret; - ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs), + ret = mfd_add_devices(rdev->dev, -1, rdat->children, rdat->nchildren, NULL, regmap_irq_chip_get_base(rdev->irq_data), NULL); if (ret < 0) { @@ -216,7 +280,7 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return ret; } - if (!pm_power_off) { + if (i2c->addr == 1 && !pm_power_off) { retu_pm_power_off = rdev; pm_power_off = retu_power_off; } @@ -240,6 +304,7 @@ static int retu_remove(struct i2c_client *i2c) static const struct i2c_device_id retu_id[] = { { "retu-mfd", 0 }, + { "tahvo-mfd", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, retu_id); diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c new file mode 100644 index 00000000000..15dc848bc08 --- /dev/null +++ b/drivers/mfd/rts5249.c @@ -0,0 +1,241 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: + * Wei WANG <wei_wang@realsil.com.cn> + * No. 
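/*
 * [Sketch, not part of the patch] The retu-mfd changes above make
 * the probe table-driven: the chip's I2C address (1 = Retu,
 * 2 = Tahvo) selects a retu_data[] descriptor carrying the name,
 * IRQ chip and child cells. The selection idiom in standalone C;
 * note the guard must also reject address 0 before subtracting 1:
 */
#include <stdio.h>

struct variant {
	const char *chip_name;
};

static const struct variant variants[] = {
	{ "Retu" },	/* i2c address 1 */
	{ "Tahvo" },	/* i2c address 2 */
};

static const struct variant *lookup(unsigned int addr)
{
	if (addr < 1 || addr > sizeof(variants) / sizeof(variants[0]))
		return NULL;	/* would be -ENODEV in the driver */
	return &variants[addr - 1];
}

int main(void)
{
	const struct variant *v = lookup(2);

	printf("%s\n", v ? v->chip_name : "unknown");	/* Tahvo */
	return 0;
}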
128, West Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/mfd/rtsx_pci.h> + +#include "rtsx_pcr.h" + +static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Correct driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_CLK_DRIVE_SEL, 0xFF, 0x99); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_CMD_DRIVE_SEL, 0xFF, 0x99); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_DAT_DRIVE_SEL, 0xFF, 0x92); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5249_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, 0xFE46); + if (err < 0) + return err; + + msleep(1); + + return rtsx_pci_write_phy_register(pcr, PHY_BPCR, 0x05C0); +} + +static int rts5249_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5249_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5249_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5249_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5249_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + msleep(5); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + return 0; +} + +static int rts5249_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5249_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u8 clk_drive, cmd_drive, dat_drive; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4FC0 | 0x24); + if (err < 0) + return err; + clk_drive = 0x99; + cmd_drive = 0x99; + dat_drive = 0x92; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, PHY_BACR, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4C40 | 0x24); + if (err < 0) + return err; + clk_drive = 0xb3; + cmd_drive = 0xb3; + dat_drive = 0xb3; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, 
SD30_CLK_DRIVE_SEL, + 0xFF, clk_drive); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, cmd_drive); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, dat_drive); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5249_pcr_ops = { + .extra_init_hw = rts5249_extra_init_hw, + .optimize_phy = rts5249_optimize_phy, + .turn_on_led = rts5249_turn_on_led, + .turn_off_led = rts5249_turn_off_led, + .enable_auto_blink = rts5249_enable_auto_blink, + .disable_auto_blink = rts5249_disable_auto_blink, + .card_power_on = rts5249_card_power_on, + .card_power_off = rts5249_card_power_off, + .switch_output_voltage = rts5249_switch_output_voltage, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5249_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5249_pcr_ops; + + pcr->ic_version = rts5249_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 2f12cc13489..e968c01ca2a 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -56,6 +56,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = { { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -1033,6 +1034,10 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5227: rts5227_init_params(pcr); break; + + case 0x5249: + rts5249_init_params(pcr); + break; } dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n", @@ -1138,7 +1143,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, ret = rtsx_pci_acquire_irq(pcr); if (ret < 0) - goto free_dma; + goto disable_msi; pci_set_master(pcidev); synchronize_irq(pcr->irq); @@ -1162,7 +1167,9 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, disable_irq: free_irq(pcr->irq, (void *)pcr); -free_dma: 
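/*
 * [Sketch, not part of the patch] The rtsx_pci_probe() fix above
 * re-points the IRQ-acquisition failure at a new disable_msi
 * label so MSI is torn down before the DMA buffer is released.
 * The general goto-unwind ordering (resources released in reverse
 * order of acquisition), as runnable standalone C with
 * illustrative grab/put helpers:
 */
#include <stdio.h>

static int grab_dma(void)  { puts("dma mapped");  return 0; }
static void put_dma(void)  { puts("dma freed");             }
static int grab_msi(void)  { puts("msi enabled"); return 0; }
static void put_msi(void)  { puts("msi disabled");          }
static int grab_irq(void)  { return -1; /* simulated failure */ }

static int probe(void)
{
	int err;

	err = grab_dma();
	if (err)
		goto out;
	err = grab_msi();
	if (err)
		goto free_dma;
	err = grab_irq();
	if (err)
		goto disable_msi;	/* undo MSI first, then DMA */
	return 0;

disable_msi:
	put_msi();
free_dma:
	put_dma();
out:
	return err;
}

int main(void)
{
	printf("probe: %d\n", probe());
	return 0;
}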
+disable_msi: + if (pcr->msi_en) + pci_disable_msi(pcr->pci); dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); unmap: diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h index 2b3ab8a0482..55fcfc25c4e 100644 --- a/drivers/mfd/rtsx_pcr.h +++ b/drivers/mfd/rtsx_pcr.h @@ -32,5 +32,6 @@ void rts5209_init_params(struct rtsx_pcr *pcr); void rts5229_init_params(struct rtsx_pcr *pcr); void rtl8411_init_params(struct rtsx_pcr *pcr); void rts5227_init_params(struct rtsx_pcr *pcr); +void rts5249_init_params(struct rtsx_pcr *pcr); #endif diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c new file mode 100644 index 00000000000..de48b4e8845 --- /dev/null +++ b/drivers/mfd/si476x-cmd.c @@ -0,0 +1,1553 @@ +/* + * drivers/mfd/si476x-cmd.c -- Subroutines implementing command + * protocol of si476x series of chips + * + * Copyright (C) 2012 Innovative Converged Devices(ICD) + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <andrew.smirnov@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/i2c.h> +#include <linux/device.h> +#include <linux/gpio.h> +#include <linux/videodev2.h> + +#include <linux/mfd/si476x-core.h> + +#define msb(x) ((u8)((u16) x >> 8)) +#define lsb(x) ((u8)((u16) x & 0x00FF)) + + + +#define CMD_POWER_UP 0x01 +#define CMD_POWER_UP_A10_NRESP 1 +#define CMD_POWER_UP_A10_NARGS 5 + +#define CMD_POWER_UP_A20_NRESP 1 +#define CMD_POWER_UP_A20_NARGS 5 + +#define POWER_UP_DELAY_MS 110 + +#define CMD_POWER_DOWN 0x11 +#define CMD_POWER_DOWN_A10_NRESP 1 + +#define CMD_POWER_DOWN_A20_NRESP 1 +#define CMD_POWER_DOWN_A20_NARGS 1 + +#define CMD_FUNC_INFO 0x12 +#define CMD_FUNC_INFO_NRESP 7 + +#define CMD_SET_PROPERTY 0x13 +#define CMD_SET_PROPERTY_NARGS 5 +#define CMD_SET_PROPERTY_NRESP 1 + +#define CMD_GET_PROPERTY 0x14 +#define CMD_GET_PROPERTY_NARGS 3 +#define CMD_GET_PROPERTY_NRESP 4 + +#define CMD_AGC_STATUS 0x17 +#define CMD_AGC_STATUS_NRESP_A10 2 +#define CMD_AGC_STATUS_NRESP_A20 6 + +#define PIN_CFG_BYTE(x) (0x7F & (x)) +#define CMD_DIG_AUDIO_PIN_CFG 0x18 +#define CMD_DIG_AUDIO_PIN_CFG_NARGS 4 +#define CMD_DIG_AUDIO_PIN_CFG_NRESP 5 + +#define CMD_ZIF_PIN_CFG 0x19 +#define CMD_ZIF_PIN_CFG_NARGS 4 +#define CMD_ZIF_PIN_CFG_NRESP 5 + +#define CMD_IC_LINK_GPO_CTL_PIN_CFG 0x1A +#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS 4 +#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP 5 + +#define CMD_ANA_AUDIO_PIN_CFG 0x1B +#define CMD_ANA_AUDIO_PIN_CFG_NARGS 1 +#define CMD_ANA_AUDIO_PIN_CFG_NRESP 2 + +#define CMD_INTB_PIN_CFG 0x1C +#define CMD_INTB_PIN_CFG_NARGS 2 +#define CMD_INTB_PIN_CFG_A10_NRESP 6 +#define CMD_INTB_PIN_CFG_A20_NRESP 3 + +#define CMD_FM_TUNE_FREQ 0x30 +#define CMD_FM_TUNE_FREQ_A10_NARGS 5 +#define CMD_FM_TUNE_FREQ_A20_NARGS 3 +#define CMD_FM_TUNE_FREQ_NRESP 1 + +#define CMD_FM_RSQ_STATUS 0x32 + +#define CMD_FM_RSQ_STATUS_A10_NARGS 1 +#define CMD_FM_RSQ_STATUS_A10_NRESP 17 +#define CMD_FM_RSQ_STATUS_A30_NARGS 1 +#define CMD_FM_RSQ_STATUS_A30_NRESP 23 + + +#define 
CMD_FM_SEEK_START 0x31 +#define CMD_FM_SEEK_START_NARGS 1 +#define CMD_FM_SEEK_START_NRESP 1 + +#define CMD_FM_RDS_STATUS 0x36 +#define CMD_FM_RDS_STATUS_NARGS 1 +#define CMD_FM_RDS_STATUS_NRESP 16 + +#define CMD_FM_RDS_BLOCKCOUNT 0x37 +#define CMD_FM_RDS_BLOCKCOUNT_NARGS 1 +#define CMD_FM_RDS_BLOCKCOUNT_NRESP 8 + +#define CMD_FM_PHASE_DIVERSITY 0x38 +#define CMD_FM_PHASE_DIVERSITY_NARGS 1 +#define CMD_FM_PHASE_DIVERSITY_NRESP 1 + +#define CMD_FM_PHASE_DIV_STATUS 0x39 +#define CMD_FM_PHASE_DIV_STATUS_NRESP 2 + +#define CMD_AM_TUNE_FREQ 0x40 +#define CMD_AM_TUNE_FREQ_NARGS 3 +#define CMD_AM_TUNE_FREQ_NRESP 1 + +#define CMD_AM_RSQ_STATUS 0x42 +#define CMD_AM_RSQ_STATUS_NARGS 1 +#define CMD_AM_RSQ_STATUS_NRESP 13 + +#define CMD_AM_SEEK_START 0x41 +#define CMD_AM_SEEK_START_NARGS 1 +#define CMD_AM_SEEK_START_NRESP 1 + + +#define CMD_AM_ACF_STATUS 0x45 +#define CMD_AM_ACF_STATUS_NRESP 6 +#define CMD_AM_ACF_STATUS_NARGS 1 + +#define CMD_FM_ACF_STATUS 0x35 +#define CMD_FM_ACF_STATUS_NRESP 8 +#define CMD_FM_ACF_STATUS_NARGS 1 + +#define CMD_MAX_ARGS_COUNT (10) + + +enum si476x_acf_status_report_bits { + SI476X_ACF_BLEND_INT = (1 << 4), + SI476X_ACF_HIBLEND_INT = (1 << 3), + SI476X_ACF_HICUT_INT = (1 << 2), + SI476X_ACF_CHBW_INT = (1 << 1), + SI476X_ACF_SOFTMUTE_INT = (1 << 0), + + SI476X_ACF_SMUTE = (1 << 0), + SI476X_ACF_SMATTN = 0b11111, + SI476X_ACF_PILOT = (1 << 7), + SI476X_ACF_STBLEND = ~SI476X_ACF_PILOT, +}; + +enum si476x_agc_status_report_bits { + SI476X_AGC_MXHI = (1 << 5), + SI476X_AGC_MXLO = (1 << 4), + SI476X_AGC_LNAHI = (1 << 3), + SI476X_AGC_LNALO = (1 << 2), +}; + +enum si476x_errors { + SI476X_ERR_BAD_COMMAND = 0x10, + SI476X_ERR_BAD_ARG1 = 0x11, + SI476X_ERR_BAD_ARG2 = 0x12, + SI476X_ERR_BAD_ARG3 = 0x13, + SI476X_ERR_BAD_ARG4 = 0x14, + SI476X_ERR_BUSY = 0x18, + SI476X_ERR_BAD_INTERNAL_MEMORY = 0x20, + SI476X_ERR_BAD_PATCH = 0x30, + SI476X_ERR_BAD_BOOT_MODE = 0x31, + SI476X_ERR_BAD_PROPERTY = 0x40, +}; + +static int si476x_core_parse_and_nag_about_error(struct si476x_core *core) +{ + int err; + char *cause; + u8 buffer[2]; + + if (core->revision != SI476X_REVISION_A10) { + err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV, + buffer, sizeof(buffer)); + if (err == sizeof(buffer)) { + switch (buffer[1]) { + case SI476X_ERR_BAD_COMMAND: + cause = "Bad command"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG1: + cause = "Bad argument #1"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG2: + cause = "Bad argument #2"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG3: + cause = "Bad argument #3"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG4: + cause = "Bad argument #4"; + err = -EINVAL; + break; + case SI476X_ERR_BUSY: + cause = "Chip is busy"; + err = -EBUSY; + break; + case SI476X_ERR_BAD_INTERNAL_MEMORY: + cause = "Bad internal memory"; + err = -EIO; + break; + case SI476X_ERR_BAD_PATCH: + cause = "Bad patch"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_BOOT_MODE: + cause = "Bad boot mode"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_PROPERTY: + cause = "Bad property"; + err = -EINVAL; + break; + default: + cause = "Unknown"; + err = -EIO; + } + + dev_err(&core->client->dev, + "[Chip error status]: %s\n", cause); + } else { + dev_err(&core->client->dev, + "Failed to fetch error code\n"); + err = (err >= 0) ? 
-EIO : err; + } + } else { + err = -EIO; + } + + return err; +} + +/** + * si476x_core_send_command() - sends a command to si476x and waits its + * response + * @core: si476x_device structure for the device we are + * communicating with + * @command: command id + * @args: command arguments we are sending + * @argn: actual size of @args + * @response: buffer to place the expected response from the device + * @respn: actual size of @response + * @usecs: amount of time to wait before reading the response (in + * usecs) + * + * Function returns 0 on succsess and negative error code on + * failure + */ +static int si476x_core_send_command(struct si476x_core *core, + const u8 command, + const u8 args[], + const int argn, + u8 resp[], + const int respn, + const int usecs) +{ + struct i2c_client *client = core->client; + int err; + u8 data[CMD_MAX_ARGS_COUNT + 1]; + + if (argn > CMD_MAX_ARGS_COUNT) { + err = -ENOMEM; + goto exit; + } + + if (!client->adapter) { + err = -ENODEV; + goto exit; + } + + /* First send the command and its arguments */ + data[0] = command; + memcpy(&data[1], args, argn); + dev_dbg(&client->dev, "Command:\n %*ph\n", argn + 1, data); + + err = si476x_core_i2c_xfer(core, SI476X_I2C_SEND, + (char *) data, argn + 1); + if (err != argn + 1) { + dev_err(&core->client->dev, + "Error while sending command 0x%02x\n", + command); + err = (err >= 0) ? -EIO : err; + goto exit; + } + /* Set CTS to zero only after the command is send to avoid + * possible racing conditions when working in polling mode */ + atomic_set(&core->cts, 0); + + /* if (unlikely(command == CMD_POWER_DOWN) */ + if (!wait_event_timeout(core->command, + atomic_read(&core->cts), + usecs_to_jiffies(usecs) + 1)) + dev_warn(&core->client->dev, + "(%s) [CMD 0x%02x] Answer timeout.\n", + __func__, command); + + /* + When working in polling mode, for some reason the tuner will + report CTS bit as being set in the first status byte read, + but all the consequtive ones will return zeros until the + tuner is actually completed the POWER_UP command. To + workaround that we wait for second CTS to be reported + */ + if (unlikely(!core->client->irq && command == CMD_POWER_UP)) { + if (!wait_event_timeout(core->command, + atomic_read(&core->cts), + usecs_to_jiffies(usecs) + 1)) + dev_warn(&core->client->dev, + "(%s) Power up took too much time.\n", + __func__); + } + + /* Then get the response */ + err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV, resp, respn); + if (err != respn) { + dev_err(&core->client->dev, + "Error while reading response for command 0x%02x\n", + command); + err = (err >= 0) ? 
-EIO : err; + goto exit; + } + dev_dbg(&client->dev, "Response:\n %*ph\n", respn, resp); + + err = 0; + + if (resp[0] & SI476X_ERR) { + dev_err(&core->client->dev, + "[CMD 0x%02x] Chip set error flag\n", command); + err = si476x_core_parse_and_nag_about_error(core); + goto exit; + } + + if (!(resp[0] & SI476X_CTS)) + err = -EBUSY; +exit: + return err; +} + +static int si476x_cmd_clear_stc(struct si476x_core *core) +{ + int err; + struct si476x_rsq_status_args args = { + .primary = false, + .rsqack = false, + .attune = false, + .cancel = false, + .stcack = true, + }; + + switch (core->power_up_parameters.func) { + case SI476X_FUNC_FM_RECEIVER: + err = si476x_core_cmd_fm_rsq_status(core, &args, NULL); + break; + case SI476X_FUNC_AM_RECEIVER: + err = si476x_core_cmd_am_rsq_status(core, &args, NULL); + break; + default: + err = -EINVAL; + } + + return err; +} + +static int si476x_cmd_tune_seek_freq(struct si476x_core *core, + uint8_t cmd, + const uint8_t args[], size_t argn, + uint8_t *resp, size_t respn) +{ + int err; + + + atomic_set(&core->stc, 0); + err = si476x_core_send_command(core, cmd, args, argn, resp, respn, + SI476X_TIMEOUT_TUNE); + if (!err) { + wait_event_killable(core->tuning, + atomic_read(&core->stc)); + si476x_cmd_clear_stc(core); + } + + return err; +} + +/** + * si476x_cmd_func_info() - send 'FUNC_INFO' command to the device + * @core: device to send the command to + * @info: struct si476x_func_info to fill all the information + * returned by the command + * + * The command requests the firmware and patch version for currently + * loaded firmware (dependent on the function of the device FM/AM/WB) + * + * Function returns 0 on succsess and negative error code on + * failure + */ +int si476x_core_cmd_func_info(struct si476x_core *core, + struct si476x_func_info *info) +{ + int err; + u8 resp[CMD_FUNC_INFO_NRESP]; + + err = si476x_core_send_command(core, CMD_FUNC_INFO, + NULL, 0, + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + + info->firmware.major = resp[1]; + info->firmware.minor[0] = resp[2]; + info->firmware.minor[1] = resp[3]; + + info->patch_id = ((u16) resp[4] << 8) | resp[5]; + info->func = resp[6]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_func_info); + +/** + * si476x_cmd_set_property() - send 'SET_PROPERTY' command to the device + * @core: device to send the command to + * @property: property address + * @value: property value + * + * Function returns 0 on succsess and negative error code on + * failure + */ +int si476x_core_cmd_set_property(struct si476x_core *core, + u16 property, u16 value) +{ + u8 resp[CMD_SET_PROPERTY_NRESP]; + const u8 args[CMD_SET_PROPERTY_NARGS] = { + 0x00, + msb(property), + lsb(property), + msb(value), + lsb(value), + }; + + return si476x_core_send_command(core, CMD_SET_PROPERTY, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_set_property); + +/** + * si476x_cmd_get_property() - send 'GET_PROPERTY' command to the device + * @core: device to send the command to + * @property: property address + * + * Function return the value of property as u16 on success or a + * negative error on failure + */ +int si476x_core_cmd_get_property(struct si476x_core *core, u16 property) +{ + int err; + u8 resp[CMD_GET_PROPERTY_NRESP]; + const u8 args[CMD_GET_PROPERTY_NARGS] = { + 0x00, + msb(property), + lsb(property), + }; + + err = si476x_core_send_command(core, CMD_GET_PROPERTY, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 
+/** + * si476x_cmd_dig_audio_pin_cfg() - send 'DIG_AUDIO_PIN_CFG' command to + * the device + * @core: device to send the command to + * @dclk: DCLK pin function configuration: + * #SI476X_DCLK_NOOP - do not modify the behaviour + * #SI476X_DCLK_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * #SI476X_DCLK_DAUDIO - set the pin to be a part of digital + * audio interface + * @dfs: DFS pin function configuration: + * #SI476X_DFS_NOOP - do not modify the behaviour + * #SI476X_DFS_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_DFS_DAUDIO - set the pin to be a part of digital + * audio interface + * @dout - DOUT pin function configuration: + * SI476X_DOUT_NOOP - do not modify the behaviour + * SI476X_DOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_DOUT_I2S_OUTPUT - set this pin to be digital out on I2S + * port 1 + * SI476X_DOUT_I2S_INPUT - set this pin to be digital in on I2S + * port 1 + * @xout - XOUT pin function configuration: + * SI476X_XOUT_NOOP - do not modify the behaviour + * SI476X_XOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_XOUT_I2S_INPUT - set this pin to be digital in on I2S + * port 1 + * SI476X_XOUT_MODE_SELECT - set this pin to be the input that + * selects the mode of the I2S audio + * combiner (analog or HD) + * [SI4761/63/65/67 Only] + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_dig_audio_pin_cfg(struct si476x_core *core, + enum si476x_dclk_config dclk, + enum si476x_dfs_config dfs, + enum si476x_dout_config dout, + enum si476x_xout_config xout) +{ + u8 resp[CMD_DIG_AUDIO_PIN_CFG_NRESP]; + const u8 args[CMD_DIG_AUDIO_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(dclk), + PIN_CFG_BYTE(dfs), + PIN_CFG_BYTE(dout), + PIN_CFG_BYTE(xout), + }; + + return si476x_core_send_command(core, CMD_DIG_AUDIO_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_dig_audio_pin_cfg); + +/** + * si476x_cmd_zif_pin_cfg - send 'ZIF_PIN_CFG_COMMAND' + * @core - device to send the command to + * @iqclk - IQCL pin function configuration: + * SI476X_IQCLK_NOOP - do not modify the behaviour + * SI476X_IQCLK_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IQCLK_IQ - set pin to be a part of I/Q interface + * in master mode + * @iqfs - IQFS pin function configuration: + * SI476X_IQFS_NOOP - do not modify the behaviour + * SI476X_IQFS_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IQFS_IQ - set pin to be a part of I/Q interface + * in master mode + * @iout - IOUT pin function configuration: + * SI476X_IOUT_NOOP - do not modify the behaviour + * SI476X_IOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IOUT_OUTPUT - set pin to be I out + * @qout - QOUT pin function configuration: + * SI476X_QOUT_NOOP - do not modify the behaviour + * SI476X_QOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_QOUT_OUTPUT - set pin to be Q out + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_zif_pin_cfg(struct si476x_core *core, + enum si476x_iqclk_config iqclk, + enum si476x_iqfs_config iqfs, + enum si476x_iout_config iout, + enum si476x_qout_config qout) +{ + u8 resp[CMD_ZIF_PIN_CFG_NRESP]; + const u8 args[CMD_ZIF_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(iqclk), + PIN_CFG_BYTE(iqfs), + PIN_CFG_BYTE(iout), + PIN_CFG_BYTE(qout), + }; + + return si476x_core_send_command(core, CMD_ZIF_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_zif_pin_cfg); + +/** + * si476x_cmd_ic_link_gpo_ctl_pin_cfg - send + * 'IC_LINK_GPIO_CTL_PIN_CFG' command to the device + * @core - device to send the command to + * @icin - ICIN pin function configuration: + * SI476X_ICIN_NOOP - do not modify the behaviour + * SI476X_ICIN_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICIN_GPO1_HIGH - set pin to be an output, drive it high + * SI476X_ICIN_GPO1_LOW - set pin to be an output, drive it low + * SI476X_ICIN_IC_LINK - set the pin to be a part of Inter-Chip link + * @icip - ICIP pin function configuration: + * SI476X_ICIP_NOOP - do not modify the behaviour + * SI476X_ICIP_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICIP_GPO1_HIGH - set pin to be an output, drive it high + * SI476X_ICIP_GPO1_LOW - set pin to be an output, drive it low + * SI476X_ICIP_IC_LINK - set the pin to be a part of Inter-Chip link + * @icon - ICON pin function configuration: + * SI476X_ICON_NOOP - do not modify the behaviour + * SI476X_ICON_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICON_I2S - set the pin to be a part of audio + * interface in slave mode (DCLK) + * SI476X_ICON_IC_LINK - set the pin to be a part of Inter-Chip link + * @icop - ICOP pin function configuration: + * SI476X_ICOP_NOOP - do not modify the behaviour + * SI476X_ICOP_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICOP_I2S - set the pin to be a part of audio + * interface in slave mode (DOUT) + * [Si4761/63/65/67 Only] + * SI476X_ICOP_IC_LINK - set the pin to be a part of Inter-Chip link + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *core, + enum si476x_icin_config icin, + enum si476x_icip_config icip, + enum si476x_icon_config icon, + enum si476x_icop_config icop) +{ + u8 resp[CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP]; + const u8 args[CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(icin), + PIN_CFG_BYTE(icip), + PIN_CFG_BYTE(icon), + PIN_CFG_BYTE(icop), + }; + + return si476x_core_send_command(core, CMD_IC_LINK_GPO_CTL_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_ic_link_gpo_ctl_pin_cfg); + +/** + * si476x_cmd_ana_audio_pin_cfg - send 'ANA_AUDIO_PIN_CFG' to the + * device + * @core - device to send the command to + * @lrout - LROUT pin function configuration: + * SI476X_LROUT_NOOP - do not modify the behaviour + * SI476X_LROUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_LROUT_AUDIO - set pin to be audio output + * SI476X_LROUT_MPX - set pin to be MPX output + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *core, + enum si476x_lrout_config lrout) +{ + u8 resp[CMD_ANA_AUDIO_PIN_CFG_NRESP]; + const u8 args[CMD_ANA_AUDIO_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(lrout), + }; + + return si476x_core_send_command(core, CMD_ANA_AUDIO_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_ana_audio_pin_cfg); + + +/** + * si476x_cmd_intb_pin_cfg - send 'INTB_PIN_CFG' command to the device + * @core - device to send the command to + * @intb - INTB pin function configuration: + * SI476X_INTB_NOOP - do not modify the behaviour + * SI476X_INTB_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_INTB_DAUDIO - set pin to be a part of digital + * audio interface in slave mode + * SI476X_INTB_IRQ - set pin to be an interrupt request line + * @a1 - A1 pin function configuration: + * SI476X_A1_NOOP - do not modify the behaviour + * SI476X_A1_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_A1_IRQ - set pin to be an interrupt request line + * + * Function returns 0 on success and negative error code on failure + */ +static int si476x_core_cmd_intb_pin_cfg_a10(struct si476x_core *core, + enum si476x_intb_config intb, + enum si476x_a1_config a1) +{ + u8 resp[CMD_INTB_PIN_CFG_A10_NRESP]; + const u8 args[CMD_INTB_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(intb), + PIN_CFG_BYTE(a1), + }; + + return si476x_core_send_command(core, CMD_INTB_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + +static int si476x_core_cmd_intb_pin_cfg_a20(struct si476x_core *core, + enum si476x_intb_config intb, + enum si476x_a1_config a1) +{ + u8 resp[CMD_INTB_PIN_CFG_A20_NRESP]; + const u8 args[CMD_INTB_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(intb), + PIN_CFG_BYTE(a1), + }; + + return si476x_core_send_command(core, CMD_INTB_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + + + +/** + * si476x_cmd_am_rsq_status - send 'AM_RSQ_STATUS' command to the + * device + * @core - device to send the command to + * @rsqack - if set command clears RSQINT, SNRINT, SNRLINT, RSSIHINT, + * RSSSILINT, BLENDINT, MULTHINT and MULTLINT + * @attune - when set the values in the status report are the values + * that were calculated at tune + * @cancel - abort ongoing seek/tune operation + * @stcack - clear the STCINT bit in status register + * @report - all signal quality information returned by the command + * (if NULL then the output of the command is ignored) + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_am_rsq_status(struct si476x_core *core, + struct si476x_rsq_status_args *rsqargs, + struct si476x_rsq_status_report *report) +{ + int err; + u8 resp[CMD_AM_RSQ_STATUS_NRESP]; + const u8 args[CMD_AM_RSQ_STATUS_NARGS] = { + rsqargs->rsqack << 3 | rsqargs->attune << 2 | + rsqargs->cancel << 1 | rsqargs->stcack, + }; + + err = si476x_core_send_command(core, CMD_AM_RSQ_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + /* + * Besides getting received signal quality information this + * command can be used to just acknowledge different interrupt + * flags; in those cases it is useless to copy and parse the + * received data, so the user can pass NULL and thus avoid + * unnecessary copying. + */ + if (!report) + return err; + + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_rsq_status); +
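/*
 * [Sketch, not part of the patch] Usage of the NULL-report
 * convention documented above: issuing AM_RSQ_STATUS purely to
 * acknowledge the STC interrupt, as si476x_cmd_clear_stc() does
 * earlier in this file. Kernel context (a live "core") assumed:
 */
struct si476x_rsq_status_args args = {
	.primary = false,
	.rsqack  = false,
	.attune  = false,
	.cancel  = false,
	.stcack  = true,	/* only clear STCINT */
};
int err = si476x_core_cmd_am_rsq_status(core, &args, NULL);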
+ */ + if (!report) + return err; + + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_rsq_status); + +int si476x_core_cmd_fm_acf_status(struct si476x_core *core, + struct si476x_acf_status_report *report) +{ + int err; + u8 resp[CMD_FM_ACF_STATUS_NRESP]; + const u8 args[CMD_FM_ACF_STATUS_NARGS] = { + 0x0, + }; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_FM_ACF_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->blend_int = resp[1] & SI476X_ACF_BLEND_INT; + report->hblend_int = resp[1] & SI476X_ACF_HIBLEND_INT; + report->hicut_int = resp[1] & SI476X_ACF_HICUT_INT; + report->chbw_int = resp[1] & SI476X_ACF_CHBW_INT; + report->softmute_int = resp[1] & SI476X_ACF_SOFTMUTE_INT; + report->smute = resp[2] & SI476X_ACF_SMUTE; + report->smattn = resp[3] & SI476X_ACF_SMATTN; + report->chbw = resp[4]; + report->hicut = resp[5]; + report->hiblend = resp[6]; + report->pilot = resp[7] & SI476X_ACF_PILOT; + report->stblend = resp[7] & SI476X_ACF_STBLEND; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_acf_status); + +int si476x_core_cmd_am_acf_status(struct si476x_core *core, + struct si476x_acf_status_report *report) +{ + int err; + u8 resp[CMD_AM_ACF_STATUS_NRESP]; + const u8 args[CMD_AM_ACF_STATUS_NARGS] = { + 0x0, + }; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_AM_ACF_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->blend_int = resp[1] & SI476X_ACF_BLEND_INT; + report->hblend_int = resp[1] & SI476X_ACF_HIBLEND_INT; + report->hicut_int = resp[1] & SI476X_ACF_HICUT_INT; + report->chbw_int = resp[1] & SI476X_ACF_CHBW_INT; + report->softmute_int = resp[1] & SI476X_ACF_SOFTMUTE_INT; + report->smute = resp[2] & SI476X_ACF_SMUTE; + report->smattn = resp[3] & SI476X_ACF_SMATTN; + report->chbw = resp[4]; + report->hicut = resp[5]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_acf_status); + + +/** + * si476x_cmd_fm_seek_start - send 'FM_SEEK_START' command to the + * device + * @core - device to send the command to + * @seekup - if set the direction of the search is 'up' + * @wrap - if set seek wraps when hitting band limit + * + * This function begins search for a valid station. The station is + * considered valid when 'FM_VALID_SNR_THRESHOLD' and + * 'FM_VALID_RSSI_THRESHOLD' and 'FM_VALID_MAX_TUNE_ERROR' criteria + * are met. 
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_seek_start(struct si476x_core *core,
+				  bool seekup, bool wrap)
+{
+	u8 resp[CMD_FM_SEEK_START_NRESP];
+	const u8 args[CMD_FM_SEEK_START_NARGS] = {
+		seekup << 3 | wrap << 2,
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_SEEK_START,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_seek_start);
+
+/**
+ * si476x_cmd_fm_rds_status - send 'FM_RDS_STATUS' command to the
+ * device
+ * @core - device to send the command to
+ * @status_only - if set the data is not removed from RDSFIFO,
+ *	RDSFIFOUSED is not decremented and the rest of the RDS data
+ *	contains the last valid info received
+ * @mtfifo - if set the command clears RDS receive FIFO
+ * @intack - if set the command clears the RDSINT bit
+ * @report - memory location where to store the retrieved status
+ *	(if NULL then the output of the command is ignored)
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_rds_status(struct si476x_core *core,
+				  bool status_only,
+				  bool mtfifo,
+				  bool intack,
+				  struct si476x_rds_status_report *report)
+{
+	int err;
+	u8 resp[CMD_FM_RDS_STATUS_NRESP];
+	const u8 args[CMD_FM_RDS_STATUS_NARGS] = {
+		status_only << 2 | mtfifo << 1 | intack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RDS_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting RDS status information this command can be
+	 * used to just acknowledge different interrupt flags. In
+	 * those cases it is useless to copy and parse the received
+	 * data, so the user can pass NULL and thus avoid unnecessary
+	 * copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->rdstpptyint = 0b00010000 & resp[1];
+	report->rdspiint = 0b00001000 & resp[1];
+	report->rdssyncint = 0b00000010 & resp[1];
+	report->rdsfifoint = 0b00000001 & resp[1];
+
+	report->tpptyvalid = 0b00010000 & resp[2];
+	report->pivalid = 0b00001000 & resp[2];
+	report->rdssync = 0b00000010 & resp[2];
+	report->rdsfifolost = 0b00000001 & resp[2];
+
+	report->tp = 0b00100000 & resp[3];
+	report->pty = 0b00011111 & resp[3];
+
+	report->pi = be16_to_cpup((__be16 *)(resp + 4));
+	report->rdsfifoused = resp[6];
+
+	report->ble[V4L2_RDS_BLOCK_A] = 0b11000000 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_B] = 0b00110000 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_C] = 0b00001100 & resp[7];
+	report->ble[V4L2_RDS_BLOCK_D] = 0b00000011 & resp[7];
+
+	report->rds[V4L2_RDS_BLOCK_A].block = V4L2_RDS_BLOCK_A;
+	report->rds[V4L2_RDS_BLOCK_A].msb = resp[8];
+	report->rds[V4L2_RDS_BLOCK_A].lsb = resp[9];
+
+	report->rds[V4L2_RDS_BLOCK_B].block = V4L2_RDS_BLOCK_B;
+	report->rds[V4L2_RDS_BLOCK_B].msb = resp[10];
+	report->rds[V4L2_RDS_BLOCK_B].lsb = resp[11];
+
+	report->rds[V4L2_RDS_BLOCK_C].block = V4L2_RDS_BLOCK_C;
+	report->rds[V4L2_RDS_BLOCK_C].msb = resp[12];
+	report->rds[V4L2_RDS_BLOCK_C].lsb = resp[13];
+
+	report->rds[V4L2_RDS_BLOCK_D].block = V4L2_RDS_BLOCK_D;
+	report->rds[V4L2_RDS_BLOCK_D].msb = resp[14];
+	report->rds[V4L2_RDS_BLOCK_D].lsb = resp[15];
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_status);
+
+int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *core,
+				      bool clear,
+				      struct si476x_rds_blockcount_report *report)
+{
+	int err;
+	u8 resp[CMD_FM_RDS_BLOCKCOUNT_NRESP];
+	const u8 args[CMD_FM_RDS_BLOCKCOUNT_NARGS] = {
+		clear,
+	};
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_FM_RDS_BLOCKCOUNT,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+
+	if (!err) {
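+		/*
+		 * The chip reports the counters as three consecutive
+		 * big-endian 16-bit values starting at offset 2 of
+		 * the response buffer.
+		 */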
+		report->expected = be16_to_cpup((__be16 *)(resp + 2));
+		report->received = be16_to_cpup((__be16 *)(resp + 4));
+		report->uncorrectable = be16_to_cpup((__be16 *)(resp + 6));
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_blockcount);
+
+int si476x_core_cmd_fm_phase_diversity(struct si476x_core *core,
+				       enum si476x_phase_diversity_mode mode)
+{
+	u8 resp[CMD_FM_PHASE_DIVERSITY_NRESP];
+	const u8 args[CMD_FM_PHASE_DIVERSITY_NARGS] = {
+		mode & 0b111,
+	};
+
+	return si476x_core_send_command(core, CMD_FM_PHASE_DIVERSITY,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_diversity);
+
+/**
+ * si476x_core_cmd_fm_phase_div_status() - get the phase diversity
+ * status
+ *
+ * @core: si476x device
+ *
+ * NOTE: caller must hold core lock
+ *
+ * Function returns the value of the status bit in case of success and
+ * negative error code in case of failure.
+ */
+int si476x_core_cmd_fm_phase_div_status(struct si476x_core *core)
+{
+	int err;
+	u8 resp[CMD_FM_PHASE_DIV_STATUS_NRESP];
+
+	err = si476x_core_send_command(core, CMD_FM_PHASE_DIV_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+
+	return (err < 0) ? err : resp[1];
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_div_status);
+
+
+/**
+ * si476x_cmd_am_seek_start - send 'AM_SEEK_START' command to the
+ * device
+ * @core - device to send the command to
+ * @seekup - if set the direction of the search is 'up'
+ * @wrap - if set seek wraps when hitting band limit
+ *
+ * This function begins search for a valid station. The station is
+ * considered valid when 'AM_VALID_SNR_THRESHOLD' and
+ * 'AM_VALID_RSSI_THRESHOLD' and 'AM_VALID_MAX_TUNE_ERROR' criteria
+ * are met.
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_am_seek_start(struct si476x_core *core,
+				  bool seekup, bool wrap)
+{
+	u8 resp[CMD_AM_SEEK_START_NRESP];
+	const u8 args[CMD_AM_SEEK_START_NARGS] = {
+		seekup << 3 | wrap << 2,
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_AM_SEEK_START,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_seek_start);
+
+
+
+static int si476x_core_cmd_power_up_a10(struct si476x_core *core,
+					struct si476x_power_up_args *puargs)
+{
+	u8 resp[CMD_POWER_UP_A10_NRESP];
+	const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+	const bool ctsen = (core->client->irq != 0);
+	const u8 args[CMD_POWER_UP_A10_NARGS] = {
+		0xF7,		/* Reserved, always 0xF7 */
+		0x3F & puargs->xcload,	/* First two bits are reserved to be
+					 * zeros */
+		ctsen << 7 | intsel << 6 | 0x07, /* Last five bits
+						  * are reserved to
+						  * be written as 0x7 */
+		puargs->func << 4 | puargs->freq,
+		0x11,		/* Reserved, always 0x11 */
+	};
+
+	return si476x_core_send_command(core, CMD_POWER_UP,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_TIMEOUT_POWER_UP);
+}
+
+static int si476x_core_cmd_power_up_a20(struct si476x_core *core,
+					struct si476x_power_up_args *puargs)
+{
+	u8 resp[CMD_POWER_UP_A20_NRESP];
+	const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+	const bool ctsen = (core->client->irq != 0);
+	const u8 args[CMD_POWER_UP_A20_NARGS] = {
+		puargs->ibias6x << 7 | puargs->xstart,
+		0x3F & puargs->xcload,	/* First two bits are reserved to be
+					 * zeros */
+		ctsen << 7 | intsel << 6 | puargs->fastboot << 5 |
+		puargs->xbiashc << 3 | puargs->xbias,
+		puargs->func << 4 | puargs->freq,
+		0x10 | puargs->xmode,
+	};
+
+	return si476x_core_send_command(core, CMD_POWER_UP,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_TIMEOUT_POWER_UP);
+}
+
+static int si476x_core_cmd_power_down_a10(struct si476x_core *core,
+					  struct si476x_power_down_args *pdargs)
+{
+	u8 resp[CMD_POWER_DOWN_A10_NRESP];
+
+	return si476x_core_send_command(core, CMD_POWER_DOWN,
+					NULL, 0,
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+static int si476x_core_cmd_power_down_a20(struct si476x_core *core,
+					  struct si476x_power_down_args *pdargs)
+{
+	u8 resp[CMD_POWER_DOWN_A20_NRESP];
+	const u8 args[CMD_POWER_DOWN_A20_NARGS] = {
+		pdargs->xosc,
+	};
+	return si476x_core_send_command(core, CMD_POWER_DOWN,
+					args, ARRAY_SIZE(args),
+					resp, ARRAY_SIZE(resp),
+					SI476X_DEFAULT_TIMEOUT);
+}
+
+static int si476x_core_cmd_am_tune_freq_a10(struct si476x_core *core,
+					    struct si476x_tune_freq_args *tuneargs)
+{
+	const int am_freq = tuneargs->freq;
+	u8 resp[CMD_AM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_AM_TUNE_FREQ_NARGS] = {
+		(tuneargs->hd << 6),
+		msb(am_freq),
+		lsb(am_freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ, args,
+					 sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_am_tune_freq_a20(struct si476x_core *core,
+					    struct si476x_tune_freq_args *tuneargs)
+{
+	const int am_freq = tuneargs->freq;
+	u8 resp[CMD_AM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_AM_TUNE_FREQ_NARGS] = {
+		(tuneargs->zifsr << 6) | (tuneargs->injside & 0b11),
+		msb(am_freq),
+		lsb(am_freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_fm_rsq_status_a10(struct si476x_core *core,
+					     struct si476x_rsq_status_args *rsqargs,
+					     struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8 resp[CMD_FM_RSQ_STATUS_A10_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A10_NARGS] = {
+		rsqargs->rsqack << 3 | rsqargs->attune << 2 |
+		rsqargs->cancel << 1 | rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information this
+	 * command can be used to just acknowledge different interrupt
+	 * flags. In those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
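+	 *
+	 * Note: unlike the A30 variant of this command below, the
+	 * A10 report carries no ISSI byte, which is why resp[8] is
+	 * not parsed here.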
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint = 0b10000000 & resp[1];
+	report->multlint = 0b01000000 & resp[1];
+	report->snrhint = 0b00001000 & resp[1];
+	report->snrlint = 0b00000100 & resp[1];
+	report->rssihint = 0b00000010 & resp[1];
+	report->rssilint = 0b00000001 & resp[1];
+
+	report->bltf = 0b10000000 & resp[2];
+	report->snr_ready = 0b00100000 & resp[2];
+	report->rssiready = 0b00001000 & resp[2];
+	report->afcrl = 0b00000010 & resp[2];
+	report->valid = 0b00000001 & resp[2];
+
+	report->readfreq = be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff = resp[5];
+	report->rssi = resp[6];
+	report->snr = resp[7];
+	report->lassi = resp[9];
+	report->hassi = resp[10];
+	report->mult = resp[11];
+	report->dev = resp[12];
+	report->readantcap = be16_to_cpup((__be16 *)(resp + 13));
+	report->assi = resp[15];
+	report->usn = resp[16];
+
+	return err;
+}
+
+static int si476x_core_cmd_fm_rsq_status_a20(struct si476x_core *core,
+					     struct si476x_rsq_status_args *rsqargs,
+					     struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8 resp[CMD_FM_RSQ_STATUS_A10_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = {
+		rsqargs->primary << 4 | rsqargs->rsqack << 3 |
+		rsqargs->attune << 2 | rsqargs->cancel << 1 |
+		rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information this
+	 * command can be used to just acknowledge different interrupt
+	 * flags. In those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint = 0b10000000 & resp[1];
+	report->multlint = 0b01000000 & resp[1];
+	report->snrhint = 0b00001000 & resp[1];
+	report->snrlint = 0b00000100 & resp[1];
+	report->rssihint = 0b00000010 & resp[1];
+	report->rssilint = 0b00000001 & resp[1];
+
+	report->bltf = 0b10000000 & resp[2];
+	report->snr_ready = 0b00100000 & resp[2];
+	report->rssiready = 0b00001000 & resp[2];
+	report->afcrl = 0b00000010 & resp[2];
+	report->valid = 0b00000001 & resp[2];
+
+	report->readfreq = be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff = resp[5];
+	report->rssi = resp[6];
+	report->snr = resp[7];
+	report->lassi = resp[9];
+	report->hassi = resp[10];
+	report->mult = resp[11];
+	report->dev = resp[12];
+	report->readantcap = be16_to_cpup((__be16 *)(resp + 13));
+	report->assi = resp[15];
+	report->usn = resp[16];
+
+	return err;
+}
+
+
+static int si476x_core_cmd_fm_rsq_status_a30(struct si476x_core *core,
+					     struct si476x_rsq_status_args *rsqargs,
+					     struct si476x_rsq_status_report *report)
+{
+	int err;
+	u8 resp[CMD_FM_RSQ_STATUS_A30_NRESP];
+	const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = {
+		rsqargs->primary << 4 | rsqargs->rsqack << 3 |
+		rsqargs->attune << 2 | rsqargs->cancel << 1 |
+		rsqargs->stcack,
+	};
+
+	err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS,
+				       args, ARRAY_SIZE(args),
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	/*
+	 * Besides getting received signal quality information this
+	 * command can be used to just acknowledge different interrupt
+	 * flags. In those cases it is useless to copy and parse the
+	 * received data, so the user can pass NULL and thus avoid
+	 * unnecessary copying.
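+	 *
+	 * The A30 firmware additionally reports the injection side
+	 * and the pilot/RDS/ASSI deviation bytes, all of which are
+	 * parsed at the end of this function.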
+	 */
+	if (err < 0 || report == NULL)
+		return err;
+
+	report->multhint = 0b10000000 & resp[1];
+	report->multlint = 0b01000000 & resp[1];
+	report->snrhint = 0b00001000 & resp[1];
+	report->snrlint = 0b00000100 & resp[1];
+	report->rssihint = 0b00000010 & resp[1];
+	report->rssilint = 0b00000001 & resp[1];
+
+	report->bltf = 0b10000000 & resp[2];
+	report->snr_ready = 0b00100000 & resp[2];
+	report->rssiready = 0b00001000 & resp[2];
+	report->injside = 0b00000100 & resp[2];
+	report->afcrl = 0b00000010 & resp[2];
+	report->valid = 0b00000001 & resp[2];
+
+	report->readfreq = be16_to_cpup((__be16 *)(resp + 3));
+	report->freqoff = resp[5];
+	report->rssi = resp[6];
+	report->snr = resp[7];
+	report->issi = resp[8];
+	report->lassi = resp[9];
+	report->hassi = resp[10];
+	report->mult = resp[11];
+	report->dev = resp[12];
+	report->readantcap = be16_to_cpup((__be16 *)(resp + 13));
+	report->assi = resp[15];
+	report->usn = resp[16];
+
+	report->pilotdev = resp[17];
+	report->rdsdev = resp[18];
+	report->assidev = resp[19];
+	report->strongdev = resp[20];
+	report->rdspi = be16_to_cpup((__be16 *)(resp + 21));
+
+	return err;
+}
+
+static int si476x_core_cmd_fm_tune_freq_a10(struct si476x_core *core,
+					    struct si476x_tune_freq_args *tuneargs)
+{
+	u8 resp[CMD_FM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_FM_TUNE_FREQ_A10_NARGS] = {
+		(tuneargs->hd << 6) | (tuneargs->tunemode << 4)
+		| (tuneargs->smoothmetrics << 2),
+		msb(tuneargs->freq),
+		lsb(tuneargs->freq),
+		msb(tuneargs->antcap),
+		lsb(tuneargs->antcap)
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_fm_tune_freq_a20(struct si476x_core *core,
+					    struct si476x_tune_freq_args *tuneargs)
+{
+	u8 resp[CMD_FM_TUNE_FREQ_NRESP];
+	const u8 args[CMD_FM_TUNE_FREQ_A20_NARGS] = {
+		(tuneargs->hd << 6) | (tuneargs->tunemode << 4)
+		| (tuneargs->smoothmetrics << 2) | (tuneargs->injside),
+		msb(tuneargs->freq),
+		lsb(tuneargs->freq),
+	};
+
+	return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ,
+					 args, sizeof(args),
+					 resp, sizeof(resp));
+}
+
+static int si476x_core_cmd_agc_status_a20(struct si476x_core *core,
+					  struct si476x_agc_status_report *report)
+{
+	int err;
+	u8 resp[CMD_AGC_STATUS_NRESP_A20];
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_AGC_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->mxhi = resp[1] & SI476X_AGC_MXHI;
+	report->mxlo = resp[1] & SI476X_AGC_MXLO;
+	report->lnahi = resp[1] & SI476X_AGC_LNAHI;
+	report->lnalo = resp[1] & SI476X_AGC_LNALO;
+	report->fmagc1 = resp[2];
+	report->fmagc2 = resp[3];
+	report->pgagain = resp[4];
+	report->fmwblang = resp[5];
+
+	return err;
+}
+
+static int si476x_core_cmd_agc_status_a10(struct si476x_core *core,
+					  struct si476x_agc_status_report *report)
+{
+	int err;
+	u8 resp[CMD_AGC_STATUS_NRESP_A10];
+
+	if (!report)
+		return -EINVAL;
+
+	err = si476x_core_send_command(core, CMD_AGC_STATUS,
+				       NULL, 0,
+				       resp, ARRAY_SIZE(resp),
+				       SI476X_DEFAULT_TIMEOUT);
+	if (err < 0)
+		return err;
+
+	report->mxhi = resp[1] & SI476X_AGC_MXHI;
+	report->mxlo = resp[1] & SI476X_AGC_MXLO;
+	report->lnahi = resp[1] & SI476X_AGC_LNAHI;
+	report->lnalo = resp[1] & SI476X_AGC_LNALO;
+
+	return err;
+}
+
+typedef int (*tune_freq_func_t) (struct si476x_core *core,
+				 struct si476x_tune_freq_args *tuneargs);
+
+static struct {
+	int (*power_up) (struct si476x_core *,
+			 struct si476x_power_up_args *);
+	int (*power_down) (struct si476x_core *,
+			   struct si476x_power_down_args *);
+
+	tune_freq_func_t fm_tune_freq;
+	tune_freq_func_t am_tune_freq;
+
+	int (*fm_rsq_status)(struct si476x_core *,
+			     struct si476x_rsq_status_args *,
+			     struct si476x_rsq_status_report *);
+
+	int (*agc_status)(struct si476x_core *,
+			  struct si476x_agc_status_report *);
+	int (*intb_pin_cfg)(struct si476x_core *core,
+			    enum si476x_intb_config intb,
+			    enum si476x_a1_config a1);
+} si476x_cmds_vtable[] = {
+	[SI476X_REVISION_A10] = {
+		.power_up = si476x_core_cmd_power_up_a10,
+		.power_down = si476x_core_cmd_power_down_a10,
+		.fm_tune_freq = si476x_core_cmd_fm_tune_freq_a10,
+		.am_tune_freq = si476x_core_cmd_am_tune_freq_a10,
+		.fm_rsq_status = si476x_core_cmd_fm_rsq_status_a10,
+		.agc_status = si476x_core_cmd_agc_status_a10,
+		.intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a10,
+	},
+	[SI476X_REVISION_A20] = {
+		.power_up = si476x_core_cmd_power_up_a20,
+		.power_down = si476x_core_cmd_power_down_a20,
+		.fm_tune_freq = si476x_core_cmd_fm_tune_freq_a20,
+		.am_tune_freq = si476x_core_cmd_am_tune_freq_a20,
+		.fm_rsq_status = si476x_core_cmd_fm_rsq_status_a20,
+		.agc_status = si476x_core_cmd_agc_status_a20,
+		.intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a20,
+	},
+	[SI476X_REVISION_A30] = {
+		.power_up = si476x_core_cmd_power_up_a20,
+		.power_down = si476x_core_cmd_power_down_a20,
+		.fm_tune_freq = si476x_core_cmd_fm_tune_freq_a20,
+		.am_tune_freq = si476x_core_cmd_am_tune_freq_a20,
+		.fm_rsq_status = si476x_core_cmd_fm_rsq_status_a30,
+		.agc_status = si476x_core_cmd_agc_status_a20,
+		.intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a20,
+	},
+};
+
+int si476x_core_cmd_power_up(struct si476x_core *core,
+			     struct si476x_power_up_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].power_up(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_power_up);
+
+int si476x_core_cmd_power_down(struct si476x_core *core,
+			       struct si476x_power_down_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].power_down(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_power_down);
+
+int si476x_core_cmd_fm_tune_freq(struct si476x_core *core,
+				 struct si476x_tune_freq_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].fm_tune_freq(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_tune_freq);
+
+int si476x_core_cmd_am_tune_freq(struct si476x_core *core,
+				 struct si476x_tune_freq_args *args)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].am_tune_freq(core, args);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_tune_freq);
+
+int si476x_core_cmd_fm_rsq_status(struct si476x_core *core,
+				  struct si476x_rsq_status_args *args,
+				  struct si476x_rsq_status_report *report)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].fm_rsq_status(core, args,
+								report);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rsq_status);
+
+int si476x_core_cmd_agc_status(struct si476x_core *core,
+			       struct si476x_agc_status_report *report)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return si476x_cmds_vtable[core->revision].agc_status(core, report);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_agc_status);
+
+int si476x_core_cmd_intb_pin_cfg(struct si476x_core *core,
+				 enum si476x_intb_config intb,
+				 enum si476x_a1_config a1)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+
+	return si476x_cmds_vtable[core->revision].intb_pin_cfg(core, intb, a1);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_intb_pin_cfg);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
+MODULE_DESCRIPTION("API for command exchange for si476x");
diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c
new file mode 100644
index 00000000000..f5bc8e4bd4b
--- /dev/null
+++ b/drivers/mfd/si476x-i2c.c
@@ -0,0 +1,886 @@
+/*
+ * drivers/mfd/si476x-i2c.c -- Core device driver for si476x MFD
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+
+#include <linux/mfd/si476x-core.h>
+
+#define SI476X_MAX_IO_ERRORS		10
+#define SI476X_DRIVER_RDS_FIFO_DEPTH	128
+
+/**
+ * si476x_core_config_pinmux() - pin function configuration function
+ *
+ * @core: Core device structure
+ *
+ * Configure the functions of the pins of the radio chip.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
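+ *
+ * The desired pin functions are taken from core->pinmux, which the
+ * probe routine fills in from the platform data.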
+ */
+static int si476x_core_config_pinmux(struct si476x_core *core)
+{
+	int err;
+	dev_dbg(&core->client->dev, "Configuring pinmux\n");
+	err = si476x_core_cmd_dig_audio_pin_cfg(core,
+						core->pinmux.dclk,
+						core->pinmux.dfs,
+						core->pinmux.dout,
+						core->pinmux.xout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure digital audio pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_zif_pin_cfg(core,
+					  core->pinmux.iqclk,
+					  core->pinmux.iqfs,
+					  core->pinmux.iout,
+					  core->pinmux.qout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure ZIF pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(core,
+						      core->pinmux.icin,
+						      core->pinmux.icip,
+						      core->pinmux.icon,
+						      core->pinmux.icop);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure IC-Link/GPO pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_ana_audio_pin_cfg(core,
+						core->pinmux.lrout);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure analog audio pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	err = si476x_core_cmd_intb_pin_cfg(core,
+					   core->pinmux.intb,
+					   core->pinmux.a1);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure interrupt pins(err = %d)\n",
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+static inline void si476x_core_schedule_polling_work(struct si476x_core *core)
+{
+	schedule_delayed_work(&core->status_monitor,
+			      usecs_to_jiffies(SI476X_STATUS_POLL_US));
+}
+
+/**
+ * si476x_core_start() - early chip startup function
+ * @core: Core device structure
+ * @soft: When set, this flag forces "soft" startup, i.e. the chip is
+ * powered up by sending the appropriate command instead of using the
+ * reset pin of the tuner
+ *
+ * Perform required startup sequence to correctly power
+ * up the chip and perform initial configuration. It does the
+ * following sequence of actions:
+ *	1. Claims and enables the power supplies VD and VIO1 required
+ *	   for I2C interface of the chip operation.
+ *	2. Waits for 100us, pulls the reset line up, enables irq,
+ *	   waits for another 100us as it is specified by the
+ *	   datasheet.
+ *	3. Sends 'POWER_UP' command to the device with all provided
+ *	   information about power-up parameters.
+ *	4. Configures the pin multiplexor, disables digital audio and
+ *	   configures interrupt sources.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
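+ *
+ * When the client has no IRQ line the function falls back to polling
+ * the chip status from the status_monitor delayed work instead of
+ * relying on interrupts.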
+ */
+int si476x_core_start(struct si476x_core *core, bool soft)
+{
+	struct i2c_client *client = core->client;
+	int err;
+
+	if (!soft) {
+		if (gpio_is_valid(core->gpio_reset))
+			gpio_set_value_cansleep(core->gpio_reset, 1);
+
+		if (client->irq)
+			enable_irq(client->irq);
+
+		udelay(100);
+
+		if (!client->irq) {
+			atomic_set(&core->is_alive, 1);
+			si476x_core_schedule_polling_work(core);
+		}
+	} else {
+		if (client->irq)
+			enable_irq(client->irq);
+		else {
+			atomic_set(&core->is_alive, 1);
+			si476x_core_schedule_polling_work(core);
+		}
+	}
+
+	err = si476x_core_cmd_power_up(core,
+				       &core->power_up_parameters);
+
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Power up failure(err = %d)\n",
+			err);
+		goto disable_irq;
+	}
+
+	if (client->irq)
+		atomic_set(&core->is_alive, 1);
+
+	err = si476x_core_config_pinmux(core);
+	if (err < 0) {
+		dev_err(&core->client->dev,
+			"Failed to configure pinmux(err = %d)\n",
+			err);
+		goto disable_irq;
+	}
+
+	if (client->irq) {
+		err = regmap_write(core->regmap,
+				   SI476X_PROP_INT_CTL_ENABLE,
+				   SI476X_RDSIEN |
+				   SI476X_STCIEN |
+				   SI476X_CTSIEN);
+		if (err < 0) {
+			dev_err(&core->client->dev,
+				"Failed to configure interrupt sources"
+				"(err = %d)\n", err);
+			goto disable_irq;
+		}
+	}
+
+	return 0;
+
+disable_irq:
+	if (err == -ENODEV)
+		atomic_set(&core->is_alive, 0);
+
+	if (client->irq)
+		disable_irq(client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_set_value_cansleep(core->gpio_reset, 0);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_start);
+
+/**
+ * si476x_core_stop() - chip power-down function
+ * @core: Core device structure
+ * @soft: When set, function sends a POWER_DOWN command instead of
+ * bringing reset line low
+ *
+ * Power down the chip by performing following actions:
+ *	1. Disable IRQ or stop the polling worker
+ *	2. Send the POWER_DOWN command if the power down is soft or bring
+ *	   reset line low if not.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+int si476x_core_stop(struct si476x_core *core, bool soft)
+{
+	int err = 0;
+	atomic_set(&core->is_alive, 0);
+
+	if (soft) {
+		/* TODO: This probably should be a configurable option,
+		 * so it is possible to have the chips keep their
+		 * oscillators running
+		 */
+		struct si476x_power_down_args args = {
+			.xosc = false,
+		};
+		err = si476x_core_cmd_power_down(core, &args);
+	}
+
+	/* We couldn't disable those before
+	 * 'si476x_core_cmd_power_down' since we expect to get CTS
+	 * interrupt */
+	if (core->client->irq)
+		disable_irq(core->client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	if (!soft) {
+		if (gpio_is_valid(core->gpio_reset))
+			gpio_set_value_cansleep(core->gpio_reset, 0);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_stop);
+
+/**
+ * si476x_core_set_power_state() - set the level at which the power is
+ * supplied for the chip.
+ * @core: Core device structure
+ * @next_state: enum si476x_power_state describing power state to
+ * switch to.
+ *
+ * Switch on all the required power supplies
+ *
+ * This function returns 0 in case of success and negative error code
+ * otherwise.
+ */
+int si476x_core_set_power_state(struct si476x_core *core,
+				enum si476x_power_state next_state)
+{
+	/*
+	   It is not clear from the datasheet if it is possible to
+	   work with the device if not all power domains are operational.
+	   So for now the power-up policy is "power-up all the things!"
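+
+	   A typical consumer therefore brackets its chip accesses with
+	   a pair of calls along these lines (a sketch, not a verbatim
+	   quote from any caller in this driver):
+
+		si476x_core_set_power_state(core, SI476X_POWER_UP_FULL);
+		... talk to the chip ...
+		si476x_core_set_power_state(core, SI476X_POWER_DOWN);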
+	 */
+	int err = 0;
+
+	if (core->power_state == SI476X_POWER_INCONSISTENT) {
+		dev_err(&core->client->dev,
+			"The device in inconsistent power state\n");
+		return -EINVAL;
+	}
+
+	if (next_state != core->power_state) {
+		switch (next_state) {
+		case SI476X_POWER_UP_FULL:
+			err = regulator_bulk_enable(ARRAY_SIZE(core->supplies),
+						    core->supplies);
+			if (err < 0) {
+				core->power_state = SI476X_POWER_INCONSISTENT;
+				break;
+			}
+			/*
+			 * Startup timing diagram recommends to have a
+			 * 100 us delay between enabling of the power
+			 * supplies and turning the tuner on.
+			 */
+			udelay(100);
+
+			err = si476x_core_start(core, false);
+			if (err < 0)
+				goto disable_regulators;
+
+			core->power_state = next_state;
+			break;
+
+		case SI476X_POWER_DOWN:
+			core->power_state = next_state;
+			err = si476x_core_stop(core, false);
+			if (err < 0)
+				core->power_state = SI476X_POWER_INCONSISTENT;
+disable_regulators:
+			err = regulator_bulk_disable(ARRAY_SIZE(core->supplies),
+						     core->supplies);
+			if (err < 0)
+				core->power_state = SI476X_POWER_INCONSISTENT;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_set_power_state);
+
+/**
+ * si476x_core_report_drainer_stop() - mark the completion of the RDS
+ * buffer drain process by the worker.
+ *
+ * @core: Core device structure
+ */
+static inline void si476x_core_report_drainer_stop(struct si476x_core *core)
+{
+	mutex_lock(&core->rds_drainer_status_lock);
+	core->rds_drainer_is_working = false;
+	mutex_unlock(&core->rds_drainer_status_lock);
+}
+
+/**
+ * si476x_core_start_rds_drainer_once() - start RDS drainer worker if
+ * there is none working, do nothing otherwise
+ *
+ * @core: Datastructure corresponding to the chip.
+ */
+static inline void si476x_core_start_rds_drainer_once(struct si476x_core *core)
+{
+	mutex_lock(&core->rds_drainer_status_lock);
+	if (!core->rds_drainer_is_working) {
+		core->rds_drainer_is_working = true;
+		schedule_work(&core->rds_fifo_drainer);
+	}
+	mutex_unlock(&core->rds_drainer_status_lock);
+}
+
+/**
+ * si476x_drain_rds_fifo() - RDS buffer drainer.
+ * @work: struct work_struct being passed to the function by the
+ * kernel.
+ *
+ * Drain the contents of the RDS FIFO of the chip into the driver's
+ * own RDS FIFO.
+ */
+static void si476x_core_drain_rds_fifo(struct work_struct *work)
+{
+	int err;
+
+	struct si476x_core *core = container_of(work, struct si476x_core,
+						rds_fifo_drainer);
+
+	struct si476x_rds_status_report report;
+
+	si476x_core_lock(core);
+	err = si476x_core_cmd_fm_rds_status(core, true, false, false, &report);
+	if (!err) {
+		int i = report.rdsfifoused;
+		dev_dbg(&core->client->dev,
+			"%d elements in RDS FIFO. Draining.\n", i);
+		for (; i > 0; --i) {
+			err = si476x_core_cmd_fm_rds_status(core, false, false,
+							    (i == 1), &report);
+			if (err < 0)
+				goto unlock;
+
+			kfifo_in(&core->rds_fifo, report.rds,
+				 sizeof(report.rds));
+			dev_dbg(&core->client->dev, "RDS data:\n %*ph\n",
+				(int)sizeof(report.rds), report.rds);
+		}
+		dev_dbg(&core->client->dev, "Drrrrained!\n");
+		wake_up_interruptible(&core->rds_read_queue);
+	}
+
+unlock:
+	si476x_core_unlock(core);
+	si476x_core_report_drainer_stop(core);
+}
+
+/**
+ * si476x_core_pronounce_dead()
+ *
+ * @core: Core device structure
+ *
+ * Mark the device as being dead and wake up all potentially waiting
+ * threads of execution.
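+ *
+ * This pairs with the I/O error accounting in si476x_core_i2c_xfer()
+ * below: the core is pronounced dead once more than
+ * SI476X_MAX_IO_ERRORS consecutive I2C failures have been observed.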
+ *
+ */
+static void si476x_core_pronounce_dead(struct si476x_core *core)
+{
+	dev_info(&core->client->dev, "Core device is dead.\n");
+
+	atomic_set(&core->is_alive, 0);
+
+	/* Wake up all possible waiting processes */
+	wake_up_interruptible(&core->rds_read_queue);
+
+	atomic_set(&core->cts, 1);
+	wake_up(&core->command);
+
+	atomic_set(&core->stc, 1);
+	wake_up(&core->tuning);
+}
+
+/**
+ * si476x_core_i2c_xfer()
+ *
+ * @core: Core device structure
+ * @type: Transfer type
+ * @buf: Transfer buffer for/with data
+ * @count: Transfer buffer size
+ *
+ * Perform an I2C transfer (either read or write) and keep a counter
+ * of I/O errors. If the error counter rises above the threshold
+ * pronounce device dead.
+ *
+ * The function returns zero on success or negative error code on
+ * failure.
+ */
+int si476x_core_i2c_xfer(struct si476x_core *core,
+			 enum si476x_i2c_type type,
+			 char *buf, int count)
+{
+	static int io_errors_count;
+	int err;
+	if (type == SI476X_I2C_SEND)
+		err = i2c_master_send(core->client, buf, count);
+	else
+		err = i2c_master_recv(core->client, buf, count);
+
+	if (err < 0) {
+		if (io_errors_count++ > SI476X_MAX_IO_ERRORS)
+			si476x_core_pronounce_dead(core);
+	} else {
+		io_errors_count = 0;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_i2c_xfer);
+
+/**
+ * si476x_get_status()
+ * @core: Core device structure
+ *
+ * Get the status byte of the core device by performing a one byte
+ * I2C read.
+ *
+ * The function returns a status value or a negative error code on
+ * error.
+ */
+static int si476x_core_get_status(struct si476x_core *core)
+{
+	u8 response;
+	int err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV,
+				       &response, sizeof(response));
+
+	return (err < 0) ? err : response;
+}
+
+/**
+ * si476x_get_and_signal_status() - IRQ dispatcher
+ * @core: Core device structure
+ *
+ * Dispatch the arrived interrupt request based on the value of the
+ * status byte reported by the tuner.
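+ *
+ * CTS wakes up waiters on the command queue, STCINT wakes up waiters
+ * on the tuning queue and RDSINT kicks off the RDS FIFO drainer.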
+ *
+ */
+static void si476x_core_get_and_signal_status(struct si476x_core *core)
+{
+	int status = si476x_core_get_status(core);
+	if (status < 0) {
+		dev_err(&core->client->dev, "Failed to get status\n");
+		return;
+	}
+
+	if (status & SI476X_CTS) {
+		/* Unfortunately completions could not be used for
+		 * signalling CTS since this flag cannot be cleared
+		 * in status byte, and therefore once it becomes true
+		 * multiple calls to 'complete' would cause the
+		 * commands following the current one to be completed
+		 * before they actually are */
+		dev_dbg(&core->client->dev, "[interrupt] CTSINT\n");
+		atomic_set(&core->cts, 1);
+		wake_up(&core->command);
+	}
+
+	if (status & SI476X_FM_RDS_INT) {
+		dev_dbg(&core->client->dev, "[interrupt] RDSINT\n");
+		si476x_core_start_rds_drainer_once(core);
+	}
+
+	if (status & SI476X_STC_INT) {
+		dev_dbg(&core->client->dev, "[interrupt] STCINT\n");
+		atomic_set(&core->stc, 1);
+		wake_up(&core->tuning);
+	}
+}
+
+static void si476x_core_poll_loop(struct work_struct *work)
+{
+	struct si476x_core *core = SI476X_WORK_TO_CORE(work);
+
+	si476x_core_get_and_signal_status(core);
+
+	if (atomic_read(&core->is_alive))
+		si476x_core_schedule_polling_work(core);
+}
+
+static irqreturn_t si476x_core_interrupt(int irq, void *dev)
+{
+	struct si476x_core *core = dev;
+
+	si476x_core_get_and_signal_status(core);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * si476x_firmware_version_to_revision()
+ * @core: Core device structure
+ * @major: Firmware major number
+ * @minor1: Firmware first minor number
+ * @minor2: Firmware second minor number
+ *
+ * Convert a chip's firmware version number into an offset that later
+ * will be used as an offset into the "vtable" of tuner functions
+ *
+ * This function returns a positive offset in case of success and a -1
+ * in case of failure.
+ */
+static int si476x_core_fwver_to_revision(struct si476x_core *core,
+					 int func, int major,
+					 int minor1, int minor2)
+{
+	switch (func) {
+	case SI476X_FUNC_FM_RECEIVER:
+		switch (major) {
+		case 5:
+			return SI476X_REVISION_A10;
+		case 8:
+			return SI476X_REVISION_A20;
+		case 10:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_AM_RECEIVER:
+		switch (major) {
+		case 5:
+			return SI476X_REVISION_A10;
+		case 7:
+			return SI476X_REVISION_A20;
+		case 9:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_WB_RECEIVER:
+		switch (major) {
+		case 3:
+			return SI476X_REVISION_A10;
+		case 5:
+			return SI476X_REVISION_A20;
+		case 7:
+			return SI476X_REVISION_A30;
+		default:
+			goto unknown_revision;
+		}
+	case SI476X_FUNC_BOOTLOADER:
+	default:		/* FALLTHROUGH */
+		BUG();
+		return -1;
+	}
+
+unknown_revision:
+	dev_err(&core->client->dev,
+		"Unsupported version of the firmware: %d.%d.%d, "
+		"reverting to A10 compatible functions\n",
+		major, minor1, minor2);
+
+	return SI476X_REVISION_A10;
+}
+
+/**
+ * si476x_get_revision_info()
+ * @core: Core device structure
+ *
+ * Get the firmware version number of the device. It is done in
+ * the following three steps:
+ *	1. Power-up the device
+ *	2. Send the 'FUNC_INFO' command
+ *	3. Power the device down.
+ *
+ * The function returns zero on success and a negative error code on
+ * failure.
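+ *
+ * The revision established here later selects the per-revision
+ * command implementations in si476x_cmds_vtable as well as the
+ * property validation predicates used by the regmap code.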
+ */
+static int si476x_core_get_revision_info(struct si476x_core *core)
+{
+	int rval;
+	struct si476x_func_info info;
+
+	si476x_core_lock(core);
+	rval = si476x_core_set_power_state(core, SI476X_POWER_UP_FULL);
+	if (rval < 0)
+		goto exit;
+
+	rval = si476x_core_cmd_func_info(core, &info);
+	if (rval < 0)
+		goto power_down;
+
+	core->revision = si476x_core_fwver_to_revision(core, info.func,
+						       info.firmware.major,
+						       info.firmware.minor[0],
+						       info.firmware.minor[1]);
+power_down:
+	si476x_core_set_power_state(core, SI476X_POWER_DOWN);
+exit:
+	si476x_core_unlock(core);
+
+	return rval;
+}
+
+bool si476x_core_has_am(struct si476x_core *core)
+{
+	return core->chip_id == SI476X_CHIP_SI4761 ||
+		core->chip_id == SI476X_CHIP_SI4764;
+}
+EXPORT_SYMBOL_GPL(si476x_core_has_am);
+
+bool si476x_core_has_diversity(struct si476x_core *core)
+{
+	return core->chip_id == SI476X_CHIP_SI4764;
+}
+EXPORT_SYMBOL_GPL(si476x_core_has_diversity);
+
+bool si476x_core_is_a_secondary_tuner(struct si476x_core *core)
+{
+	return si476x_core_has_diversity(core) &&
+		(core->diversity_mode == SI476X_PHDIV_SECONDARY_ANTENNA ||
+		 core->diversity_mode == SI476X_PHDIV_SECONDARY_COMBINING);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_a_secondary_tuner);
+
+bool si476x_core_is_a_primary_tuner(struct si476x_core *core)
+{
+	return si476x_core_has_diversity(core) &&
+		(core->diversity_mode == SI476X_PHDIV_PRIMARY_ANTENNA ||
+		 core->diversity_mode == SI476X_PHDIV_PRIMARY_COMBINING);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_a_primary_tuner);
+
+bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core)
+{
+	return si476x_core_has_am(core) &&
+		(core->power_up_parameters.func == SI476X_FUNC_AM_RECEIVER);
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_in_am_receiver_mode);
+
+bool si476x_core_is_powered_up(struct si476x_core *core)
+{
+	return core->power_state == SI476X_POWER_UP_FULL;
+}
+EXPORT_SYMBOL_GPL(si476x_core_is_powered_up);
+
+static int si476x_core_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id)
+{
+	int rval;
+	struct si476x_core *core;
+	struct si476x_platform_data *pdata;
+	struct mfd_cell *cell;
+	int cell_num;
+
+	core = devm_kzalloc(&client->dev, sizeof(*core), GFP_KERNEL);
+	if (!core) {
+		dev_err(&client->dev,
+			"failed to allocate 'struct si476x_core'\n");
+		return -ENOMEM;
+	}
+	core->client = client;
+
+	core->regmap = devm_regmap_init_si476x(core);
+	if (IS_ERR(core->regmap)) {
+		rval = PTR_ERR(core->regmap);
+		dev_err(&client->dev,
+			"Failed to allocate register map: %d\n",
+			rval);
+		return rval;
+	}
+
+	i2c_set_clientdata(client, core);
+
+	atomic_set(&core->is_alive, 0);
+	core->power_state = SI476X_POWER_DOWN;
+
+	pdata = client->dev.platform_data;
+	if (pdata) {
+		memcpy(&core->power_up_parameters,
+		       &pdata->power_up_parameters,
+		       sizeof(core->power_up_parameters));
+
+		core->gpio_reset = -1;
+		if (gpio_is_valid(pdata->gpio_reset)) {
+			rval = gpio_request(pdata->gpio_reset, "si476x reset");
+			if (rval) {
+				dev_err(&client->dev,
+					"Failed to request gpio: %d\n", rval);
+				return rval;
+			}
+			core->gpio_reset = pdata->gpio_reset;
+			gpio_direction_output(core->gpio_reset, 0);
+		}
+
+		core->diversity_mode = pdata->diversity_mode;
+		memcpy(&core->pinmux, &pdata->pinmux,
+		       sizeof(struct si476x_pinmux));
+	} else {
+		dev_err(&client->dev, "No platform data provided\n");
+		return -EINVAL;
+	}
+
+	core->supplies[0].supply = "vd";
+	core->supplies[1].supply = "va";
+	core->supplies[2].supply = "vio1";
+	core->supplies[3].supply = "vio2";
+
+	rval = devm_regulator_bulk_get(&client->dev,
+				       ARRAY_SIZE(core->supplies),
+				       core->supplies);
+	if (rval) {
+		dev_err(&client->dev, "Failed to get all of the regulators\n");
+		goto free_gpio;
+	}
+
+	mutex_init(&core->cmd_lock);
+	init_waitqueue_head(&core->command);
+	init_waitqueue_head(&core->tuning);
+
+	rval = kfifo_alloc(&core->rds_fifo,
+			   SI476X_DRIVER_RDS_FIFO_DEPTH *
+			   sizeof(struct v4l2_rds_data),
+			   GFP_KERNEL);
+	if (rval) {
+		dev_err(&client->dev, "Could not allocate the FIFO\n");
+		goto free_gpio;
+	}
+	mutex_init(&core->rds_drainer_status_lock);
+	init_waitqueue_head(&core->rds_read_queue);
+	INIT_WORK(&core->rds_fifo_drainer, si476x_core_drain_rds_fifo);
+
+	if (client->irq) {
+		rval = devm_request_threaded_irq(&client->dev,
+						 client->irq, NULL,
+						 si476x_core_interrupt,
+						 IRQF_TRIGGER_FALLING,
+						 client->name, core);
+		if (rval < 0) {
+			dev_err(&client->dev, "Could not request IRQ %d\n",
+				client->irq);
+			goto free_kfifo;
+		}
+		disable_irq(client->irq);
+		dev_dbg(&client->dev, "IRQ requested.\n");
+
+		core->rds_fifo_depth = 20;
+	} else {
+		INIT_DELAYED_WORK(&core->status_monitor,
+				  si476x_core_poll_loop);
+		dev_info(&client->dev,
+			 "No IRQ number specified, will use polling\n");
+
+		core->rds_fifo_depth = 5;
+	}
+
+	core->chip_id = id->driver_data;
+
+	rval = si476x_core_get_revision_info(core);
+	if (rval < 0) {
+		rval = -ENODEV;
+		goto free_kfifo;
+	}
+
+	cell_num = 0;
+
+	cell = &core->cells[SI476X_RADIO_CELL];
+	cell->name = "si476x-radio";
+	cell_num++;
+
+#ifdef CONFIG_SND_SOC_SI476X
+	if ((core->chip_id == SI476X_CHIP_SI4761 ||
+	     core->chip_id == SI476X_CHIP_SI4764) &&
+	    core->pinmux.dclk == SI476X_DCLK_DAUDIO &&
+	    core->pinmux.dfs == SI476X_DFS_DAUDIO &&
+	    core->pinmux.dout == SI476X_DOUT_I2S_OUTPUT &&
+	    core->pinmux.xout == SI476X_XOUT_TRISTATE) {
+		cell = &core->cells[SI476X_CODEC_CELL];
+		cell->name = "si476x-codec";
+		cell_num++;
+	}
+#endif
+	rval = mfd_add_devices(&client->dev,
+			       (client->adapter->nr << 8) + client->addr,
+			       core->cells, cell_num,
+			       NULL, 0, NULL);
+	if (!rval)
+		return 0;
+
+free_kfifo:
+	kfifo_free(&core->rds_fifo);
+
+free_gpio:
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_free(core->gpio_reset);
+
+	return rval;
+}
+
+static int si476x_core_remove(struct i2c_client *client)
+{
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	si476x_core_pronounce_dead(core);
+	mfd_remove_devices(&client->dev);
+
+	if (client->irq)
+		disable_irq(client->irq);
+	else
+		cancel_delayed_work_sync(&core->status_monitor);
+
+	kfifo_free(&core->rds_fifo);
+
+	if (gpio_is_valid(core->gpio_reset))
+		gpio_free(core->gpio_reset);
+
+	return 0;
+}
+
+
+static const struct i2c_device_id si476x_id[] = {
+	{ "si4761", SI476X_CHIP_SI4761 },
+	{ "si4764", SI476X_CHIP_SI4764 },
+	{ "si4768", SI476X_CHIP_SI4768 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, si476x_id);
+
+static struct i2c_driver si476x_core_driver = {
+	.driver = {
+		.name = "si476x-core",
+		.owner = THIS_MODULE,
+	},
+	.probe = si476x_core_probe,
+	.remove = si476x_core_remove,
+	.id_table = si476x_id,
+};
+module_i2c_driver(si476x_core_driver);
+
+
+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
+MODULE_DESCRIPTION("Si4761/64/68 AM/FM MFD core device driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/si476x-prop.c b/drivers/mfd/si476x-prop.c
new file mode 100644
index 00000000000..cfeffa6e15d
--- /dev/null
+++ b/drivers/mfd/si476x-prop.c
@@ -0,0 +1,241 @@
+/*
+ * drivers/mfd/si476x-prop.c -- Subroutines to access
+ * properties of si476x chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+
+#include <linux/mfd/si476x-core.h>
+
+struct si476x_property_range {
+	u16 low, high;
+};
+
+static bool si476x_core_element_is_in_array(u16 element,
+					    const u16 array[],
+					    size_t size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (element == array[i])
+			return true;
+
+	return false;
+}
+
+static bool si476x_core_element_is_in_range(u16 element,
+					    const struct si476x_property_range range[],
+					    size_t size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (element <= range[i].high && element >= range[i].low)
+			return true;
+
+	return false;
+}
+
+static bool si476x_core_is_valid_property_a10(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x0000,
+		0x0500, 0x0501,
+		0x0600,
+		0x0709, 0x070C, 0x070D, 0x70E, 0x710,
+		0x0718,
+		0x1207, 0x1208,
+		0x2007,
+		0x2300,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x0200, 0x0203 },
+		{ 0x0300, 0x0303 },
+		{ 0x0400, 0x0404 },
+		{ 0x0700, 0x0707 },
+		{ 0x1100, 0x1102 },
+		{ 0x1200, 0x1204 },
+		{ 0x1300, 0x1306 },
+		{ 0x2000, 0x2005 },
+		{ 0x2100, 0x2104 },
+		{ 0x2106, 0x2106 },
+		{ 0x2200, 0x220E },
+		{ 0x3100, 0x3104 },
+		{ 0x3207, 0x320F },
+		{ 0x3300, 0x3304 },
+		{ 0x3500, 0x3517 },
+		{ 0x3600, 0x3617 },
+		{ 0x3700, 0x3717 },
+		{ 0x4000, 0x4003 },
+	};
+
+	return si476x_core_element_is_in_range(property, valid_ranges,
+					       ARRAY_SIZE(valid_ranges)) ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a20(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x071B,
+		0x1006,
+		0x2210,
+		0x3401,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x2215, 0x2219 },
+	};
+
+	return si476x_core_is_valid_property_a10(core, property) ||
+		si476x_core_element_is_in_range(property, valid_ranges,
+						ARRAY_SIZE(valid_ranges)) ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+static bool si476x_core_is_valid_property_a30(struct si476x_core *core,
+					      u16 property)
+{
+	static const u16 valid_properties[] = {
+		0x071C, 0x071D,
+		0x1007, 0x1008,
+		0x220F, 0x2214,
+		0x2301,
+		0x3105, 0x3106,
+		0x3402,
+	};
+
+	static const struct si476x_property_range valid_ranges[] = {
+		{ 0x0405, 0x0411 },
+		{ 0x2008, 0x200B },
+		{ 0x2220, 0x2223 },
+		{ 0x3100, 0x3106 },
+	};
+
+	return si476x_core_is_valid_property_a20(core, property) ||
+		si476x_core_element_is_in_range(property, valid_ranges,
+						ARRAY_SIZE(valid_ranges)) ||
+		si476x_core_element_is_in_array(property, valid_properties,
+						ARRAY_SIZE(valid_properties));
+}
+
+typedef bool (*valid_property_pred_t) (struct si476x_core *, u16);
+
+static bool si476x_core_is_valid_property(struct si476x_core *core,
+					  u16 property)
+{
+	static const valid_property_pred_t is_valid_property[] = {
+		[SI476X_REVISION_A10] = si476x_core_is_valid_property_a10,
+		[SI476X_REVISION_A20] = si476x_core_is_valid_property_a20,
+		[SI476X_REVISION_A30] = si476x_core_is_valid_property_a30,
+	};
+
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+	return is_valid_property[core->revision](core, property);
+}
+
+
+static bool si476x_core_is_readonly_property(struct si476x_core *core,
+					     u16 property)
+{
+	BUG_ON(core->revision > SI476X_REVISION_A30 ||
+	       core->revision == -1);
+
+	switch (core->revision) {
+	case SI476X_REVISION_A10:
+		return (property == 0x3200);
+	case SI476X_REVISION_A20:
+		return (property == 0x1006 ||
+			property == 0x2210 ||
+			property == 0x3200);
+	case SI476X_REVISION_A30:
+		return false;
+	}
+
+	return false;
+}
+
+static bool si476x_core_regmap_readable_register(struct device *dev,
+						 unsigned int reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	return si476x_core_is_valid_property(core, (u16) reg);
+}
+
+static bool si476x_core_regmap_writable_register(struct device *dev,
+						 unsigned int reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct si476x_core *core = i2c_get_clientdata(client);
+
+	return si476x_core_is_valid_property(core, (u16) reg) &&
+		!si476x_core_is_readonly_property(core, (u16) reg);
+}
+
+
+static int si476x_core_regmap_write(void *context, unsigned int reg,
+				    unsigned int val)
+{
+	return si476x_core_cmd_set_property(context, reg, val);
+}
+
+static int si476x_core_regmap_read(void *context, unsigned int reg,
+				   unsigned *val)
+{
+	struct si476x_core *core = context;
+	int err;
+
+	err = si476x_core_cmd_get_property(core, reg);
+	if (err < 0)
+		return err;
+
+	*val = err;
+
+	return 0;
+}
+
+
+static const struct regmap_config si476x_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 16,
+
+	.max_register = 0x4003,
+
+	.writeable_reg = si476x_core_regmap_writable_register,
+	.readable_reg = si476x_core_regmap_readable_register,
+
+	.reg_read = si476x_core_regmap_read,
+	.reg_write = si476x_core_regmap_write,
+
+	.cache_type = REGCACHE_RBTREE,
+};
+
+struct regmap *devm_regmap_init_si476x(struct si476x_core *core)
+{
+	return devm_regmap_init(&core->client->dev, NULL,
+				core, &si476x_regmap_config);
+}
+EXPORT_SYMBOL_GPL(devm_regmap_init_si476x);
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 9bd33169a11..d70a343078f 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -98,17 +98,6 @@ static int sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags)
 	return 0;
 }
 
-static int mfd_remove(struct pci_dev *pdev)
-{
-	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
-
-	if (!mfd)
-		return -ENODEV;
-	list_del(&mfd->list);
-	kfree(mfd);
-	return 0;
-}
-
 /* This function is exported and is not expected to fail */
 u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 		       enum sta2x11_mfd_plat_dev index)
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index fd5fcb63068..0da02e11d58 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -75,6 +75,7 @@ static const struct i2c_device_id stmpe_i2c_id[] = {
 	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
+	{ "stmpe1801", STMPE1801 },
 	{ "stmpe2401", STMPE2401 },
 	{ "stmpe2403", STMPE2403 },
 	{ }
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 973659f8abd..a81badbaa91 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -103,7 +103,7 @@ stmpe_spi_probe(struct spi_device *spi)
 
 static int stmpe_spi_remove(struct spi_device *spi)
 {
-	struct stmpe *stmpe = dev_get_drvdata(&spi->dev);
+	struct stmpe *stmpe = spi_get_drvdata(spi);
 
 	return stmpe_remove(stmpe);
 }
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 4b11202061b..bbccd514d3e 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -19,6 +19,7 @@
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
+#include <linux/delay.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -643,6 +644,88 @@ static struct stmpe_variant_info stmpe1601 = {
 };
 
 /*
+ * STMPE1801
+ */
+static const u8 stmpe1801_regs[] = {
+	[STMPE_IDX_CHIP_ID] = STMPE1801_REG_CHIP_ID,
+	[STMPE_IDX_ICR_LSB] = STMPE1801_REG_INT_CTRL_LOW,
+	[STMPE_IDX_IER_LSB] = STMPE1801_REG_INT_EN_MASK_LOW,
+	[STMPE_IDX_ISR_LSB] = STMPE1801_REG_INT_STA_LOW,
+	[STMPE_IDX_GPMR_LSB] = STMPE1801_REG_GPIO_MP_LOW,
+	[STMPE_IDX_GPSR_LSB] = STMPE1801_REG_GPIO_SET_LOW,
+	[STMPE_IDX_GPCR_LSB] = STMPE1801_REG_GPIO_CLR_LOW,
+	[STMPE_IDX_GPDR_LSB] = STMPE1801_REG_GPIO_SET_DIR_LOW,
+	[STMPE_IDX_GPRER_LSB] = STMPE1801_REG_GPIO_RE_LOW,
+	[STMPE_IDX_GPFER_LSB] = STMPE1801_REG_GPIO_FE_LOW,
+	[STMPE_IDX_IEGPIOR_LSB] = STMPE1801_REG_INT_EN_GPIO_MASK_LOW,
+	[STMPE_IDX_ISGPIOR_LSB] = STMPE1801_REG_INT_STA_GPIO_LOW,
+};
+
+static struct stmpe_variant_block stmpe1801_blocks[] = {
+	{
+		.cell = &stmpe_gpio_cell,
+		.irq = STMPE1801_IRQ_GPIOC,
+		.block = STMPE_BLOCK_GPIO,
+	},
+	{
+		.cell = &stmpe_keypad_cell,
+		.irq = STMPE1801_IRQ_KEYPAD,
+		.block = STMPE_BLOCK_KEYPAD,
+	},
+};
+
+static int stmpe1801_enable(struct stmpe *stmpe, unsigned int blocks,
+			    bool enable)
+{
+	unsigned int mask = 0;
+	if (blocks & STMPE_BLOCK_GPIO)
+		mask |= STMPE1801_MSK_INT_EN_GPIO;
+
+	if (blocks & STMPE_BLOCK_KEYPAD)
+		mask |= STMPE1801_MSK_INT_EN_KPC;
+
+	return __stmpe_set_bits(stmpe, STMPE1801_REG_INT_EN_MASK_LOW, mask,
+				enable ? mask : 0);
+}
+
+static int stmpe1801_reset(struct stmpe *stmpe)
+{
+	unsigned long timeout;
+	int ret = 0;
+
+	ret = __stmpe_set_bits(stmpe, STMPE1801_REG_SYS_CTRL,
+		STMPE1801_MSK_SYS_CTRL_RESET, STMPE1801_MSK_SYS_CTRL_RESET);
+	if (ret < 0)
+		return ret;
+
+	timeout = jiffies + msecs_to_jiffies(100);
+	while (time_before(jiffies, timeout)) {
+		ret = __stmpe_reg_read(stmpe, STMPE1801_REG_SYS_CTRL);
+		if (ret < 0)
+			return ret;
+		if (!(ret & STMPE1801_MSK_SYS_CTRL_RESET))
+			return 0;
+		usleep_range(100, 200);
+	}
+	return -EIO;
+}
+
+static struct stmpe_variant_info stmpe1801 = {
+	.name = "stmpe1801",
+	.id_val = STMPE1801_ID,
+	.id_mask = 0xfff0,
+	.num_gpios = 18,
+	.af_bits = 0,
+	.regs = stmpe1801_regs,
+	.blocks = stmpe1801_blocks,
+	.num_blocks = ARRAY_SIZE(stmpe1801_blocks),
+	.num_irqs = STMPE1801_NR_INTERNAL_IRQS,
+	.enable = stmpe1801_enable,
+	/* stmpe1801 does not have any gpio alternate function */
+	.get_altfunc = NULL,
+};
+
+/*
  * STMPE24XX
  */
@@ -740,6 +823,7 @@ static struct stmpe_variant_info *stmpe_variant_info[STMPE_NBR_PARTS] = {
 	[STMPE801]	= &stmpe801,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
+	[STMPE1801]	= &stmpe1801,
 	[STMPE2401]	= &stmpe2401,
 	[STMPE2403]	= &stmpe2403,
 };
@@ -759,7 +843,7 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 	struct stmpe *stmpe = data;
 	struct stmpe_variant_info *variant = stmpe->variant;
 	int num = DIV_ROUND_UP(variant->num_irqs, 8);
-	u8 israddr = stmpe->regs[STMPE_IDX_ISR_MSB];
+	u8 israddr;
 	u8 isr[num];
 	int ret;
 	int i;
@@ -771,6 +855,11 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 		return IRQ_HANDLED;
 	}
 
+	if (variant->id_val == STMPE1801_ID)
+		israddr = stmpe->regs[STMPE_IDX_ISR_LSB];
+	else
+		israddr = stmpe->regs[STMPE_IDX_ISR_MSB];
+
 	ret = stmpe_block_read(stmpe, israddr, num, isr);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -938,6 +1027,12 @@ static int stmpe_chip_init(struct stmpe *stmpe)
 	if (ret)
 		return ret;
 
+	if (id == STMPE1801_ID) {
+		ret = stmpe1801_reset(stmpe);
+		if (ret < 0)
+			return ret;
+	}
+
 	if (stmpe->irq >= 0) {
 		if (id == STMPE801_ID)
 			icr = STMPE801_REG_SYS_CTRL_INT_EN;
@@ -1015,7 +1110,10 @@ void stmpe_of_probe(struct stmpe_platform_data *pdata, struct device_node *np)
 {
 	struct device_node *child;
 
-	pdata->id = -1;
+	pdata->id = of_alias_get_id(np, "stmpe-i2c");
+	if (pdata->id < 0)
+		pdata->id = -1;
+
 	pdata->irq_trigger = IRQF_TRIGGER_NONE;
 
 	of_property_read_u32(np, "st,autosleep-timeout",
@@ -1057,6 +1155,9 @@ int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 			return -ENOMEM;
 
 		stmpe_of_probe(pdata, np);
+
+		if (of_find_property(np, "interrupts", NULL) == NULL)
+			ci->irq = -1;
 	}
 
 	stmpe = devm_kzalloc(ci->dev, sizeof(struct stmpe), GFP_KERNEL);
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index 7b8e13f5b76..ff2b09ba879 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -199,6 +199,55 @@ int stmpe_remove(struct stmpe *stmpe);
 #define STPME1601_AUTOSLEEP_ENABLE	(1 << 3)
 
 /*
+ * STMPE1801
+ */
+#define STMPE1801_ID			0xc110
+#define STMPE1801_NR_INTERNAL_IRQS	5
+#define STMPE1801_IRQ_KEYPAD_COMBI	4
+#define STMPE1801_IRQ_GPIOC		3
+#define STMPE1801_IRQ_KEYPAD_OVER	2
+#define STMPE1801_IRQ_KEYPAD		1
+#define STMPE1801_IRQ_WAKEUP		0
+
+#define STMPE1801_REG_CHIP_ID			0x00
+#define STMPE1801_REG_SYS_CTRL			0x02
+#define STMPE1801_REG_INT_CTRL_LOW		0x04
+#define STMPE1801_REG_INT_EN_MASK_LOW		0x06
+#define STMPE1801_REG_INT_STA_LOW		0x08
+#define STMPE1801_REG_INT_EN_GPIO_MASK_LOW	0x0A
+#define STMPE1801_REG_INT_EN_GPIO_MASK_MID	0x0B
+#define STMPE1801_REG_INT_EN_GPIO_MASK_HIGH	0x0C
+#define STMPE1801_REG_INT_STA_GPIO_LOW		0x0D
+#define STMPE1801_REG_INT_STA_GPIO_MID		0x0E
+#define STMPE1801_REG_INT_STA_GPIO_HIGH		0x0F
+#define STMPE1801_REG_GPIO_SET_LOW		0x10
+#define STMPE1801_REG_GPIO_SET_MID		0x11
+#define STMPE1801_REG_GPIO_SET_HIGH		0x12
+#define STMPE1801_REG_GPIO_CLR_LOW		0x13
+#define STMPE1801_REG_GPIO_CLR_MID		0x14
+#define STMPE1801_REG_GPIO_CLR_HIGH		0x15
+#define STMPE1801_REG_GPIO_MP_LOW		0x16
+#define STMPE1801_REG_GPIO_MP_MID		0x17
+#define STMPE1801_REG_GPIO_MP_HIGH		0x18
+#define STMPE1801_REG_GPIO_SET_DIR_LOW		0x19
+#define STMPE1801_REG_GPIO_SET_DIR_MID		0x1A
+#define STMPE1801_REG_GPIO_SET_DIR_HIGH		0x1B
+#define STMPE1801_REG_GPIO_RE_LOW		0x1C
+#define STMPE1801_REG_GPIO_RE_MID		0x1D
+#define STMPE1801_REG_GPIO_RE_HIGH		0x1E
+#define STMPE1801_REG_GPIO_FE_LOW		0x1F
+#define STMPE1801_REG_GPIO_FE_MID		0x20
+#define STMPE1801_REG_GPIO_FE_HIGH		0x21
+#define STMPE1801_REG_GPIO_PULL_UP_LOW		0x22
+#define STMPE1801_REG_GPIO_PULL_UP_MID		0x23
+#define STMPE1801_REG_GPIO_PULL_UP_HIGH		0x24
+
+#define STMPE1801_MSK_SYS_CTRL_RESET		(1 << 7)
+
+#define STMPE1801_MSK_INT_EN_KPC		(1 << 1)
+#define STMPE1801_MSK_INT_EN_GPIO		(1 << 3)
+
+/*
  * STMPE24xx
  */
diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index 61aea6381cd..962a6e17a01 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -25,17 +25,15 @@
 static struct platform_driver syscon_driver;
 
 struct syscon {
-	struct device *dev;
 	void __iomem *base;
 	struct regmap *regmap;
 };
 
-static int syscon_match(struct device *dev, void *data)
+static int syscon_match_node(struct device *dev, void *data)
 {
-	struct syscon *syscon = dev_get_drvdata(dev);
 	struct device_node *dn = data;
 
-	return (syscon->dev->of_node == dn) ? 1 : 0;
+	return (dev->of_node == dn) ? 1 : 0;
 }
 
 struct regmap *syscon_node_to_regmap(struct device_node *np)
@@ -44,7 +42,7 @@ struct regmap *syscon_node_to_regmap(struct device_node *np)
 	struct device *dev;
 
 	dev = driver_find_device(&syscon_driver.driver, NULL, np,
-				 syscon_match);
+				 syscon_match_node);
 	if (!dev)
 		return ERR_PTR(-EPROBE_DEFER);
 
@@ -70,6 +68,34 @@ struct regmap *syscon_regmap_lookup_by_compatible(const char *s)
 }
 EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_compatible);
 
+static int syscon_match_pdevname(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	const struct platform_device_id *id = platform_get_device_id(pdev);
+
+	if (id)
+		if (!strcmp(id->name, (const char *)data))
+			return 1;
+
+	return !strcmp(dev_name(dev), (const char *)data);
+}
+
+struct regmap *syscon_regmap_lookup_by_pdevname(const char *s)
+{
+	struct device *dev;
+	struct syscon *syscon;
+
+	dev = driver_find_device(&syscon_driver.driver, NULL, (void *)s,
+				 syscon_match_pdevname);
+	if (!dev)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	syscon = dev_get_drvdata(dev);
+
+	return syscon->regmap;
+}
+EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_pdevname);
+
 struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np,
 					const char *property)
 {
@@ -101,28 +127,22 @@ static struct regmap_config syscon_regmap_config = {
 static int syscon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node;
 	struct syscon *syscon;
-	struct resource res;
-	int ret;
-
-	if (!np)
-		return -ENOENT;
+	struct resource *res;
 
-	syscon = devm_kzalloc(dev, sizeof(struct syscon),
-			      GFP_KERNEL);
+	syscon = devm_kzalloc(dev, sizeof(*syscon), GFP_KERNEL);
 	if (!syscon)
 		return -ENOMEM;
 
-	syscon->base = of_iomap(np, 0);
-	if (!syscon->base)
-		return -EADDRNOTAVAIL;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; - ret = of_address_to_resource(np, 0, &res); - if (ret) - return ret; + syscon->base = devm_ioremap(dev, res->start, resource_size(res)); + if (!syscon->base) + return -ENOMEM; - syscon_regmap_config.max_register = res.end - res.start - 3; + syscon_regmap_config.max_register = res->end - res->start - 3; syscon->regmap = devm_regmap_init_mmio(dev, syscon->base, &syscon_regmap_config); if (IS_ERR(syscon->regmap)) { @@ -130,25 +150,17 @@ static int syscon_probe(struct platform_device *pdev) return PTR_ERR(syscon->regmap); } - syscon->dev = dev; platform_set_drvdata(pdev, syscon); - dev_info(dev, "syscon regmap start 0x%x end 0x%x registered\n", - res.start, res.end); + dev_info(dev, "regmap %pR registered\n", res); return 0; } -static int syscon_remove(struct platform_device *pdev) -{ - struct syscon *syscon; - - syscon = platform_get_drvdata(pdev); - iounmap(syscon->base); - platform_set_drvdata(pdev, NULL); - - return 0; -} +static const struct platform_device_id syscon_ids[] = { + { "syscon", }, + { } +}; static struct platform_driver syscon_driver = { .driver = { @@ -157,7 +169,7 @@ static struct platform_driver syscon_driver = { .of_match_table = of_syscon_match, }, .probe = syscon_probe, - .remove = syscon_remove, + .id_table = syscon_ids, }; static int __init syscon_init(void) diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c index ecc092c7f74..4cb92bb2aea 100644 --- a/drivers/mfd/tc3589x.c +++ b/drivers/mfd/tc3589x.c @@ -350,7 +350,8 @@ static int tc3589x_probe(struct i2c_client *i2c, | I2C_FUNC_SMBUS_I2C_BLOCK)) return -EIO; - tc3589x = kzalloc(sizeof(struct tc3589x), GFP_KERNEL); + tc3589x = devm_kzalloc(&i2c->dev, sizeof(struct tc3589x), + GFP_KERNEL); if (!tc3589x) return -ENOMEM; @@ -366,33 +367,27 @@ static int tc3589x_probe(struct i2c_client *i2c, ret = tc3589x_chip_init(tc3589x); if (ret) - goto out_free; + return ret; ret = tc3589x_irq_init(tc3589x, np); if (ret) - goto out_free; + return ret; ret = request_threaded_irq(tc3589x->i2c->irq, NULL, tc3589x_irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "tc3589x", tc3589x); if (ret) { dev_err(tc3589x->dev, "failed to request IRQ: %d\n", ret); - goto out_free; + return ret; } ret = tc3589x_device_init(tc3589x); if (ret) { dev_err(tc3589x->dev, "failed to add child devices\n"); - goto out_freeirq; + return ret; } return 0; - -out_freeirq: - free_irq(tc3589x->i2c->irq, tc3589x); -out_free: - kfree(tc3589x); - return ret; } static int tc3589x_remove(struct i2c_client *client) @@ -401,10 +396,6 @@ static int tc3589x_remove(struct i2c_client *client) mfd_remove_devices(tc3589x->dev); - free_irq(tc3589x->i2c->irq, tc3589x); - - kfree(tc3589x); - return 0; } diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index 98edb5be85c..fbd6ee67b5a 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -56,12 +56,23 @@ #define TPS65090_INT2_MASK_OVERLOAD_FET6 6 #define TPS65090_INT2_MASK_OVERLOAD_FET7 7 +static struct resource charger_resources[] = { + { + .start = TPS65090_IRQ_VAC_STATUS_CHANGE, + .end = TPS65090_IRQ_VAC_STATUS_CHANGE, + .flags = IORESOURCE_IRQ, + } +}; + static struct mfd_cell tps65090s[] = { { .name = "tps65090-pmic", }, { .name = "tps65090-charger", + .num_resources = ARRAY_SIZE(charger_resources), + .resources = &charger_resources[0], + .of_compatible = "ti,tps65090-charger", }, }; diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index 942b666a2a0..42bd3ea5df3 100644 --- a/drivers/mfd/twl4030-madc.c +++ 
b/drivers/mfd/twl4030-madc.c @@ -211,12 +211,14 @@ static int twl4030battery_current(int raw_volt) * @reg_base - Base address of the first channel * @Channels - 16 bit bitmap. If the bit is set, channel value is read * @buf - The channel values are stored here. if read fails error + * @raw - Return raw values without conversion * value is stored * Returns the number of successfully read channels. */ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, u8 reg_base, unsigned - long channels, int *buf) + long channels, int *buf, + bool raw) { int count = 0, count_req = 0, i; u8 reg; @@ -230,6 +232,10 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, count_req++; continue; } + if (raw) { + count++; + continue; + } switch (i) { case 10: buf[i] = twl4030battery_current(buf[i]); @@ -371,7 +377,7 @@ static irqreturn_t twl4030_madc_threaded_irq_handler(int irq, void *_madc) method = &twl4030_conversion_methods[r->method]; /* Read results */ len = twl4030_madc_read_channels(madc, method->rbase, - r->channels, r->rbuf); + r->channels, r->rbuf, r->raw); /* Return results to caller */ if (r->func_cb != NULL) { r->func_cb(len, r->channels, r->rbuf); @@ -397,7 +403,7 @@ err_i2c: method = &twl4030_conversion_methods[r->method]; /* Read results */ len = twl4030_madc_read_channels(madc, method->rbase, - r->channels, r->rbuf); + r->channels, r->rbuf, r->raw); /* Return results to caller */ if (r->func_cb != NULL) { r->func_cb(len, r->channels, r->rbuf); @@ -585,7 +591,7 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req) goto out; } ret = twl4030_madc_read_channels(twl4030_madc, method->rbase, - req->channels, req->rbuf); + req->channels, req->rbuf, req->raw); twl4030_madc->requests[req->method].active = 0; out: diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index f361bf38a0a..492ee2cd340 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -554,7 +554,7 @@ static int twl6040_probe(struct i2c_client *client, twl6040->supplies[0].supply = "vio"; twl6040->supplies[1].supply = "v2v1"; - ret = regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES, + ret = devm_regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES, twl6040->supplies); if (ret != 0) { dev_err(&client->dev, "Failed to get supplies: %d\n", ret); @@ -564,7 +564,7 @@ static int twl6040_probe(struct i2c_client *client, ret = regulator_bulk_enable(TWL6040_NUM_SUPPLIES, twl6040->supplies); if (ret != 0) { dev_err(&client->dev, "Failed to enable supplies: %d\n", ret); - goto power_err; + goto regulator_get_err; } twl6040->dev = &client->dev; @@ -586,8 +586,8 @@ static int twl6040_probe(struct i2c_client *client, twl6040->audpwron = -EINVAL; if (gpio_is_valid(twl6040->audpwron)) { - ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW, - "audpwron"); + ret = devm_gpio_request_one(&client->dev, twl6040->audpwron, + GPIOF_OUT_INIT_LOW, "audpwron"); if (ret) goto gpio_err; } @@ -596,14 +596,14 @@ static int twl6040_probe(struct i2c_client *client, IRQF_ONESHOT, 0, &twl6040_irq_chip, &twl6040->irq_data); if (ret < 0) - goto irq_init_err; + goto gpio_err; twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_READY); twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_TH); - ret = request_threaded_irq(twl6040->irq_ready, NULL, + ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_ready, NULL, twl6040_readyint_handler, IRQF_ONESHOT, "twl6040_irq_ready", twl6040); if (ret) { @@ -611,7 +611,7 @@ static int twl6040_probe(struct i2c_client 
*client, goto readyirq_err; } - ret = request_threaded_irq(twl6040->irq_th, NULL, + ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_th, NULL, twl6040_thint_handler, IRQF_ONESHOT, "twl6040_irq_th", twl6040); if (ret) { @@ -681,18 +681,13 @@ static int twl6040_probe(struct i2c_client *client, return 0; mfd_err: - free_irq(twl6040->irq_th, twl6040); + devm_free_irq(&client->dev, twl6040->irq_th, twl6040); thirq_err: - free_irq(twl6040->irq_ready, twl6040); + devm_free_irq(&client->dev, twl6040->irq_ready, twl6040); readyirq_err: regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); -irq_init_err: - if (gpio_is_valid(twl6040->audpwron)) - gpio_free(twl6040->audpwron); gpio_err: regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies); -power_err: - regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies); regulator_get_err: i2c_set_clientdata(client, NULL); err: @@ -706,18 +701,14 @@ static int twl6040_remove(struct i2c_client *client) if (twl6040->power_count) twl6040_power(twl6040, 0); - if (gpio_is_valid(twl6040->audpwron)) - gpio_free(twl6040->audpwron); - - free_irq(twl6040->irq_ready, twl6040); - free_irq(twl6040->irq_th, twl6040); + devm_free_irq(&client->dev, twl6040->irq_ready, twl6040); + devm_free_irq(&client->dev, twl6040->irq_th, twl6040); regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); mfd_remove_devices(&client->dev); i2c_set_clientdata(client, NULL); regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies); - regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies); return 0; } diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c index daf69527ed8..e9031fa9d53 100644 --- a/drivers/mfd/ucb1400_core.c +++ b/drivers/mfd/ucb1400_core.c @@ -75,6 +75,11 @@ static int ucb1400_core_probe(struct device *dev) /* GPIO */ ucb_gpio.ac97 = ac97; + if (pdata) { + ucb_gpio.gpio_setup = pdata->gpio_setup; + ucb_gpio.gpio_teardown = pdata->gpio_teardown; + ucb_gpio.gpio_offset = pdata->gpio_offset; + } ucb->ucb1400_gpio = platform_device_alloc("ucb1400_gpio", -1); if (!ucb->ucb1400_gpio) { err = -ENOMEM; diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c index 3c1723aa622..84ce6b9daa3 100644 --- a/drivers/mfd/vexpress-config.c +++ b/drivers/mfd/vexpress-config.c @@ -184,13 +184,14 @@ static int vexpress_config_schedule(struct vexpress_config_trans *trans) spin_lock_irqsave(&bridge->transactions_lock, flags); - vexpress_config_dump_trans("Executing", trans); - - if (list_empty(&bridge->transactions)) + if (list_empty(&bridge->transactions)) { + vexpress_config_dump_trans("Executing", trans); status = bridge->info->func_exec(trans->func->func, trans->offset, trans->write, trans->data); - else + } else { + vexpress_config_dump_trans("Queuing", trans); status = VEXPRESS_CONFIG_STATUS_WAIT; + } switch (status) { case VEXPRESS_CONFIG_STATUS_DONE: @@ -212,25 +213,31 @@ void vexpress_config_complete(struct vexpress_config_bridge *bridge, { struct vexpress_config_trans *trans; unsigned long flags; + const char *message = "Completed"; spin_lock_irqsave(&bridge->transactions_lock, flags); trans = list_first_entry(&bridge->transactions, struct vexpress_config_trans, list); - vexpress_config_dump_trans("Completed", trans); - trans->status = status; - list_del(&trans->list); - if (!list_empty(&bridge->transactions)) { - vexpress_config_dump_trans("Pending", trans); + do { + vexpress_config_dump_trans(message, trans); + list_del(&trans->list); + complete(&trans->completion); - bridge->info->func_exec(trans->func->func, 
trans->offset, - trans->write, trans->data); - } - spin_unlock_irqrestore(&bridge->transactions_lock, flags); + if (list_empty(&bridge->transactions)) + break; + + trans = list_first_entry(&bridge->transactions, + struct vexpress_config_trans, list); + vexpress_config_dump_trans("Executing pending", trans); + trans->status = bridge->info->func_exec(trans->func->func, + trans->offset, trans->write, trans->data); + message = "Finished pending"; + } while (trans->status == VEXPRESS_CONFIG_STATUS_DONE); - complete(&trans->completion); + spin_unlock_irqrestore(&bridge->transactions_lock, flags); } EXPORT_SYMBOL(vexpress_config_complete); diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c index bf75e967a1f..96a020b1dcd 100644 --- a/drivers/mfd/vexpress-sysreg.c +++ b/drivers/mfd/vexpress-sysreg.c @@ -490,12 +490,12 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) return err; } + vexpress_sysreg_dev = &pdev->dev; + platform_device_register_data(vexpress_sysreg_dev, "leds-gpio", PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata, sizeof(vexpress_sysreg_leds_pdata)); - vexpress_sysreg_dev = &pdev->dev; - device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id); return 0; diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index f70c4956ff9..155c4a1a6a9 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include <linux/device.h> #include <linux/module.h> #include <linux/mfd/arizona/core.h> @@ -57,31 +58,54 @@ static const struct reg_default wm5102_reva_patch[] = { }; static const struct reg_default wm5102_revb_patch[] = { + { 0x19, 0x0001 }, { 0x80, 0x0003 }, { 0x081, 0xE022 }, - { 0x410, 0x4080 }, - { 0x418, 0x4080 }, - { 0x420, 0x4080 }, - { 0x428, 0xC000 }, + { 0x410, 0x6080 }, + { 0x418, 0xa080 }, + { 0x420, 0xa080 }, + { 0x428, 0xe000 }, + { 0x443, 0xDC1A }, { 0x4B0, 0x0066 }, { 0x458, 0x000b }, { 0x212, 0x0000 }, + { 0x171, 0x0000 }, + { 0x35E, 0x000C }, + { 0x2D4, 0x0000 }, { 0x80, 0x0000 }, }; /* We use a function so we can use ARRAY_SIZE() */ int wm5102_patch(struct arizona *arizona) { + const struct reg_default *wm5102_patch; + int ret = 0; + int i, patch_size; + switch (arizona->rev) { case 0: - return regmap_register_patch(arizona->regmap, - wm5102_reva_patch, - ARRAY_SIZE(wm5102_reva_patch)); + wm5102_patch = wm5102_reva_patch; + patch_size = ARRAY_SIZE(wm5102_reva_patch); + break; default: - return regmap_register_patch(arizona->regmap, - wm5102_revb_patch, - ARRAY_SIZE(wm5102_revb_patch)); + wm5102_patch = wm5102_revb_patch; + patch_size = ARRAY_SIZE(wm5102_revb_patch); + } + + regcache_cache_bypass(arizona->regmap, true); + + for (i = 0; i < patch_size; i++) { + ret = regmap_write(arizona->regmap, wm5102_patch[i].reg, + wm5102_patch[i].def); + if (ret != 0) { + dev_err(arizona->dev, "Failed to write %x = %x: %d\n", + wm5102_patch[i].reg, wm5102_patch[i].def, ret); + goto out; + } } + +out: + regcache_cache_bypass(arizona->regmap, false); + return ret; } static const struct regmap_irq wm5102_aod_irqs[ARIZONA_NUM_IRQ] = { @@ -282,7 +306,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000155, 0x0000 }, /* R341 - Rate Estimator 4 */ { 0x00000156, 0x0000 }, /* R342 - Rate Estimator 5 */ { 0x00000161, 0x0000 }, /* R353 - Dynamic Frequency Scaling 1 */ - { 0x00000171, 0x0002 }, /* R369 - FLL1 Control 1 */ + { 0x00000171, 0x0000 }, /* R369 - FLL1 Control 1 */ { 0x00000172, 0x0008 }, /* R370 - FLL1 Control 2 */ { 0x00000173, 0x0018 }, 
/* R371 - FLL1 Control 3 */ { 0x00000174, 0x007D }, /* R372 - FLL1 Control 4 */ @@ -366,7 +390,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000400, 0x0000 }, /* R1024 - Output Enables 1 */ { 0x00000408, 0x0000 }, /* R1032 - Output Rate 1 */ { 0x00000409, 0x0022 }, /* R1033 - Output Volume Ramp */ - { 0x00000410, 0x4080 }, /* R1040 - Output Path Config 1L */ + { 0x00000410, 0x6080 }, /* R1040 - Output Path Config 1L */ { 0x00000411, 0x0180 }, /* R1041 - DAC Digital Volume 1L */ { 0x00000412, 0x0081 }, /* R1042 - DAC Volume Limit 1L */ { 0x00000413, 0x0001 }, /* R1043 - Noise Gate Select 1L */ @@ -374,7 +398,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000415, 0x0180 }, /* R1045 - DAC Digital Volume 1R */ { 0x00000416, 0x0081 }, /* R1046 - DAC Volume Limit 1R */ { 0x00000417, 0x0002 }, /* R1047 - Noise Gate Select 1R */ - { 0x00000418, 0x4080 }, /* R1048 - Output Path Config 2L */ + { 0x00000418, 0xA080 }, /* R1048 - Output Path Config 2L */ { 0x00000419, 0x0180 }, /* R1049 - DAC Digital Volume 2L */ { 0x0000041A, 0x0081 }, /* R1050 - DAC Volume Limit 2L */ { 0x0000041B, 0x0004 }, /* R1051 - Noise Gate Select 2L */ @@ -382,11 +406,11 @@ static const struct reg_default wm5102_reg_default[] = { { 0x0000041D, 0x0180 }, /* R1053 - DAC Digital Volume 2R */ { 0x0000041E, 0x0081 }, /* R1054 - DAC Volume Limit 2R */ { 0x0000041F, 0x0008 }, /* R1055 - Noise Gate Select 2R */ - { 0x00000420, 0x4080 }, /* R1056 - Output Path Config 3L */ + { 0x00000420, 0xA080 }, /* R1056 - Output Path Config 3L */ { 0x00000421, 0x0180 }, /* R1057 - DAC Digital Volume 3L */ { 0x00000422, 0x0081 }, /* R1058 - DAC Volume Limit 3L */ { 0x00000423, 0x0010 }, /* R1059 - Noise Gate Select 3L */ - { 0x00000428, 0xC000 }, /* R1064 - Output Path Config 4L */ + { 0x00000428, 0xE000 }, /* R1064 - Output Path Config 4L */ { 0x00000429, 0x0180 }, /* R1065 - DAC Digital Volume 4L */ { 0x0000042A, 0x0081 }, /* R1066 - Out Volume 4L */ { 0x0000042B, 0x0040 }, /* R1067 - Noise Gate Select 4L */ @@ -401,7 +425,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000436, 0x0081 }, /* R1078 - DAC Volume Limit 5R */ { 0x00000437, 0x0200 }, /* R1079 - Noise Gate Select 5R */ { 0x00000450, 0x0000 }, /* R1104 - DAC AEC Control 1 */ - { 0x00000458, 0x0001 }, /* R1112 - Noise Gate Control */ + { 0x00000458, 0x000B }, /* R1112 - Noise Gate Control */ { 0x00000490, 0x0069 }, /* R1168 - PDM SPK1 CTRL 1 */ { 0x00000491, 0x0000 }, /* R1169 - PDM SPK1 CTRL 2 */ { 0x00000500, 0x000C }, /* R1280 - AIF1 BCLK Ctrl */ diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c index 4e70e157a90..e7ed14f661d 100644 --- a/drivers/mfd/wm831x-spi.c +++ b/drivers/mfd/wm831x-spi.c @@ -37,7 +37,7 @@ static int wm831x_spi_probe(struct spi_device *spi) spi->bits_per_word = 16; spi->mode = SPI_MODE_0; - dev_set_drvdata(&spi->dev, wm831x); + spi_set_drvdata(spi, wm831x); wm831x->dev = &spi->dev; wm831x->regmap = devm_regmap_init_spi(spi, &wm831x_regmap_config); @@ -53,7 +53,7 @@ static int wm831x_spi_probe(struct spi_device *spi) static int wm831x_spi_remove(struct spi_device *spi) { - struct wm831x *wm831x = dev_get_drvdata(&spi->dev); + struct wm831x *wm831x = spi_get_drvdata(spi); wm831x_device_exit(wm831x); @@ -69,7 +69,7 @@ static int wm831x_spi_suspend(struct device *dev) static void wm831x_spi_shutdown(struct spi_device *spi) { - struct wm831x *wm831x = dev_get_drvdata(&spi->dev); + struct wm831x *wm831x = spi_get_drvdata(spi); wm831x_device_shutdown(wm831x); } diff --git 
a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 803e93fae56..00e4fe2f3c7 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -19,6 +19,9 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/mfd/core.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> @@ -191,7 +194,7 @@ static const char *wm8958_main_supplies[] = { "SPKVDD2", }; -#ifdef CONFIG_PM +#ifdef CONFIG_PM_RUNTIME static int wm8994_suspend(struct device *dev) { struct wm8994 *wm8994 = dev_get_drvdata(dev); @@ -396,6 +399,60 @@ static const struct reg_default wm1811_reva_patch[] = { { 0x102, 0x0 }, }; +#ifdef CONFIG_OF +static int wm8994_set_pdata_from_of(struct wm8994 *wm8994) +{ + struct device_node *np = wm8994->dev->of_node; + struct wm8994_pdata *pdata = &wm8994->pdata; + int i; + + if (!np) + return 0; + + if (of_property_read_u32_array(np, "wlf,gpio-cfg", pdata->gpio_defaults, + ARRAY_SIZE(pdata->gpio_defaults)) >= 0) { + for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) { + if (wm8994->pdata.gpio_defaults[i] == 0) + pdata->gpio_defaults[i] + = WM8994_CONFIGURE_GPIO; + } + } + + of_property_read_u32_array(np, "wlf,micbias-cfg", pdata->micbias, + ARRAY_SIZE(pdata->micbias)); + + pdata->lineout1_diff = true; + pdata->lineout2_diff = true; + if (of_find_property(np, "wlf,lineout1-se", NULL)) + pdata->lineout1_diff = false; + if (of_find_property(np, "wlf,lineout2-se", NULL)) + pdata->lineout2_diff = false; + + if (of_find_property(np, "wlf,lineout1-feedback", NULL)) + pdata->lineout1fb = true; + if (of_find_property(np, "wlf,lineout2-feedback", NULL)) + pdata->lineout2fb = true; + + if (of_find_property(np, "wlf,ldoena-always-driven", NULL)) + pdata->ldo_ena_always_driven = true; + + pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0); + if (pdata->ldo[0].enable < 0) + pdata->ldo[0].enable = 0; + + pdata->ldo[1].enable = of_get_named_gpio(np, "wlf,ldo2ena", 0); + if (pdata->ldo[1].enable < 0) + pdata->ldo[1].enable = 0; + + return 0; +} +#else +static int wm8994_set_pdata_from_of(struct wm8994 *wm8994) +{ + return 0; +} +#endif + /* * Instantiate the generic non-control parts of the device. 
*/ @@ -405,7 +462,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) struct regmap_config *regmap_config; const struct reg_default *regmap_patch = NULL; const char *devname; - int ret, i, patch_regs; + int ret, i, patch_regs = 0; int pulls = 0; if (dev_get_platdata(wm8994->dev)) { @@ -414,6 +471,10 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) } pdata = &wm8994->pdata; + ret = wm8994_set_pdata_from_of(wm8994); + if (ret != 0) + return ret; + dev_set_drvdata(wm8994->dev, wm8994); /* Add the on-chip regulators first for bootstrapping */ @@ -673,9 +734,9 @@ static void wm8994_device_exit(struct wm8994 *wm8994) } static const struct of_device_id wm8994_of_match[] = { - { .compatible = "wlf,wm1811", }, - { .compatible = "wlf,wm8994", }, - { .compatible = "wlf,wm8958", }, + { .compatible = "wlf,wm1811", .data = (void *)WM1811 }, + { .compatible = "wlf,wm8994", .data = (void *)WM8994 }, + { .compatible = "wlf,wm8958", .data = (void *)WM8958 }, { } }; MODULE_DEVICE_TABLE(of, wm8994_of_match); @@ -683,6 +744,7 @@ MODULE_DEVICE_TABLE(of, wm8994_of_match); static int wm8994_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + const struct of_device_id *of_id; struct wm8994 *wm8994; int ret; @@ -693,7 +755,14 @@ static int wm8994_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, wm8994); wm8994->dev = &i2c->dev; wm8994->irq = i2c->irq; - wm8994->type = id->driver_data; + + if (i2c->dev.of_node) { + of_id = of_match_device(wm8994_of_match, &i2c->dev); + if (of_id) + wm8994->type = (int)of_id->data; + } else { + wm8994->type = id->driver_data; + } wm8994->regmap = devm_regmap_init_i2c(i2c, &wm8994_base_regmap_config); if (IS_ERR(wm8994->regmap)) { @@ -724,15 +793,16 @@ static const struct i2c_device_id wm8994_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, wm8994_i2c_id); -static UNIVERSAL_DEV_PM_OPS(wm8994_pm_ops, wm8994_suspend, wm8994_resume, - NULL); +static const struct dev_pm_ops wm8994_pm_ops = { + SET_RUNTIME_PM_OPS(wm8994_suspend, wm8994_resume, NULL) +}; static struct i2c_driver wm8994_i2c_driver = { .driver = { .name = "wm8994", .owner = THIS_MODULE, .pm = &wm8994_pm_ops, - .of_match_table = wm8994_of_match, + .of_match_table = of_match_ptr(wm8994_of_match), }, .probe = wm8994_i2c_probe, .remove = wm8994_i2c_remove, diff --git a/drivers/power/rx51_battery.c b/drivers/power/rx51_battery.c index 1a1dcb831a1..cbde1d6d322 100644 --- a/drivers/power/rx51_battery.c +++ b/drivers/power/rx51_battery.c @@ -42,6 +42,7 @@ static int rx51_battery_read_adc(int channel) req.method = TWL4030_MADC_SW1; req.func_cb = NULL; req.type = TWL4030_MADC_WAIT; + req.raw = true; if (twl4030_madc_conversion(&req) <= 0) return -ENODATA; diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 0e0bfa03508..115b6445349 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -147,8 +147,7 @@ config PWM_TEGRA config PWM_TIECAP tristate "ECAP PWM support" - depends on SOC_AM33XX - select PWM_TIPWMSS + depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX help PWM driver support for the ECAP APWM controller found on AM33XX TI SOC @@ -158,8 +157,7 @@ config PWM_TIECAP config PWM_TIEHRPWM tristate "EHRPWM PWM support" - depends on SOC_AM33XX - select PWM_TIPWMSS + depends on SOC_AM33XX || ARCH_DAVINCI_DA8XX help PWM driver support for the EHRPWM controller found on AM33XX TI SOC @@ -169,7 +167,7 @@ config PWM_TIEHRPWM config PWM_TIPWMSS bool - depends on SOC_AM33XX && (PWM_TIEHRPWM || PWM_TIECAP) + default y if SOC_AM33XX && (PWM_TIECAP || PWM_TIEHRPWM) help PWM Subsystem 
driver support for AM33xx SOC. diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index 4248d041827..1d07a6f9937 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -66,7 +66,7 @@ static int ab8500_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG, 1 << (chip->base - 1), ENABLE_PWM); if (ret < 0) - dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n", + dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n", pwm->label, ret); return ret; } @@ -88,6 +88,7 @@ static const struct pwm_ops ab8500_pwm_ops = { .config = ab8500_pwm_config, .enable = ab8500_pwm_enable, .disable = ab8500_pwm_disable, + .owner = THIS_MODULE, }; static int ab8500_pwm_probe(struct platform_device *pdev) @@ -99,7 +100,7 @@ static int ab8500_pwm_probe(struct platform_device *pdev) * Nothing to be done in probe, this is required to get the * device which is required for ab8500 read and write */ - ab8500 = kzalloc(sizeof(*ab8500), GFP_KERNEL); + ab8500 = devm_kzalloc(&pdev->dev, sizeof(*ab8500), GFP_KERNEL); if (ab8500 == NULL) { dev_err(&pdev->dev, "failed to allocate memory\n"); return -ENOMEM; @@ -111,10 +112,8 @@ static int ab8500_pwm_probe(struct platform_device *pdev) ab8500->chip.npwm = 1; err = pwmchip_add(&ab8500->chip); - if (err < 0) { - kfree(ab8500); + if (err < 0) return err; - } dev_dbg(&pdev->dev, "pwm probe successful\n"); platform_set_drvdata(pdev, ab8500); @@ -132,7 +131,6 @@ static int ab8500_pwm_remove(struct platform_device *pdev) return err; dev_dbg(&pdev->dev, "pwm driver removed\n"); - kfree(ab8500); return 0; } diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index 16cb5309285..0a7b6582edb 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -358,6 +358,7 @@ static const struct pwm_ops atmel_tcb_pwm_ops = { .set_polarity = atmel_tcb_pwm_set_polarity, .enable = atmel_tcb_pwm_enable, .disable = atmel_tcb_pwm_disable, + .owner = THIS_MODULE, }; static int atmel_tcb_pwm_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 3f5677b7690..ec287989eaf 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -43,7 +43,6 @@ struct imx_chip { struct clk *clk_per; struct clk *clk_ipg; - int enabled; void __iomem *mmio_base; struct pwm_chip chip; @@ -135,7 +134,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH; - if (imx->enabled) + if (test_bit(PWMF_ENABLED, &pwm->flags)) cr |= MX3_PWMCR_EN; writel(cr, imx->mmio_base + MX3_PWMCR); @@ -186,8 +185,6 @@ static int imx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) imx->set_enable(chip, true); - imx->enabled = 1; - return 0; } @@ -198,7 +195,6 @@ static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) imx->set_enable(chip, false); clk_disable_unprepare(imx->clk_per); - imx->enabled = 0; } static struct pwm_ops imx_pwm_ops = { diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index b3f0d0dfd74..8272883c0d0 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -37,6 +37,7 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip); unsigned long long c; int period_cycles, duty_cycles; + u32 val; c = clk_get_rate(lpc32xx->clk) / 256; c = c * period_ns; @@ -68,8 +69,10 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device 
*pwm, c = 255; duty_cycles = 256 - c; - writel(PWM_ENABLE | PWM_RELOADV(period_cycles) | PWM_DUTY(duty_cycles), - lpc32xx->base + (pwm->hwpwm << 2)); + val = readl(lpc32xx->base + (pwm->hwpwm << 2)); + val &= ~0xFFFF; + val |= PWM_RELOADV(period_cycles) | PWM_DUTY(duty_cycles); + writel(val, lpc32xx->base + (pwm->hwpwm << 2)); return 0; } @@ -77,15 +80,29 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip); + u32 val; + int ret; + + ret = clk_enable(lpc32xx->clk); + if (ret) + return ret; - return clk_enable(lpc32xx->clk); + val = readl(lpc32xx->base + (pwm->hwpwm << 2)); + val |= PWM_ENABLE; + writel(val, lpc32xx->base + (pwm->hwpwm << 2)); + + return 0; } static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip); + u32 val; + + val = readl(lpc32xx->base + (pwm->hwpwm << 2)); + val &= ~PWM_ENABLE; + writel(val, lpc32xx->base + (pwm->hwpwm << 2)); - writel(0, lpc32xx->base + (pwm->hwpwm << 2)); clk_disable(lpc32xx->clk); } @@ -145,7 +162,7 @@ static int lpc32xx_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&lpc32xx->chip); } -static struct of_device_id lpc32xx_pwm_dt_ids[] = { +static const struct of_device_id lpc32xx_pwm_dt_ids[] = { { .compatible = "nxp,lpc3220-pwm", }, { /* sentinel */ } }; diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index a53d3094b75..3febdddf71f 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -38,7 +38,6 @@ struct mxs_pwm_chip { struct pwm_chip chip; - struct device *dev; struct clk *clk; void __iomem *base; }; @@ -166,7 +165,6 @@ static int mxs_pwm_probe(struct platform_device *pdev) return ret; } - mxs->dev = &pdev->dev; platform_set_drvdata(pdev, mxs); stmp_reset_block(mxs->base); @@ -181,7 +179,7 @@ static int mxs_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&mxs->chip); } -static struct of_device_id mxs_pwm_dt_ids[] = { +static const struct of_device_id mxs_pwm_dt_ids[] = { { .compatible = "fsl,imx23-pwm", }, { /* sentinel */ } }; diff --git a/drivers/pwm/pwm-puv3.c b/drivers/pwm/pwm-puv3.c index db964e6ecf5..d1eb499fb15 100644 --- a/drivers/pwm/pwm-puv3.c +++ b/drivers/pwm/pwm-puv3.c @@ -27,7 +27,6 @@ struct puv3_pwm_chip { struct pwm_chip chip; void __iomem *base; struct clk *clk; - bool enabled; }; static inline struct puv3_pwm_chip *to_puv3(struct pwm_chip *chip) diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 20370e61de5..dee6ab552a0 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -23,14 +23,13 @@ #include <asm/div64.h> #define HAS_SECONDARY_PWM 0x10 -#define PWM_ID_BASE(d) ((d) & 0xf) static const struct platform_device_id pwm_id_table[] = { /* PWM has_secondary_pwm? 
*/ { "pxa25x-pwm", 0 }, - { "pxa27x-pwm", 0 | HAS_SECONDARY_PWM }, - { "pxa168-pwm", 1 }, - { "pxa910-pwm", 1 }, + { "pxa27x-pwm", HAS_SECONDARY_PWM }, + { "pxa168-pwm", 0 }, + { "pxa910-pwm", 0 }, { }, }; MODULE_DEVICE_TABLE(platform, pwm_id_table); @@ -48,7 +47,6 @@ struct pxa_pwm_chip { struct device *dev; struct clk *clk; - int clk_enabled; void __iomem *mmio_base; }; @@ -108,24 +106,15 @@ static int pxa_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, static int pxa_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip); - int rc = 0; - if (!pc->clk_enabled) { - rc = clk_prepare_enable(pc->clk); - if (!rc) - pc->clk_enabled++; - } - return rc; + return clk_prepare_enable(pc->clk); } static void pxa_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct pxa_pwm_chip *pc = to_pxa_pwm_chip(chip); - if (pc->clk_enabled) { - clk_disable_unprepare(pc->clk); - pc->clk_enabled--; - } + clk_disable_unprepare(pc->clk); } static struct pwm_ops pxa_pwm_ops = { @@ -152,8 +141,6 @@ static int pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); - pwm->clk_enabled = 0; - pwm->chip.dev = &pdev->dev; pwm->chip.ops = &pxa_pwm_ops; pwm->chip.base = -1; diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 5207e6cd864..a0ece50d70b 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -289,10 +289,10 @@ static int s3c_pwm_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int s3c_pwm_suspend(struct platform_device *pdev, pm_message_t state) +#ifdef CONFIG_PM_SLEEP +static int s3c_pwm_suspend(struct device *dev) { - struct s3c_chip *s3c = platform_get_drvdata(pdev); + struct s3c_chip *s3c = dev_get_drvdata(dev); /* No one preserves these values during suspend so reset them * Otherwise driver leaves PWM unconfigured if same values @@ -304,9 +304,9 @@ static int s3c_pwm_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int s3c_pwm_resume(struct platform_device *pdev) +static int s3c_pwm_resume(struct device *dev) { - struct s3c_chip *s3c = platform_get_drvdata(pdev); + struct s3c_chip *s3c = dev_get_drvdata(dev); unsigned long tcon; /* Restore inversion */ @@ -316,21 +316,19 @@ static int s3c_pwm_resume(struct platform_device *pdev) return 0; } - -#else -#define s3c_pwm_suspend NULL -#define s3c_pwm_resume NULL #endif +static SIMPLE_DEV_PM_OPS(s3c_pwm_pm_ops, s3c_pwm_suspend, + s3c_pwm_resume); + static struct platform_driver s3c_pwm_driver = { .driver = { .name = "s3c24xx-pwm", .owner = THIS_MODULE, + .pm = &s3c_pwm_pm_ops, }, .probe = s3c_pwm_probe, .remove = s3c_pwm_remove, - .suspend = s3c_pwm_suspend, - .resume = s3c_pwm_resume, }; static int __init pwm_init(void) diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c index 69a2d9eb34d..6d99e2cbdc7 100644 --- a/drivers/pwm/pwm-spear.c +++ b/drivers/pwm/pwm-spear.c @@ -49,13 +49,11 @@ * @mmio_base: base address of pwm chip * @clk: pointer to clk structure of pwm chip * @chip: linux pwm chip representation - * @dev: pointer to device structure of pwm chip */ struct spear_pwm_chip { void __iomem *mmio_base; struct clk *clk; struct pwm_chip chip; - struct device *dev; }; static inline struct spear_pwm_chip *to_spear_pwm_chip(struct pwm_chip *chip) @@ -143,7 +141,7 @@ static int spear_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) u32 val; rc = clk_enable(pc->clk); - if (!rc) + if (rc) return rc; val = spear_pwm_readl(pc, pwm->hwpwm, PWMCR); 
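The spear_pwm_enable() hunk above, like the clk_prepare()/clk_enable() hunks that follow in the probe path, corrects an inverted return-value test: the clk API returns 0 on success and a negative errno on failure, so the old "if (!rc) return rc;" bailed out on every successful enable and kept going after a failed one. A minimal sketch of the corrected pattern (illustrative only; pc stands for any driver-private chip structure holding a clk handle):

	rc = clk_enable(pc->clk);	/* 0 on success, negative errno on failure */
	if (rc)
		return rc;		/* propagate the failure to the PWM core */

	/* from here on the clock is known to be running */
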
@@ -200,7 +198,6 @@ static int spear_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->clk)) return PTR_ERR(pc->clk); - pc->dev = &pdev->dev; platform_set_drvdata(pdev, pc); pc->chip.dev = &pdev->dev; @@ -209,12 +206,12 @@ static int spear_pwm_probe(struct platform_device *pdev) pc->chip.npwm = NUM_PWM; ret = clk_prepare(pc->clk); - if (!ret) + if (ret) return ret; if (of_device_is_compatible(np, "st,spear1340-pwm")) { ret = clk_enable(pc->clk); - if (!ret) { + if (ret) { clk_unprepare(pc->clk); return ret; } @@ -251,7 +248,7 @@ static int spear_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -static struct of_device_id spear_pwm_of_match[] = { +static const struct of_device_id spear_pwm_of_match[] = { { .compatible = "st,spear320-pwm" }, { .compatible = "st,spear1340-pwm" }, { } diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index af3ab48cb7e..3d75f4a88f9 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -233,7 +233,7 @@ static int tegra_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -static struct of_device_id tegra_pwm_of_match[] = { +static const struct of_device_id tegra_pwm_of_match[] = { { .compatible = "nvidia,tegra20-pwm" }, { .compatible = "nvidia,tegra30-pwm" }, { } diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 22e96e2bffd..0d65fb2e02c 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -295,7 +295,7 @@ static int ecap_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -void ecap_pwm_save_context(struct ecap_pwm_chip *pc) +static void ecap_pwm_save_context(struct ecap_pwm_chip *pc) { pm_runtime_get_sync(pc->chip.dev); pc->ctx.ecctl2 = readw(pc->mmio_base + ECCTL2); @@ -304,13 +304,14 @@ void ecap_pwm_save_context(struct ecap_pwm_chip *pc) pm_runtime_put_sync(pc->chip.dev); } -void ecap_pwm_restore_context(struct ecap_pwm_chip *pc) +static void ecap_pwm_restore_context(struct ecap_pwm_chip *pc) { writel(pc->ctx.cap3, pc->mmio_base + CAP3); writel(pc->ctx.cap4, pc->mmio_base + CAP4); writew(pc->ctx.ecctl2, pc->mmio_base + ECCTL2); } +#ifdef CONFIG_PM_SLEEP static int ecap_pwm_suspend(struct device *dev) { struct ecap_pwm_chip *pc = dev_get_drvdata(dev); @@ -337,6 +338,7 @@ static int ecap_pwm_resume(struct device *dev) ecap_pwm_restore_context(pc); return 0; } +#endif static SIMPLE_DEV_PM_OPS(ecap_pwm_pm_ops, ecap_pwm_suspend, ecap_pwm_resume); diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 8b4c86fa99c..6a217596942 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -533,7 +533,7 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev) return pwmchip_remove(&pc->chip); } -void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) +static void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) { pm_runtime_get_sync(pc->chip.dev); pc->ctx.tbctl = ehrpwm_read(pc->mmio_base, TBCTL); @@ -547,7 +547,7 @@ void ehrpwm_pwm_save_context(struct ehrpwm_pwm_chip *pc) pm_runtime_put_sync(pc->chip.dev); } -void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) +static void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) { ehrpwm_write(pc->mmio_base, TBPRD, pc->ctx.tbprd); ehrpwm_write(pc->mmio_base, CMPA, pc->ctx.cmpa); @@ -559,6 +559,7 @@ void ehrpwm_pwm_restore_context(struct ehrpwm_pwm_chip *pc) ehrpwm_write(pc->mmio_base, TBCTL, pc->ctx.tbctl); } +#ifdef CONFIG_PM_SLEEP static int ehrpwm_pwm_suspend(struct device *dev) { struct ehrpwm_pwm_chip *pc = 
dev_get_drvdata(dev); @@ -594,6 +595,7 @@ static int ehrpwm_pwm_resume(struct device *dev) ehrpwm_pwm_restore_context(pc); return 0; } +#endif static SIMPLE_DEV_PM_OPS(ehrpwm_pwm_pm_ops, ehrpwm_pwm_suspend, ehrpwm_pwm_resume); diff --git a/drivers/pwm/pwm-tipwmss.c b/drivers/pwm/pwm-tipwmss.c index 17cbc59660e..c9c3d3a1e0e 100644 --- a/drivers/pwm/pwm-tipwmss.c +++ b/drivers/pwm/pwm-tipwmss.c @@ -101,6 +101,7 @@ static int pwmss_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int pwmss_suspend(struct device *dev) { struct pwmss_info *info = dev_get_drvdata(dev); @@ -118,6 +119,7 @@ static int pwmss_resume(struct device *dev) writew(info->pwmss_clkconfig, info->mmio_base + PWMSS_CLKCONFIG); return 0; } +#endif static SIMPLE_DEV_PM_OPS(pwmss_pm_ops, pwmss_suspend, pwmss_resume); diff --git a/drivers/pwm/pwm-tipwmss.h b/drivers/pwm/pwm-tipwmss.h index 11f76a1e266..10ad8040408 100644 --- a/drivers/pwm/pwm-tipwmss.h +++ b/drivers/pwm/pwm-tipwmss.h @@ -18,7 +18,6 @@ #ifndef __TIPWMSS_H #define __TIPWMSS_H -#ifdef CONFIG_PWM_TIPWMSS /* PWM substem clock gating */ #define PWMSS_ECAPCLK_EN BIT(0) #define PWMSS_ECAPCLK_STOP_REQ BIT(1) @@ -28,6 +27,7 @@ #define PWMSS_ECAPCLK_EN_ACK BIT(0) #define PWMSS_EPWMCLK_EN_ACK BIT(8) +#ifdef CONFIG_PWM_TIPWMSS extern u16 pwmss_submodule_state_change(struct device *dev, int set); #else static inline u16 pwmss_submodule_state_change(struct device *dev, int set) diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 83e25d45d64..29d1bba4804 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -271,6 +271,7 @@ static const struct pwm_ops twl4030_pwmled_ops = { .enable = twl4030_pwmled_enable, .disable = twl4030_pwmled_disable, .config = twl4030_pwmled_config, + .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwmled_ops = { @@ -279,6 +280,7 @@ static const struct pwm_ops twl6030_pwmled_ops = { .config = twl6030_pwmled_config, .request = twl6030_pwmled_request, .free = twl6030_pwmled_free, + .owner = THIS_MODULE, }; static int twl_pwmled_probe(struct platform_device *pdev) @@ -321,7 +323,7 @@ static int twl_pwmled_remove(struct platform_device *pdev) } #ifdef CONFIG_OF -static struct of_device_id twl_pwmled_of_match[] = { +static const struct of_device_id twl_pwmled_of_match[] = { { .compatible = "ti,twl4030-pwmled" }, { .compatible = "ti,twl6030-pwmled" }, { }, diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index bf3fda29422..eef910580ea 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -248,7 +248,7 @@ static int twl6030_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) twl->twl6030_toggle3 = val; out: mutex_unlock(&twl->mutex); - return 0; + return ret; } static void twl6030_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) @@ -287,12 +287,14 @@ static const struct pwm_ops twl4030_pwm_ops = { .disable = twl4030_pwm_disable, .request = twl4030_pwm_request, .free = twl4030_pwm_free, + .owner = THIS_MODULE, }; static const struct pwm_ops twl6030_pwm_ops = { .config = twl_pwm_config, .enable = twl6030_pwm_enable, .disable = twl6030_pwm_disable, + .owner = THIS_MODULE, }; static int twl_pwm_probe(struct platform_device *pdev) @@ -333,7 +335,7 @@ static int twl_pwm_remove(struct platform_device *pdev) } #ifdef CONFIG_OF -static struct of_device_id twl_pwm_of_match[] = { +static const struct of_device_id twl_pwm_of_match[] = { { .compatible = "ti,twl4030-pwm" }, { .compatible = "ti,twl6030-pwm" }, { }, diff --git a/drivers/s390/char/sclp_cmd.c 
b/drivers/s390/char/sclp_cmd.c index 178836ec252..bf07c3a188d 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -345,7 +345,6 @@ struct memory_increment { struct list_head list; u16 rn; int standby; - int usecount; }; struct assign_storage_sccb { @@ -463,21 +462,10 @@ static int sclp_mem_change_state(unsigned long start, unsigned long size, break; if (start > istart + rzm - 1) continue; - if (online) { - if (incr->usecount++) - continue; - /* - * Don't break the loop if one assign fails. Loop may - * be walked again on CANCEL and we can't save - * information if state changed before or not. - * So continue and increase usecount for all increments. - */ + if (online) rc |= sclp_assign_storage(incr->rn); - } else { - if (--incr->usecount) - continue; + else sclp_unassign_storage(incr->rn); - } } return rc ? -EIO : 0; } @@ -561,8 +549,6 @@ static void __init sclp_add_standby_memory(void) add_memory_merged(0); } -#define MEM_SCT_SIZE (1UL << SECTION_SIZE_BITS) - static void __init insert_increment(u16 rn, int standby, int assigned) { struct memory_increment *incr, *new_incr; @@ -574,8 +560,6 @@ static void __init insert_increment(u16 rn, int standby, int assigned) return; new_incr->rn = rn; new_incr->standby = standby; - if (!standby) - new_incr->usecount = rzm > MEM_SCT_SIZE ? rzm/MEM_SCT_SIZE : 1; last_rn = 0; prev = &sclp_mem_list; list_for_each_entry(incr, &sclp_mem_list, list) { diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 22820610022..9e5e14686e7 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -426,7 +426,7 @@ static int zcore_memmap_open(struct inode *inode, struct file *filp) GFP_KERNEL); if (!chunk_array) return -ENOMEM; - detect_memory_layout(chunk_array); + detect_memory_layout(chunk_array, 0); buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL); if (!buf) { kfree(chunk_array); @@ -557,7 +557,7 @@ static void __init set_lc_mask(struct save_area *map) /* * Initialize dump globals for a given architecture */ -static int __init sys_info_init(enum arch_id arch) +static int __init sys_info_init(enum arch_id arch, unsigned long mem_end) { int rc; @@ -579,7 +579,7 @@ static int __init sys_info_init(enum arch_id arch) rc = init_cpu_info(arch); if (rc) return rc; - sys_info.mem_size = real_memory_size; + sys_info.mem_size = mem_end; return 0; } @@ -601,7 +601,7 @@ static int __init check_sdias(void) return 0; } -static int __init get_mem_size(unsigned long *mem) +static int __init get_mem_info(unsigned long *mem, unsigned long *end) { int i; struct mem_chunk *chunk_array; @@ -610,33 +610,31 @@ static int __init get_mem_size(unsigned long *mem) GFP_KERNEL); if (!chunk_array) return -ENOMEM; - detect_memory_layout(chunk_array); + detect_memory_layout(chunk_array, 0); for (i = 0; i < MEMORY_CHUNKS; i++) { if (chunk_array[i].size == 0) break; *mem += chunk_array[i].size; + *end = max(*end, chunk_array[i].addr + chunk_array[i].size); } kfree(chunk_array); return 0; } -static int __init zcore_header_init(int arch, struct zcore_header *hdr) +static void __init zcore_header_init(int arch, struct zcore_header *hdr, + unsigned long mem_size) { - int rc, i; - unsigned long memory = 0; u32 prefix; + int i; if (arch == ARCH_S390X) hdr->arch_id = DUMP_ARCH_S390X; else hdr->arch_id = DUMP_ARCH_S390; - rc = get_mem_size(&memory); - if (rc) - return rc; - hdr->mem_size = memory; - hdr->rmem_size = memory; + hdr->mem_size = mem_size; + hdr->rmem_size = mem_size; hdr->mem_end = sys_info.mem_size; - hdr->num_pages = 
memory / PAGE_SIZE; + hdr->num_pages = mem_size / PAGE_SIZE; hdr->tod = get_tod_clock(); get_cpu_id(&hdr->cpu_id); for (i = 0; zfcpdump_save_areas[i]; i++) { @@ -647,7 +645,6 @@ static int __init zcore_header_init(int arch, struct zcore_header *hdr) hdr->lc_vec[hdr->cpu_cnt] = prefix; hdr->cpu_cnt++; } - return 0; } /* @@ -682,9 +679,11 @@ static int __init zcore_reipl_init(void) static int __init zcore_init(void) { + unsigned long mem_size, mem_end; unsigned char arch; int rc; + mem_size = mem_end = 0; if (ipl_info.type != IPL_TYPE_FCP_DUMP) return -ENODATA; if (OLDMEM_BASE) @@ -727,13 +726,14 @@ static int __init zcore_init(void) } #endif /* CONFIG_64BIT */ - rc = sys_info_init(arch); + rc = get_mem_info(&mem_size, &mem_end); if (rc) goto fail; - rc = zcore_header_init(arch, &zcore_header); + rc = sys_info_init(arch, mem_end); if (rc) goto fail; + zcore_header_init(arch, &zcore_header, mem_size); rc = zcore_reipl_init(); if (rc) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2d2a966a3b3..a9fe3de2dec 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -1,7 +1,7 @@ /* * S/390 common I/O routines -- blacklisting of specific devices * - * Copyright IBM Corp. 1999, 2002 + * Copyright IBM Corp. 1999, 2013 * Author(s): Ingo Adlung (adlung@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) * Arnd Bergmann (arndb@de.ibm.com) @@ -17,8 +17,9 @@ #include <linux/ctype.h> #include <linux/device.h> -#include <asm/cio.h> #include <asm/uaccess.h> +#include <asm/cio.h> +#include <asm/ipl.h> #include "blacklist.h" #include "cio.h" @@ -172,6 +173,29 @@ static int blacklist_parse_parameters(char *str, range_action action, to_cssid = __MAX_CSSID; to_ssid = __MAX_SSID; to = __MAX_SUBCHANNEL; + } else if (strcmp(parm, "ipldev") == 0) { + if (ipl_info.type == IPL_TYPE_CCW) { + from_cssid = 0; + from_ssid = ipl_info.data.ccw.dev_id.ssid; + from = ipl_info.data.ccw.dev_id.devno; + } else if (ipl_info.type == IPL_TYPE_FCP || + ipl_info.type == IPL_TYPE_FCP_DUMP) { + from_cssid = 0; + from_ssid = ipl_info.data.fcp.dev_id.ssid; + from = ipl_info.data.fcp.dev_id.devno; + } else { + continue; + } + to_cssid = from_cssid; + to_ssid = from_ssid; + to = from; + } else if (strcmp(parm, "condev") == 0) { + if (console_devno == -1) + continue; + + from_cssid = to_cssid = 0; + from_ssid = to_ssid = 0; + from = to = console_devno; } else { rc = parse_busid(strsep(&parm, "-"), &from_cssid, &from_ssid, &from, msgtrigger); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index b8b340ac533..9de41aa1489 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -954,15 +954,11 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { - /* Delete the AP bus rescan timer. */ - del_timer(&ap_config_timer); - - /* processing a synchonuous bus rescan */ - ap_scan_bus(NULL); - - /* Setup the AP bus rescan timer again. */ - ap_config_timer.expires = jiffies + ap_config_time * HZ; - add_timer(&ap_config_timer); + /* reconfigure the AP bus rescan timer. 
*/ + mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); + /* process an asynchronous bus rescan */ + queue_work(ap_work_queue, &ap_config_work); + flush_work(&ap_config_work); } EXPORT_SYMBOL(ap_bus_force_rescan); @@ -1305,8 +1301,9 @@ static void ap_scan_bus(struct work_struct *unused) int rc, i; ap_query_configuration(); - if (ap_select_domain() != 0) + if (ap_select_domain() != 0) { return; + } for (i = 0; i < AP_DEVICES; i++) { qid = AP_MKQID(i, ap_domain_index); dev = bus_find_device(&ap_bus_type, NULL, diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 6711e65764b..2ea6165366b 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr) } /* * Init function for virtio - * devices are in a single page above top of "normal" mem + * devices are in a single page above top of "normal" + standby mem */ static int __init kvm_devices_init(void) { int rc; + unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax(); if (!MACHINE_IS_KVM) return -ENODEV; - if (test_devices_support(real_memory_size) < 0) + if (test_devices_support(total_memory_size) < 0) return -ENODEV; - rc = vmem_add_mapping(real_memory_size, PAGE_SIZE); + rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); if (rc) return rc; - kvm_devices = (void *) real_memory_size; + kvm_devices = (void *) total_memory_size; kvm_root = root_device_register("kvm_s390"); if (IS_ERR(kvm_root)) { rc = PTR_ERR(kvm_root); printk(KERN_ERR "Could not register kvm_s390 root device"); - vmem_remove_mapping(real_memory_size, PAGE_SIZE); + vmem_remove_mapping(total_memory_size, PAGE_SIZE); return rc; } diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index fb877b59ec5..779dc513629 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -31,6 +31,7 @@ #include <asm/irq.h> #include <asm/cio.h> #include <asm/ccwdev.h> +#include <asm/virtio-ccw.h> /* * virtio related functions @@ -77,12 +78,9 @@ struct virtio_ccw_vq_info { void *queue; struct vq_info_block *info_block; struct list_head node; + long cookie; }; -#define KVM_VIRTIO_CCW_RING_ALIGN 4096 - -#define KVM_S390_VIRTIO_CCW_NOTIFY 3 - #define CCW_CMD_SET_VQ 0x13 #define CCW_CMD_VDEV_RESET 0x33 #define CCW_CMD_SET_IND 0x43 @@ -135,8 +133,11 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, do { spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); - if (!ret) + if (!ret) { + if (!vcdev->curr_io) + vcdev->err = 0; vcdev->curr_io |= flag; + } spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags); cpu_relax(); } while (ret == -EBUSY); @@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, } static inline long do_kvm_notify(struct subchannel_id schid, - unsigned long queue_index) + unsigned long queue_index, + long cookie) { register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY; register struct subchannel_id __schid asm("2") = schid; register unsigned long __index asm("3") = queue_index; register long __rc asm("2"); + register long __cookie asm("4") = cookie; asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index) + : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index), + "d"(__cookie) : "memory", "cc"); return __rc; } @@ -166,7 +170,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); 
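	/*
	 * Sketch of the notify fast path set up above: the cookie the host
	 * returns from one notify hypercall is handed back on the next one
	 * (in register 4 of the diag 0x500 call), so the host may use it as
	 * a cached handle for the subchannel/queue lookup:
	 *
	 *	info->cookie = do_kvm_notify(schid, vq->index, info->cookie);
	 */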
ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, vq->index); + info->cookie = do_kvm_notify(schid, vq->index, info->cookie); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index fa00304a63d..1fea627394d 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -274,7 +274,7 @@ static int pwm_backlight_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int pwm_backlight_suspend(struct device *dev) { struct backlight_device *bl = dev_get_drvdata(dev); @@ -296,19 +296,16 @@ static int pwm_backlight_resume(struct device *dev) backlight_update_status(bl); return 0; } +#endif static SIMPLE_DEV_PM_OPS(pwm_backlight_pm_ops, pwm_backlight_suspend, pwm_backlight_resume); -#endif - static struct platform_driver pwm_backlight_driver = { .driver = { .name = "pwm-backlight", .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &pwm_backlight_pm_ops, -#endif .of_match_table = of_match_ptr(pwm_backlight_of_match), }, .probe = pwm_backlight_probe, diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 01443ce43ee..13ddec92341 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,15 +61,6 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; - - /* - * Otherwise it's an offset mount and we need to check - * if we can umount its mount, if there is one. - */ - if (!d_mountpoint(path.dentry)) { - status = 0; - goto done; - } } /* Update the expiry counter if fs is busy */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 9bd16255dd9..085da86e07c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -408,7 +408,7 @@ done: return NULL; } -int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) +static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a60ea977af6..3e68ac10104 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,15 +236,21 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; + struct ceph_osd_data *osd_data; int rc = req->r_result; int bytes = le32_to_cpu(msg->hdr.data_len); + int num_pages; int i; dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ - for (i = 0; i < req->r_num_pages; i++, bytes -= PAGE_CACHE_SIZE) { - struct page *page = req->r_pages[i]; + osd_data = osd_req_op_extent_osd_data(req, 0); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); + num_pages = calc_pages_for((u64)osd_data->alignment, + (u64)osd_data->length); + for (i = 0; i < num_pages; i++) { + struct page *page = osd_data->pages[i]; if (bytes < (int)PAGE_CACHE_SIZE) { /* zero (remainder of) page */ @@ -257,8 +263,9 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) SetPageUptodate(page); unlock_page(page); page_cache_release(page); + bytes -= PAGE_CACHE_SIZE; } - kfree(req->r_pages); + kfree(osd_data->pages); } static void ceph_unlock_page_vector(struct page **pages, int num_pages) @@ -279,6 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int 
max) &ceph_inode_to_client(inode)->client->osdc; struct ceph_inode_info *ci = ceph_inode(inode); struct page *page = list_entry(page_list->prev, struct page, lru); + struct ceph_vino vino; struct ceph_osd_request *req; u64 off; u64 len; @@ -303,18 +311,17 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) len = nr_pages << PAGE_CACHE_SHIFT; dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages, off, len); - - req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), - off, &len, - CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, 0, + vino = ceph_vino(inode); + req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, + 1, CEPH_OSD_OP_READ, + CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 0); + false); if (IS_ERR(req)) return PTR_ERR(req); /* build page vector */ - nr_pages = len >> PAGE_CACHE_SHIFT; + nr_pages = calc_pages_for(0, len); pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS); ret = -ENOMEM; if (!pages) @@ -336,11 +343,12 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) } pages[i] = page; } - req->r_pages = pages; - req->r_num_pages = nr_pages; + osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false); req->r_callback = finish_read; req->r_inode = inode; + ceph_osdc_build_request(req, off, NULL, vino.snap, NULL); + dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len); ret = ceph_osdc_start_request(osdc, req, false); if (ret < 0) @@ -373,7 +381,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; - dout("readpages %p file %p nr_pages %d max %d\n", inode, file, nr_pages, + dout("readpages %p file %p nr_pages %d max %d\n", inode, + file, nr_pages, max); while (!list_empty(page_list)) { rc = start_read(inode, page_list, max); @@ -548,17 +557,23 @@ static void writepages_finish(struct ceph_osd_request *req, { struct inode *inode = req->r_inode; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_osd_data *osd_data; unsigned wrote; struct page *page; + int num_pages; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; int rc = req->r_result; - u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); + u64 bytes = req->r_ops[0].extent.length; struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); + osd_data = osd_req_op_extent_osd_data(req, 0); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES); + num_pages = calc_pages_for((u64)osd_data->alignment, + (u64)osd_data->length); if (rc >= 0) { /* * Assume we wrote the pages we originally sent. The @@ -566,7 +581,7 @@ static void writepages_finish(struct ceph_osd_request *req, * raced with a truncation and was adjusted at the osd, * so don't believe the reply. 
*/ - wrote = req->r_num_pages; + wrote = num_pages; } else { wrote = 0; mapping_set_error(mapping, rc); @@ -575,8 +590,8 @@ static void writepages_finish(struct ceph_osd_request *req, inode, rc, bytes, wrote); /* clean all pages */ - for (i = 0; i < req->r_num_pages; i++) { - page = req->r_pages[i]; + for (i = 0; i < num_pages; i++) { + page = osd_data->pages[i]; BUG_ON(!page); WARN_ON(!PageUptodate(page)); @@ -605,32 +620,34 @@ static void writepages_finish(struct ceph_osd_request *req, unlock_page(page); } dout("%p wrote+cleaned %d pages\n", inode, wrote); - ceph_put_wrbuffer_cap_refs(ci, req->r_num_pages, snapc); + ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc); - ceph_release_pages(req->r_pages, req->r_num_pages); - if (req->r_pages_from_pool) - mempool_free(req->r_pages, + ceph_release_pages(osd_data->pages, num_pages); + if (osd_data->pages_from_pool) + mempool_free(osd_data->pages, ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); else - kfree(req->r_pages); + kfree(osd_data->pages); ceph_osdc_put_request(req); } -/* - * allocate a page vec, either directly, or if necessary, via a the - * mempool. we avoid the mempool if we can because req->r_num_pages - * may be less than the maximum write size. - */ -static void alloc_page_vec(struct ceph_fs_client *fsc, - struct ceph_osd_request *req) +static struct ceph_osd_request * +ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len, + struct ceph_snap_context *snapc, int num_ops) { - req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, - GFP_NOFS); - if (!req->r_pages) { - req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); - req->r_pages_from_pool = 1; - WARN_ON(!req->r_pages); - } + struct ceph_fs_client *fsc; + struct ceph_inode_info *ci; + struct ceph_vino vino; + + fsc = ceph_inode_to_client(inode); + ci = ceph_inode(inode); + vino = ceph_vino(inode); + /* BUG_ON(vino.snap != CEPH_NOSNAP); */ + + return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, + vino, offset, len, num_ops, CEPH_OSD_OP_WRITE, + CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK, + snapc, ci->i_truncate_seq, ci->i_truncate_size, true); } /* @@ -653,7 +670,7 @@ static int ceph_writepages_start(struct address_space *mapping, unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; int do_sync; - u64 snap_size = 0; + u64 snap_size; /* * Include a 'sync' in the OSD request if this is a data @@ -699,6 +716,7 @@ static int ceph_writepages_start(struct address_space *mapping, retry: /* find oldest snap context with dirty data */ ceph_put_snap_context(snapc); + snap_size = 0; snapc = get_oldest_context(inode, &snap_size); if (!snapc) { /* hmm, why does writepages get called when there @@ -706,6 +724,8 @@ retry: dout(" no snap context with dirty data?\n"); goto out; } + if (snap_size == 0) + snap_size = i_size_read(inode); dout(" oldest snapc is %p seq %lld (%d snaps)\n", snapc, snapc->seq, snapc->num_snaps); if (last_snapc && snapc != last_snapc) { @@ -718,10 +738,14 @@ retry: last_snapc = snapc; while (!done && index <= end) { + int num_ops = do_sync ? 
2 : 1; + struct ceph_vino vino; unsigned i; int first; pgoff_t next; int pvec_pages, locked_pages; + struct page **pages = NULL; + mempool_t *pool = NULL; /* Becomes non-null if mempool used */ struct page *page; int want; u64 offset, len; @@ -773,11 +797,8 @@ get_more_pages: dout("waiting on writeback %p\n", page); wait_on_page_writeback(page); } - if ((snap_size && page_offset(page) > snap_size) || - (!snap_size && - page_offset(page) > i_size_read(inode))) { - dout("%p page eof %llu\n", page, snap_size ? - snap_size : i_size_read(inode)); + if (page_offset(page) >= snap_size) { + dout("%p page eof %llu\n", page, snap_size); done = 1; unlock_page(page); break; @@ -805,22 +826,23 @@ get_more_pages: break; } - /* ok */ + /* + * We have something to write. If this is + * the first locked page this time through, + * allocate an osd request and a page array + * that it will use. + */ if (locked_pages == 0) { + size_t size; + + BUG_ON(pages); + /* prepare async write request */ - offset = (u64) page_offset(page); + offset = (u64)page_offset(page); len = wsize; - req = ceph_osdc_new_request(&fsc->client->osdc, - &ci->i_layout, - ceph_vino(inode), - offset, &len, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, - snapc, do_sync, - ci->i_truncate_seq, - ci->i_truncate_size, - &inode->i_mtime, true, 0); + req = ceph_writepages_osd_request(inode, + offset, &len, snapc, + num_ops); if (IS_ERR(req)) { rc = PTR_ERR(req); @@ -828,11 +850,17 @@ get_more_pages: break; } - max_pages = req->r_num_pages; - - alloc_page_vec(fsc, req); req->r_callback = writepages_finish; req->r_inode = inode; + + max_pages = calc_pages_for(0, (u64)len); + size = max_pages * sizeof (*pages); + pages = kmalloc(size, GFP_NOFS); + if (!pages) { + pool = fsc->wb_pagevec_pool; + pages = mempool_alloc(pool, GFP_NOFS); + BUG_ON(!pages); + } } /* note position of first page in pvec */ @@ -850,7 +878,7 @@ get_more_pages: } set_page_writeback(page); - req->r_pages[locked_pages] = page; + pages[locked_pages] = page; locked_pages++; next = page->index + 1; } @@ -879,18 +907,27 @@ get_more_pages: pvec.nr -= i-first; } - /* submit the write */ - offset = req->r_pages[0]->index << PAGE_CACHE_SHIFT; - len = min((snap_size ? 
snap_size : i_size_read(inode)) - offset, + /* Format the osd request message and submit the write */ + + offset = page_offset(pages[0]); + len = min(snap_size - offset, (u64)locked_pages << PAGE_CACHE_SHIFT); dout("writepages got %d pages at %llu~%llu\n", locked_pages, offset, len); - /* revise final length, page count */ - req->r_num_pages = locked_pages; - req->r_request_ops[0].extent.length = cpu_to_le64(len); - req->r_request_ops[0].payload_len = cpu_to_le32(len); - req->r_request->hdr.data_len = cpu_to_le32(len); + osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, + !!pool, false); + + pages = NULL; /* request message now owns the pages array */ + pool = NULL; + + /* Update the write op length in case we changed it */ + + osd_req_op_extent_update(req, 0, len); + + vino = ceph_vino(inode); + ceph_osdc_build_request(req, offset, snapc, vino.snap, + &inode->i_mtime); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); BUG_ON(rc); @@ -1067,51 +1104,23 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_file_info *fi = file->private_data; struct page *page; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - int r, want, got = 0; - - if (fi->fmode & CEPH_FILE_MODE_LAZY) - want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; - else - want = CEPH_CAP_FILE_BUFFER; - - dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos, len, inode->i_size); - r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len); - if (r < 0) - return r; - dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); - if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) { - ceph_put_cap_refs(ci, got); - return -EAGAIN; - } + int r; do { /* get a page */ page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) { - r = -ENOMEM; - break; - } + if (!page) + return -ENOMEM; + *pagep = page; dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); r = ceph_update_writeable_page(file, pos, len, page); - if (r) - page_cache_release(page); } while (r == -EAGAIN); - if (r) { - ceph_put_cap_refs(ci, got); - } else { - *pagep = page; - *(int *)fsdata = got; - } return r; } @@ -1125,12 +1134,10 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; - int got = (unsigned long)fsdata; dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, inode, page, (int)pos, (int)copied, (int)len); @@ -1153,19 +1160,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, up_read(&mdsc->snap_rwsem); page_cache_release(page); - if (copied > 0) { - int dirty; - spin_lock(&ci->i_ceph_lock); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); - spin_unlock(&ci->i_ceph_lock); - if (dirty) - __mark_inode_dirty(inode, dirty); - } - - dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); - ceph_put_cap_refs(ci, got); - if (check_cap) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); diff --git 
a/fs/ceph/caps.c b/fs/ceph/caps.c index 78e2f575247..da0f9b8a3bc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -490,15 +490,17 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, ci->i_rdcache_gen++; /* - * if we are newly issued FILE_SHARED, clear D_COMPLETE; we + * if we are newly issued FILE_SHARED, mark dir not complete; we * don't know what happened to this directory while we didn't * have the cap. */ if ((issued & CEPH_CAP_FILE_SHARED) && (had & CEPH_CAP_FILE_SHARED) == 0) { ci->i_shared_gen++; - if (S_ISDIR(ci->vfs_inode.i_mode)) - ceph_dir_clear_complete(&ci->vfs_inode); + if (S_ISDIR(ci->vfs_inode.i_mode)) { + dout(" marking %p NOT complete\n", &ci->vfs_inode); + __ceph_dir_clear_complete(ci); + } } } @@ -553,6 +555,7 @@ retry: cap->implemented = 0; cap->mds = mds; cap->mds_wanted = 0; + cap->mseq = 0; cap->ci = ci; __insert_cap_node(ci, cap); @@ -628,7 +631,10 @@ retry: cap->cap_id = cap_id; cap->issued = issued; cap->implemented |= issued; - cap->mds_wanted |= wanted; + if (mseq > cap->mseq) + cap->mds_wanted = wanted; + else + cap->mds_wanted |= wanted; cap->seq = seq; cap->issue_seq = seq; cap->mseq = mseq; @@ -997,9 +1003,9 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } -static void __queue_cap_release(struct ceph_mds_session *session, - u64 ino, u64 cap_id, u32 migrate_seq, - u32 issue_seq) +void __queue_cap_release(struct ceph_mds_session *session, + u64 ino, u64 cap_id, u32 migrate_seq, + u32 issue_seq) { struct ceph_msg *msg; struct ceph_mds_cap_release *head; @@ -2046,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, goto out; } + /* finish pending truncate */ + while (ci->i_truncate_pending) { + spin_unlock(&ci->i_ceph_lock); + __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); + spin_lock(&ci->i_ceph_lock); + } + if (need & CEPH_CAP_FILE_WR) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { dout("get_cap_refs %p endoff %llu > maxsize %llu\n", @@ -2067,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, } have = __ceph_caps_issued(ci, &implemented); - /* - * disallow writes while a truncate is pending - */ - if (ci->i_truncate_pending) - have &= ~CEPH_CAP_FILE_WR; - if ((have & need) == need) { /* * Look at (implemented & ~have & not) so that we keep waiting diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6d797f46d77..f02d82b7933 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -107,7 +107,7 @@ static unsigned fpos_off(loff_t p) * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * - * D_COMPLETE tells indicates we have all dentries in the dir. It is + * Complete dir indicates that we have all dentries in the dir. It is * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by * the MDS if/when the directory is modified). 
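An aside on the completeness tracking that replaces D_COMPLETE throughout this patch: the state now lives in two counters on the inode rather than in a dentry flag. Every invalidation bumps i_release_count; a readdir that sees the whole directory stores the release count it sampled at the start into i_complete_count; the directory is complete exactly when the two agree. The helpers, as introduced in the fs/ceph/super.h hunk later in this patch:

    static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
                                               int release_count)
    {
            atomic_set(&ci->i_complete_count, release_count);
    }

    static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
    {
            atomic_inc(&ci->i_release_count);
    }

    static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
    {
            return atomic_read(&ci->i_complete_count) ==
                   atomic_read(&ci->i_release_count);
    }

The helpers themselves take no lock; call sites such as ceph_readdir() and ceph_lookup() still perform the test under i_ceph_lock where the answer has to stay stable against concurrent invalidation.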
*/ @@ -198,8 +198,8 @@ more: filp->f_pos++; /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_test_complete(dir)) { - dout(" lost D_COMPLETE on %p; falling back to mds\n", dir); + if (!ceph_dir_is_complete(dir)) { + dout(" lost dir complete on %p; falling back to mds\n", dir); err = -EAGAIN; goto out; } @@ -258,7 +258,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos == 0) { /* note dir version at start of readdir so we can tell * if any dentries get dropped */ - fi->dir_release_count = ci->i_release_count; + fi->dir_release_count = atomic_read(&ci->i_release_count); dout("readdir off 0 -> '.'\n"); if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), @@ -284,7 +284,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) if ((filp->f_pos == 2 || fi->dentry) && !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && ceph_snap(inode) != CEPH_SNAPDIR && - ceph_dir_test_complete(inode) && + __ceph_dir_is_complete(ci) && __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { spin_unlock(&ci->i_ceph_lock); err = __dcache_readdir(filp, dirent, filldir); @@ -350,7 +350,8 @@ more: if (!req->r_did_prepopulate) { dout("readdir !did_prepopulate"); - fi->dir_release_count--; /* preclude D_COMPLETE */ + /* preclude from marking dir complete */ + fi->dir_release_count--; } /* note next offset and last dentry name */ @@ -428,8 +429,9 @@ more: * the complete dir contents in our cache. */ spin_lock(&ci->i_ceph_lock); - if (ci->i_release_count == fi->dir_release_count) { - ceph_dir_set_complete(inode); + if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { + dout(" marking %p complete\n", inode); + __ceph_dir_set_complete(ci, fi->dir_release_count); ci->i_max_offset = filp->f_pos; } spin_unlock(&ci->i_ceph_lock); @@ -604,7 +606,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, fsc->mount_options->snapdir_name, dentry->d_name.len) && !is_root_ceph_dentry(dir, dentry) && - ceph_dir_test_complete(dir) && + __ceph_dir_is_complete(ci) && (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { spin_unlock(&ci->i_ceph_lock); dout(" dir %p complete, -ENOENT\n", dir); @@ -1065,44 +1067,6 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry, } /* - * Set/clear/test dir complete flag on the dir's dentry. - */ -void ceph_dir_set_complete(struct inode *inode) -{ - struct dentry *dentry = d_find_any_alias(inode); - - if (dentry && ceph_dentry(dentry) && - ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) { - dout(" marking %p (%p) complete\n", inode, dentry); - set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); - } - dput(dentry); -} - -void ceph_dir_clear_complete(struct inode *inode) -{ - struct dentry *dentry = d_find_any_alias(inode); - - if (dentry && ceph_dentry(dentry)) { - dout(" marking %p (%p) complete\n", inode, dentry); - set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); - } - dput(dentry); -} - -bool ceph_dir_test_complete(struct inode *inode) -{ - struct dentry *dentry = d_find_any_alias(inode); - - if (dentry && ceph_dentry(dentry)) { - dout(" marking %p (%p) NOT complete\n", inode, dentry); - clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags); - } - dput(dentry); - return false; -} - -/* * When the VFS prunes a dentry from the cache, we need to clear the * complete flag on the parent directory. 
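In ceph_readdir() the counters are used in a classic sample-then-compare pattern: the release count is read once when the readdir starts, and the directory is marked complete at the end only if no invalidation happened in between. Condensed from the hunks above:

    /* at readdir start (offset 0) */
    fi->dir_release_count = atomic_read(&ci->i_release_count);

    /* ... emit all entries ... */

    /* after the final entry */
    spin_lock(&ci->i_ceph_lock);
    if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
            __ceph_dir_set_complete(ci, fi->dir_release_count);
            ci->i_max_offset = filp->f_pos;
    }
    spin_unlock(&ci->i_ceph_lock);

Any dentry dropped in the meantime (see ceph_d_prune() just below) bumps i_release_count, so the comparison fails and a stale directory is never marked complete.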
* @@ -1110,15 +1074,13 @@ bool ceph_dir_test_complete(struct inode *inode) */ static void ceph_d_prune(struct dentry *dentry) { - struct ceph_dentry_info *di; - dout("ceph_d_prune %p\n", dentry); /* do we have a valid parent? */ if (IS_ROOT(dentry)) return; - /* if we are not hashed, we don't affect D_COMPLETE */ + /* if we are not hashed, we don't affect dir's completeness */ if (d_unhashed(dentry)) return; @@ -1126,8 +1088,7 @@ static void ceph_d_prune(struct dentry *dentry) * we hold d_lock, so d_parent is stable, and d_fsdata is never * cleared until d_release */ - di = ceph_dentry(dentry->d_parent); - clear_bit(CEPH_D_COMPLETE, &di->flags); + ceph_dir_clear_complete(dentry->d_parent->d_inode); } /* diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bf338d9b67e..d70830c6683 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -446,19 +446,35 @@ done: } /* - * Write commit callback, called if we requested both an ACK and - * ONDISK commit reply from the OSD. + * Write commit request unsafe callback, called to tell us when a + * request is unsafe (that is, in flight--has been handed to the + * messenger to send to its target osd). It is called again when + * we've received a response message indicating the request is + * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request + * is completed early (and unsuccessfully) due to a timeout or + * interrupt. + * + * This is used if we requested both an ACK and ONDISK commit reply + * from the OSD. */ -static void sync_write_commit(struct ceph_osd_request *req, - struct ceph_msg *msg) +static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) { struct ceph_inode_info *ci = ceph_inode(req->r_inode); - dout("sync_write_commit %p tid %llu\n", req, req->r_tid); - spin_lock(&ci->i_unsafe_lock); - list_del_init(&req->r_unsafe_item); - spin_unlock(&ci->i_unsafe_lock); - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); + dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid, + unsafe ? "un" : ""); + if (unsafe) { + ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); + spin_lock(&ci->i_unsafe_lock); + list_add_tail(&req->r_unsafe_item, + &ci->i_unsafe_writes); + spin_unlock(&ci->i_unsafe_lock); + } else { + spin_lock(&ci->i_unsafe_lock); + list_del_init(&req->r_unsafe_item); + spin_unlock(&ci->i_unsafe_lock); + ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); + } } /* @@ -470,36 +486,33 @@ static void sync_write_commit(struct ceph_osd_request *req, * objects, rollback on failure, etc.) */ static ssize_t ceph_sync_write(struct file *file, const char __user *data, - size_t left, loff_t *offset) + size_t left, loff_t pos, loff_t *ppos) { struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); + struct ceph_snap_context *snapc; + struct ceph_vino vino; struct ceph_osd_request *req; + int num_ops = 1; struct page **pages; int num_pages; - long long unsigned pos; u64 len; int written = 0; int flags; - int do_sync = 0; int check_caps = 0; int page_align, io_align; unsigned long buf_align; int ret; struct timespec mtime = CURRENT_TIME; + bool own_pages = false; if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; - dout("sync_write on file %p %lld~%u %s\n", file, *offset, + dout("sync_write on file %p %lld~%u %s\n", file, pos, (unsigned)left, (file->f_flags & O_DIRECT) ? 
"O_DIRECT" : ""); - if (file->f_flags & O_APPEND) - pos = i_size_read(inode); - else - pos = *offset; - ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -516,7 +529,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0) flags |= CEPH_OSD_FLAG_ACK; else - do_sync = 1; + num_ops++; /* Also include a 'startsync' command. */ /* * we may need to do multiple writes here if we span an object @@ -526,25 +539,20 @@ more: io_align = pos & ~PAGE_MASK; buf_align = (unsigned long)data & ~PAGE_MASK; len = left; - if (file->f_flags & O_DIRECT) { - /* write from beginning of first page, regardless of - io alignment */ - page_align = (pos - io_align + buf_align) & ~PAGE_MASK; - num_pages = calc_pages_for((unsigned long)data, len); - } else { - page_align = pos & ~PAGE_MASK; - num_pages = calc_pages_for(pos, len); - } + + snapc = ci->i_snap_realm->cached_context; + vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, - ceph_vino(inode), pos, &len, - CEPH_OSD_OP_WRITE, flags, - ci->i_snap_realm->cached_context, - do_sync, + vino, pos, &len, num_ops, + CEPH_OSD_OP_WRITE, flags, snapc, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, page_align); + false); if (IS_ERR(req)) return PTR_ERR(req); + /* write from beginning of first page, regardless of io alignment */ + page_align = file->f_flags & O_DIRECT ? buf_align : io_align; + num_pages = calc_pages_for(page_align, len); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); if (IS_ERR(pages)) { @@ -572,36 +580,20 @@ more: if ((file->f_flags & O_SYNC) == 0) { /* get a second commit callback */ - req->r_safe_callback = sync_write_commit; - req->r_own_pages = 1; + req->r_unsafe_callback = ceph_sync_write_unsafe; + req->r_inode = inode; + own_pages = true; } } - req->r_pages = pages; - req->r_num_pages = num_pages; - req->r_inode = inode; + osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, + false, own_pages); + + /* BUG_ON(vino.snap != CEPH_NOSNAP); */ + ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime); ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); - if (!ret) { - if (req->r_safe_callback) { - /* - * Add to inode unsafe list only after we - * start_request so that a tid has been assigned. 
- */ - spin_lock(&ci->i_unsafe_lock); - list_add_tail(&req->r_unsafe_item, - &ci->i_unsafe_writes); - spin_unlock(&ci->i_unsafe_lock); - ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); - } - + if (!ret) ret = ceph_osdc_wait_request(&fsc->client->osdc, req); - if (ret < 0 && req->r_safe_callback) { - spin_lock(&ci->i_unsafe_lock); - list_del_init(&req->r_unsafe_item); - spin_unlock(&ci->i_unsafe_lock); - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); - } - } if (file->f_flags & O_DIRECT) ceph_put_page_vector(pages, num_pages, false); @@ -614,12 +606,12 @@ out: pos += len; written += len; left -= len; - data += written; + data += len; if (left) goto more; ret = written; - *offset = pos; + *ppos = pos; if (pos > i_size_read(inode)) check_caps = ceph_inode_set_size(inode, pos); if (check_caps) @@ -653,7 +645,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", inode, ceph_vinop(inode), pos, (unsigned)len, inode); again: - __ceph_do_pending_vmtruncate(inode); if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; else @@ -717,55 +708,75 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; - loff_t endoff = pos + iov->iov_len; - int got = 0; - int ret, err, written; + ssize_t count, written = 0; + int err, want, got; + bool hold_mutex; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; -retry_snap: - written = 0; - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) - return -ENOSPC; - __ceph_do_pending_vmtruncate(inode); + sb_start_write(inode->i_sb); + mutex_lock(&inode->i_mutex); + hold_mutex = true; - /* - * try to do a buffered write. if we don't have sufficient - * caps, we'll get -EAGAIN from generic_file_aio_write, or a - * short write if we only get caps for some pages. - */ - if (!(iocb->ki_filp->f_flags & O_DIRECT) && - !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && - !(fi->flags & CEPH_F_SYNC)) { - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (ret >= 0) - written = ret; - - if ((ret >= 0 || ret == -EIOCBQUEUED) && - ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) - || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { - err = vfs_fsync_range(file, pos, pos + written - 1, 1); - if (err < 0) - ret = err; - } - if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff) - goto out; + err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); + if (err) + goto out; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = file->f_mapping->backing_dev_info; + + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + + if (count == 0) + goto out; + + err = file_remove_suid(file); + if (err) + goto out; + + err = file_update_time(file); + if (err) + goto out; + +retry_snap: + if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { + err = -ENOSPC; + goto out; } - dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", - inode, ceph_vinop(inode), pos + written, - (unsigned)iov->iov_len - written, inode->i_size); - ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff); - if (ret < 0) + dout("aio_write %p %llx.%llx %llu~%zd getting caps. 
i_size %llu\n", + inode, ceph_vinop(inode), pos, count, inode->i_size); + if (fi->fmode & CEPH_FILE_MODE_LAZY) + want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; + else + want = CEPH_CAP_FILE_BUFFER; + got = 0; + err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); + if (err < 0) goto out; - dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), pos + written, - (unsigned)iov->iov_len - written, ceph_cap_string(got)); - ret = ceph_sync_write(file, iov->iov_base + written, - iov->iov_len - written, &iocb->ki_pos); - if (ret >= 0) { + dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n", + inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); + + if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || + (iocb->ki_filp->f_flags & O_DIRECT) || + (inode->i_sb->s_flags & MS_SYNCHRONOUS) || + (fi->flags & CEPH_F_SYNC)) { + mutex_unlock(&inode->i_mutex); + written = ceph_sync_write(file, iov->iov_base, count, + pos, &iocb->ki_pos); + } else { + written = generic_file_buffered_write(iocb, iov, nr_segs, + pos, &iocb->ki_pos, + count, 0); + mutex_unlock(&inode->i_mutex); + } + hold_mutex = false; + + if (written >= 0) { int dirty; spin_lock(&ci->i_ceph_lock); dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); @@ -773,18 +784,34 @@ retry_snap: if (dirty) __mark_inode_dirty(inode, dirty); } + dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos + written, - (unsigned)iov->iov_len - written, ceph_cap_string(got)); + inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, + ceph_cap_string(got)); ceph_put_cap_refs(ci, got); -out: - if (ret == -EOLDSNAPC) { + + if (written >= 0 && + ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || + ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { + err = vfs_fsync_range(file, pos, pos + written - 1, 1); + if (err < 0) + written = err; + } + + if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); + mutex_lock(&inode->i_mutex); + hold_mutex = true; goto retry_snap; } +out: + if (hold_mutex) + mutex_unlock(&inode->i_mutex); + sb_end_write(inode->i_sb); + current->backing_dev_info = NULL; - return ret; + return written ? 
written : err; } /* @@ -796,7 +823,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) int ret; mutex_lock(&inode->i_mutex); - __ceph_do_pending_vmtruncate(inode); + __ceph_do_pending_vmtruncate(inode, false); if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 851814d951c..be0f7e20d62 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -302,7 +302,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_version = 0; ci->i_time_warp_seq = 0; ci->i_ceph_flags = 0; - ci->i_release_count = 0; + atomic_set(&ci->i_release_count, 1); + atomic_set(&ci->i_complete_count, 0); ci->i_symlink = NULL; memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); @@ -561,7 +562,6 @@ static int fill_inode(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); int i; int issued = 0, implemented; - int updating_inode = 0; struct timespec mtime, atime, ctime; u32 nsplits; struct ceph_buffer *xattr_blob = NULL; @@ -601,7 +601,6 @@ static int fill_inode(struct inode *inode, (ci->i_version & ~1) >= le64_to_cpu(info->version)) goto no_change; - updating_inode = 1; issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); @@ -717,6 +716,17 @@ static int fill_inode(struct inode *inode, ceph_vinop(inode), inode->i_mode); } + /* set dir completion flag? */ + if (S_ISDIR(inode->i_mode) && + ci->i_files == 0 && ci->i_subdirs == 0 && + ceph_snap(inode) == CEPH_NOSNAP && + (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && + (issued & CEPH_CAP_FILE_EXCL) == 0 && + !__ceph_dir_is_complete(ci)) { + dout(" marking %p complete (empty)\n", inode); + __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); + ci->i_max_offset = 2; + } no_change: spin_unlock(&ci->i_ceph_lock); @@ -767,19 +777,6 @@ no_change: __ceph_get_fmode(ci, cap_fmode); } - /* set dir completion flag? */ - if (S_ISDIR(inode->i_mode) && - updating_inode && /* didn't jump to no_change */ - ci->i_files == 0 && ci->i_subdirs == 0 && - ceph_snap(inode) == CEPH_NOSNAP && - (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && - (issued & CEPH_CAP_FILE_EXCL) == 0 && - !ceph_dir_test_complete(inode)) { - dout(" marking %p complete (empty)\n", inode); - ceph_dir_set_complete(inode); - ci->i_max_offset = 2; - } - /* update delegation info? */ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); @@ -861,7 +858,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) di = ceph_dentry(dn); spin_lock(&ci->i_ceph_lock); - if (!ceph_dir_test_complete(inode)) { + if (!__ceph_dir_is_complete(ci)) { spin_unlock(&ci->i_ceph_lock); return; } @@ -1065,8 +1062,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, /* * d_move() puts the renamed dentry at the end of * d_subdirs. We need to assign it an appropriate - * directory offset so we can behave when holding - * D_COMPLETE. + * directory offset so we can behave when dir is + * complete. */ ceph_set_dentry_offset(req->r_old_dentry); dout("dn %p gets new offset %lld\n", req->r_old_dentry, @@ -1457,7 +1454,7 @@ out: /* - * called by trunc_wq; take i_mutex ourselves + * called by trunc_wq; * * We also truncate in a separate thread as well. 
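With this patch the i_mutex handling moves into __ceph_do_pending_vmtruncate() itself, controlled by the new needlock argument: the work-queue path, which holds no locks, passes true so the function takes i_mutex around truncate_inode_pages(), while callers already serialized under i_mutex (llseek, setattr) pass false. Caller-side, grounded in the nearby hunks:

    /* from the trunc_wq worker: no locks held */
    __ceph_do_pending_vmtruncate(inode, true);

    /* from ceph_llseek()/ceph_setattr(): i_mutex already held */
    mutex_lock(&inode->i_mutex);
    __ceph_do_pending_vmtruncate(inode, false);

The caps code uses the same hook: try_get_cap_refs() (see the fs/ceph/caps.c hunk earlier) now finishes any pending truncate before handing out FILE_WR references, instead of masking FILE_WR out of the issued set.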
*/ @@ -1468,9 +1465,7 @@ static void ceph_vmtruncate_work(struct work_struct *work) struct inode *inode = &ci->vfs_inode; dout("vmtruncate_work %p\n", inode); - mutex_lock(&inode->i_mutex); - __ceph_do_pending_vmtruncate(inode); - mutex_unlock(&inode->i_mutex); + __ceph_do_pending_vmtruncate(inode, true); iput(inode); } @@ -1494,12 +1489,10 @@ void ceph_queue_vmtruncate(struct inode *inode) } /* - * called with i_mutex held. - * * Make sure any pending truncation is applied before doing anything * that may depend on it. */ -void __ceph_do_pending_vmtruncate(struct inode *inode) +void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock) { struct ceph_inode_info *ci = ceph_inode(inode); u64 to; @@ -1532,7 +1525,11 @@ retry: ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); + if (needlock) + mutex_lock(&inode->i_mutex); truncate_inode_pages(inode->i_mapping, to); + if (needlock) + mutex_unlock(&inode->i_mutex); spin_lock(&ci->i_ceph_lock); if (to == ci->i_truncate_size) { @@ -1563,6 +1560,12 @@ static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd) static const struct inode_operations ceph_symlink_iops = { .readlink = generic_readlink, .follow_link = ceph_sym_follow_link, + .setattr = ceph_setattr, + .getattr = ceph_getattr, + .setxattr = ceph_setxattr, + .getxattr = ceph_getxattr, + .listxattr = ceph_listxattr, + .removexattr = ceph_removexattr, }; /* @@ -1585,7 +1588,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; - __ceph_do_pending_vmtruncate(inode); + __ceph_do_pending_vmtruncate(inode, false); err = inode_change_ok(inode, attr); if (err != 0) @@ -1767,7 +1770,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); - __ceph_do_pending_vmtruncate(inode); + __ceph_do_pending_vmtruncate(inode, false); return err; out: spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 4a989345b37..e0b4ef31d3c 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -208,8 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, - osdc->osdmap); + + ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, + ceph_file_layout_pg_pool(ci->i_layout)); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 442880d099c..4f22671a5bd 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -265,7 +265,8 @@ static int parse_reply_info_extra(void **p, void *end, { if (info->head->op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_READDIR) + else if (info->head->op == CEPH_MDS_OP_READDIR || + info->head->op == CEPH_MDS_OP_LSSNAP) return parse_reply_info_dir(p, end, info, features); else if (info->head->op == CEPH_MDS_OP_CREATE) return parse_reply_info_create(p, end, info, features); @@ -364,9 +365,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); if (atomic_dec_and_test(&s->s_ref)) { if (s->s_auth.authorizer) - s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( - s->s_mdsc->fsc->client->monc.auth, - s->s_auth.authorizer); + ceph_auth_destroy_authorizer( + s->s_mdsc->fsc->client->monc.auth, + 
s->s_auth.authorizer); kfree(s); } } @@ -1196,6 +1197,8 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. just remove us */ + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); __ceph_remove_cap(cap); } else { /* try to drop referring dentries */ @@ -1718,8 +1721,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); - msg->pages = req->r_pages; - msg->nr_pages = req->r_num_pages; + if (req->r_data_len) { + /* outbound data set only by ceph_sync_setxattr() */ + BUG_ON(!req->r_pages); + ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0); + } + msg->hdr.data_len = cpu_to_le32(req->r_data_len); msg->hdr.data_off = cpu_to_le16(0); @@ -1913,6 +1920,7 @@ static void __wake_requests(struct ceph_mds_client *mdsc, req = list_entry(tmp_list.next, struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); + dout(" wake request %p tid %llu\n", req, req->r_tid); __do_request(mdsc, req); } } @@ -2026,20 +2034,16 @@ out: } /* - * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS + * Invalidate dir's completeness, dentry lease state on an aborted MDS * namespace request. */ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { struct inode *inode = req->r_locked_dir; - struct ceph_inode_info *ci = ceph_inode(inode); - dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); - spin_lock(&ci->i_ceph_lock); - ceph_dir_clear_complete(inode); - ci->i_release_count++; - spin_unlock(&ci->i_ceph_lock); + dout("invalidate_dir_request %p (complete, lease(s))\n", inode); + ceph_dir_clear_complete(inode); if (req->r_dentry) ceph_invalidate_dentry_lease(req->r_dentry); if (req->r_old_dentry) @@ -2599,11 +2603,13 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, goto fail; } - reply->pagelist = pagelist; if (recon_state.flock) reply->hdr.version = cpu_to_le16(2); - reply->hdr.data_len = cpu_to_le32(pagelist->length); - reply->nr_pages = calc_pages_for(0, pagelist->length); + if (pagelist->length) { + /* set up outbound data if we have any */ + reply->hdr.data_len = cpu_to_le32(pagelist->length); + ceph_msg_data_add_pagelist(reply, pagelist); + } ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); @@ -3433,13 +3439,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &s->s_auth; if (force_new && auth->authorizer) { - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(ac, auth->authorizer); auth->authorizer = NULL; } - if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { - int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, - auth); + if (!auth->authorizer) { + int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, + auth); + if (ret) + return ERR_PTR(ret); + } else { + int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS, + auth); if (ret) return ERR_PTR(ret); } @@ -3455,7 +3465,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len); + return ceph_auth_verify_authorizer_reply(ac, 
s->s_auth.authorizer, len); } static int invalidate_authorizer(struct ceph_connection *con) @@ -3464,12 +3474,32 @@ static int invalidate_authorizer(struct ceph_connection *con) struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - if (ac->ops->invalidate_authorizer) - ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); + ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); return ceph_monc_validate_auth(&mdsc->fsc->client->monc); } +static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, + struct ceph_msg_header *hdr, int *skip) +{ + struct ceph_msg *msg; + int type = (int) le16_to_cpu(hdr->type); + int front_len = (int) le32_to_cpu(hdr->front_len); + + if (con->in_msg) + return con->in_msg; + + *skip = 0; + msg = ceph_msg_new(type, front_len, GFP_NOFS, false); + if (!msg) { + pr_err("unable to allocate msg type %d len %d\n", + type, front_len); + return NULL; + } + + return msg; +} + static const struct ceph_connection_operations mds_con_ops = { .get = con_get, .put = con_put, @@ -3478,6 +3508,7 @@ static const struct ceph_connection_operations mds_con_ops = { .verify_authorizer_reply = verify_authorizer_reply, .invalidate_authorizer = invalidate_authorizer, .peer_reset = peer_reset, + .alloc_msg = mds_alloc_msg, }; /* eof */ diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 0d3c9240c61..9278dec9e94 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -20,7 +20,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) { int n = 0; int i; - char r; + + /* special case for one mds */ + if (1 == m->m_max_mds && m->m_info[0].state > 0) + return 0; /* count */ for (i = 0; i < m->m_max_mds; i++) @@ -30,8 +33,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) return -1; /* pick */ - get_random_bytes(&r, 1); - n = r % n; + n = prandom_u32() % n; i = 0; for (i = 0; n > 0; i++, n--) while (m->m_info[i].state <= 0) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index cbb2f54a301..f01645a2775 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -332,10 +332,9 @@ static int build_snap_context(struct ceph_snap_realm *realm) err = -ENOMEM; if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) goto fail; - snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS); + snapc = ceph_create_snap_context(num, GFP_NOFS); if (!snapc) goto fail; - atomic_set(&snapc->nref, 1); /* build (reverse sorted) snap vector */ num = 0; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6ddc0bca56b..7d377c9a5e3 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -479,6 +479,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, CEPH_FEATURE_FLOCK | CEPH_FEATURE_DIRLAYOUTHASH; const unsigned required_features = 0; + int page_count; + size_t size; int err = -ENOMEM; fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); @@ -522,8 +524,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, /* set up mempools */ err = -ENOMEM; - fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, - fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); + page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT; + size = sizeof (struct page *) * (page_count ? 
page_count : 1); + fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); if (!fsc->wb_pagevec_pool) goto fail_trunc_wq; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c7b309723dc..8696be2ff67 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -204,7 +204,6 @@ struct ceph_inode_xattr { * Ceph dentry state */ struct ceph_dentry_info { - unsigned long flags; struct ceph_mds_session *lease_session; u32 lease_gen, lease_shared_gen; u32 lease_seq; @@ -215,18 +214,6 @@ struct ceph_dentry_info { u64 offset; }; -/* - * dentry flags - * - * The locking for D_COMPLETE is a bit odd: - * - we can clear it at almost any time (see ceph_d_prune) - * - it is only meaningful if: - * - we hold dir inode i_ceph_lock - * - we hold dir FILE_SHARED caps - * - the dentry D_COMPLETE is set - */ -#define CEPH_D_COMPLETE 1 /* if set, d_u.d_subdirs is complete directory */ - struct ceph_inode_xattrs_info { /* * (still encoded) xattr blob. we avoid the overhead of parsing @@ -257,7 +244,8 @@ struct ceph_inode_info { u32 i_time_warp_seq; unsigned i_ceph_flags; - unsigned long i_release_count; + atomic_t i_release_count; + atomic_t i_complete_count; struct ceph_dir_layout i_dir_layout; struct ceph_file_layout i_layout; @@ -267,7 +255,7 @@ struct ceph_inode_info { struct timespec i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; - u64 i_max_offset; /* largest readdir offset, set with D_COMPLETE */ + u64 i_max_offset; /* largest readdir offset, set with complete dir */ struct rb_root i_fragtree; struct mutex i_fragtree_mutex; @@ -436,33 +424,31 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ -static inline void ceph_i_clear(struct inode *inode, unsigned mask) +static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, + int release_count) { - struct ceph_inode_info *ci = ceph_inode(inode); - - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags &= ~mask; - spin_unlock(&ci->i_ceph_lock); + atomic_set(&ci->i_complete_count, release_count); } -static inline void ceph_i_set(struct inode *inode, unsigned mask) +static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = ceph_inode(inode); + atomic_inc(&ci->i_release_count); +} - spin_lock(&ci->i_ceph_lock); - ci->i_ceph_flags |= mask; - spin_unlock(&ci->i_ceph_lock); +static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci) +{ + return atomic_read(&ci->i_complete_count) == + atomic_read(&ci->i_release_count); } -static inline bool ceph_i_test(struct inode *inode, unsigned mask) +static inline void ceph_dir_clear_complete(struct inode *inode) { - struct ceph_inode_info *ci = ceph_inode(inode); - bool r; + __ceph_dir_clear_complete(ceph_inode(inode)); +} - spin_lock(&ci->i_ceph_lock); - r = (ci->i_ceph_flags & mask) == mask; - spin_unlock(&ci->i_ceph_lock); - return r; +static inline bool ceph_dir_is_complete(struct inode *inode) +{ + return __ceph_dir_is_complete(ceph_inode(inode)); } @@ -489,13 +475,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) } /* - * set/clear directory D_COMPLETE flag - */ -void ceph_dir_set_complete(struct inode *inode); -void ceph_dir_clear_complete(struct inode *inode); -bool ceph_dir_test_complete(struct inode *inode); - -/* * caps helpers */ static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) @@ -584,7 +563,7 @@ struct ceph_file_info { u64 next_offset; /* offset of 
next chunk (last_name's + 1) */ char *last_name; /* last entry in previous chunk */ struct dentry *dentry; /* next dentry (for dcache readdir) */ - unsigned long dir_release_count; + int dir_release_count; /* used for -o dirstat read() on directory thing */ char *dir_info; @@ -713,7 +692,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, extern int ceph_inode_holds_cap(struct inode *inode, int mask); extern int ceph_inode_set_size(struct inode *inode, loff_t size); -extern void __ceph_do_pending_vmtruncate(struct inode *inode); +extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock); extern void ceph_queue_vmtruncate(struct inode *inode); extern void ceph_queue_invalidate(struct inode *inode); @@ -755,6 +734,8 @@ static inline void ceph_remove_cap(struct ceph_cap *cap) extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); +extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino, + u64 cap_id, u32 migrate_seq, u32 issue_seq); extern void ceph_queue_caps_release(struct inode *inode); extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); extern int ceph_fsync(struct file *file, loff_t start, loff_t end, diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 1d36db11477..a3b56544c21 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -506,11 +506,11 @@ decode_negTokenInit(unsigned char *security_blob, int length, /* GSSAPI header */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding negTokenInit header"); + cifs_dbg(FYI, "Error decoding negTokenInit header\n"); return 0; } else if ((cls != ASN1_APL) || (con != ASN1_CON) || (tag != ASN1_EOC)) { - cFYI(1, "cls = %d con = %d tag = %d", cls, con, tag); + cifs_dbg(FYI, "cls = %d con = %d tag = %d\n", cls, con, tag); return 0; } @@ -531,52 +531,52 @@ decode_negTokenInit(unsigned char *security_blob, int length, /* SPNEGO OID not present or garbled -- bail out */ if (!rc) { - cFYI(1, "Error decoding negTokenInit header"); + cifs_dbg(FYI, "Error decoding negTokenInit header\n"); return 0; } /* SPNEGO */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding negTokenInit"); + cifs_dbg(FYI, "Error decoding negTokenInit\n"); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON) || (tag != ASN1_EOC)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end); + cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n", + cls, con, tag, end, *end); return 0; } /* negTokenInit */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding negTokenInit"); + cifs_dbg(FYI, "Error decoding negTokenInit\n"); return 0; } else if ((cls != ASN1_UNI) || (con != ASN1_CON) || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end); + cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n", + cls, con, tag, end, *end); return 0; } /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding 2nd part of negTokenInit"); + cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n"); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON) || (tag != ASN1_EOC)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end); + cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n", + cls, con, tag, end, *end); return 0; } /* sequence of */ if (asn1_header_decode (&ctx, &sequence_end, 
&cls, &con, &tag) == 0) { - cFYI(1, "Error decoding 2nd part of negTokenInit"); + cifs_dbg(FYI, "Error decoding 2nd part of negTokenInit\n"); return 0; } else if ((cls != ASN1_UNI) || (con != ASN1_CON) || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end); + cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n", + cls, con, tag, end, *end); return 0; } @@ -584,15 +584,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, while (!asn1_eoc_decode(&ctx, sequence_end)) { rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); if (!rc) { - cFYI(1, "Error decoding negTokenInit hdr exit2"); + cifs_dbg(FYI, "Error decoding negTokenInit hdr exit2\n"); return 0; } if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { - cFYI(1, "OID len = %d oid = 0x%lx 0x%lx " - "0x%lx 0x%lx", oidlen, *oid, - *(oid + 1), *(oid + 2), *(oid + 3)); + cifs_dbg(FYI, "OID len = %d oid = 0x%lx 0x%lx 0x%lx 0x%lx\n", + oidlen, *oid, *(oid + 1), *(oid + 2), + *(oid + 3)); if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) @@ -610,7 +610,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, kfree(oid); } } else { - cFYI(1, "Should be an oid what is going on?"); + cifs_dbg(FYI, "Should be an oid what is going on?\n"); } } diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 282d6de7e41..6c665bf4a27 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -92,7 +92,7 @@ static uint16_t cifs_server_get_key(const void *cookie_netfs_data, break; default: - cERROR(1, "Unknown network family '%d'", sa->sa_family); + cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); key_len = 0; break; } @@ -152,7 +152,7 @@ static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer, sharename = extract_sharename(tcon->treeName); if (IS_ERR(sharename)) { - cFYI(1, "%s: couldn't extract sharename", __func__); + cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); sharename = NULL; return 0; } @@ -302,7 +302,7 @@ static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data) pagevec_init(&pvec, 0); first = 0; - cFYI(1, "%s: cifs inode 0x%p now uncached", __func__, cifsi); + cifs_dbg(FYI, "%s: cifs inode 0x%p now uncached\n", __func__, cifsi); for (;;) { nr_pages = pagevec_lookup(&pvec, diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index d9ea6ede6a7..d5974834602 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -57,15 +57,32 @@ cifs_dump_mem(char *label, void *data, int length) } } +#ifdef CONFIG_CIFS_DEBUG +void cifs_vfs_err(const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_ERR "CIFS VFS: %pV", &vaf); + + va_end(args); +} +#endif + void cifs_dump_detail(void *buf) { #ifdef CONFIG_CIFS_DEBUG2 struct smb_hdr *smb = (struct smb_hdr *)buf; - cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", - smb->Command, smb->Status.CifsError, - smb->Flags, smb->Flags2, smb->Mid, smb->Pid); - cERROR(1, "smb buf %p len %u", smb, smbCalcSize(smb)); + cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n", + smb->Command, smb->Status.CifsError, + smb->Flags, smb->Flags2, smb->Mid, smb->Pid); + cifs_dbg(VFS, "smb buf %p len %u\n", smb, smbCalcSize(smb)); #endif /* CONFIG_CIFS_DEBUG2 */ } @@ -78,25 +95,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server) if (server == NULL) return; - cERROR(1, "Dump pending requests:"); + cifs_dbg(VFS, "Dump pending requests:\n"); spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu", - mid_entry->mid_state, - le16_to_cpu(mid_entry->command), - mid_entry->pid, - mid_entry->callback_data, - mid_entry->mid); + cifs_dbg(VFS, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %llu\n", + mid_entry->mid_state, + le16_to_cpu(mid_entry->command), + mid_entry->pid, + mid_entry->callback_data, + mid_entry->mid); #ifdef CONFIG_CIFS_STATS2 - cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld", - mid_entry->large_buf, - mid_entry->resp_buf, - mid_entry->when_received, - jiffies); + cifs_dbg(VFS, "IsLarge: %d buf: %p time rcv: %ld now: %ld\n", + mid_entry->large_buf, + mid_entry->resp_buf, + mid_entry->when_received, + jiffies); #endif /* STATS2 */ - cERROR(1, "IsMult: %d IsEnd: %d", mid_entry->multiRsp, - mid_entry->multiEnd); + cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n", + mid_entry->multiRsp, mid_entry->multiEnd); if (mid_entry->resp_buf) { cifs_dump_detail(mid_entry->resp_buf); cifs_dump_mem("existing buf: ", @@ -603,7 +620,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, global_secflags = CIFSSEC_MAX; return count; } else if (!isdigit(c)) { - cERROR(1, "invalid flag %c", c); + cifs_dbg(VFS, "invalid flag %c\n", c); return -EINVAL; } } @@ -611,16 +628,16 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, flags = simple_strtoul(flags_string, NULL, 0); - cFYI(1, "sec flags 0x%x", flags); + cifs_dbg(FYI, "sec flags 0x%x\n", flags); if (flags <= 0) { - cERROR(1, "invalid security flags %s", flags_string); + cifs_dbg(VFS, "invalid security flags %s\n", flags_string); return -EINVAL; } if (flags & ~CIFSSEC_MASK) { - cERROR(1, "attempt to set unsupported security flags 0x%x", - flags & ~CIFSSEC_MASK); + cifs_dbg(VFS, "attempt to set unsupported security flags 0x%x\n", + flags & ~CIFSSEC_MASK); return -EINVAL; } /* flags look ok - update the global security flags for cifs module */ @@ -628,9 +645,9 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if (global_secflags & CIFSSEC_MUST_SIGN) { /* requiring signing implies signing is allowed */ global_secflags |= CIFSSEC_MAY_SIGN; - cFYI(1, "packet signing now required"); + cifs_dbg(FYI, "packet signing now required\n"); } else if ((global_secflags & CIFSSEC_MAY_SIGN) == 0) { - cFYI(1, "packet signing disabled"); + cifs_dbg(FYI, "packet signing disabled\n"); } /* BB should we turn on MAY flags for other MUST options? 
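All of the cFYI()/cERROR() conversions in this series funnel into the single cifs_dbg(type, fmt, ...) macro defined in the fs/cifs/cifs_debug.h hunk just below: VFS routes to the new cifs_vfs_err() and is printed at KERN_ERR whenever CONFIG_CIFS_DEBUG is built in, FYI is informational output gated on the cifsFYI module flag, and NOISY is the extra-verbose level that is nonzero only under CONFIG_CIFS_DEBUG2 and compiles away otherwise. Typical call sites, taken from hunks in this patch:

    cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
    cifs_dbg(FYI, "key description = %s\n", description);
    cifs_dbg(NOISY, "all perms\n");

Note that, unlike the old wrappers, which appended the newline themselves, the format string passed to cifs_dbg() carries its own trailing "\n".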
*/ return count; diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index 69ae3d3c3b3..c99b40fb609 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -25,18 +25,20 @@ void cifs_dump_mem(char *label, void *data, int length); void cifs_dump_detail(void *); void cifs_dump_mids(struct TCP_Server_Info *); -#ifdef CONFIG_CIFS_DEBUG2 -#define DBG2 2 -#else -#define DBG2 0 -#endif extern int traceSMB; /* flag which enables the function below */ void dump_smb(void *, int); #define CIFS_INFO 0x01 #define CIFS_RC 0x02 #define CIFS_TIMER 0x04 +#define VFS 1 +#define FYI 2 extern int cifsFYI; +#ifdef CONFIG_CIFS_DEBUG2 +#define NOISY 4 +#else +#define NOISY 0 +#endif /* * debug ON @@ -44,31 +46,21 @@ extern int cifsFYI; */ #ifdef CONFIG_CIFS_DEBUG -/* information message: e.g., configuration, major event */ -#define cifsfyi(fmt, ...) \ -do { \ - if (cifsFYI & CIFS_INFO) \ - printk(KERN_DEBUG "%s: " fmt "\n", \ - __FILE__, ##__VA_ARGS__); \ -} while (0) - -#define cFYI(set, fmt, ...) \ -do { \ - if (set) \ - cifsfyi(fmt, ##__VA_ARGS__); \ -} while (0) +__printf(1, 2) void cifs_vfs_err(const char *fmt, ...); -#define cifswarn(fmt, ...) \ - printk(KERN_WARNING fmt "\n", ##__VA_ARGS__) - -/* error event message: e.g., i/o error */ -#define cifserror(fmt, ...) \ - printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ - -#define cERROR(set, fmt, ...) \ +/* information message: e.g., configuration, major event */ +#define cifs_dbg(type, fmt, ...) \ do { \ - if (set) \ - cifserror(fmt, ##__VA_ARGS__); \ + if (type == FYI) { \ + if (cifsFYI & CIFS_INFO) { \ + printk(KERN_DEBUG "%s: " fmt, \ + __FILE__, ##__VA_ARGS__); \ + } \ + } else if (type == VFS) { \ + cifs_vfs_err(fmt, ##__VA_ARGS__); \ + } else if (type == NOISY && type != 0) { \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + } \ } while (0) /* @@ -76,27 +68,11 @@ do { \ * --------- */ #else /* _CIFS_DEBUG */ -#define cifsfyi(fmt, ...) \ +#define cifs_dbg(type, fmt, ...) \ do { \ if (0) \ - printk(KERN_DEBUG "%s: " fmt "\n", \ - __FILE__, ##__VA_ARGS__); \ + printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ } while (0) -#define cFYI(set, fmt, ...) \ -do { \ - if (0 && set) \ - cifsfyi(fmt, ##__VA_ARGS__); \ -} while (0) -#define cifserror(fmt, ...) \ -do { \ - if (0) \ - printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ -} while (0) -#define cERROR(set, fmt, ...) 
\ -do { \ - if (0 && set) \ - cifserror(fmt, ##__VA_ARGS__); \ -} while (0) -#endif /* _CIFS_DEBUG */ +#endif #endif /* _H_CIFS_DEBUG */ diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 210fce2df30..8e33ec65847 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -84,8 +84,8 @@ static char *cifs_get_share_name(const char *node_name) /* find server name end */ pSep = memchr(UNC+2, '\\', len-2); if (!pSep) { - cERROR(1, "%s: no server name end in node name: %s", - __func__, node_name); + cifs_dbg(VFS, "%s: no server name end in node name: %s\n", + __func__, node_name); kfree(UNC); return ERR_PTR(-EINVAL); } @@ -141,8 +141,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, rc = dns_resolve_server_name_to_ip(*devname, &srvIP); if (rc < 0) { - cFYI(1, "%s: Failed to resolve server part of %s to IP: %d", - __func__, *devname, rc); + cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", + __func__, *devname, rc); goto compose_mount_options_err; } @@ -216,8 +216,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, strcat(mountdata, fullpath + ref->path_consumed); } - /*cFYI(1, "%s: parent mountdata: %s", __func__,sb_mountdata);*/ - /*cFYI(1, "%s: submount mountdata: %s", __func__, mountdata );*/ + /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ + /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ compose_mount_options_out: kfree(srvIP); @@ -260,11 +260,12 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, static void dump_referral(const struct dfs_info3_param *ref) { - cFYI(1, "DFS: ref path: %s", ref->path_name); - cFYI(1, "DFS: node path: %s", ref->node_name); - cFYI(1, "DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type); - cFYI(1, "DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag, - ref->path_consumed); + cifs_dbg(FYI, "DFS: ref path: %s\n", ref->path_name); + cifs_dbg(FYI, "DFS: node path: %s\n", ref->node_name); + cifs_dbg(FYI, "DFS: fl: %hd, srv_type: %hd\n", + ref->flags, ref->server_type); + cifs_dbg(FYI, "DFS: ref_flags: %hd, path_consumed: %hd\n", + ref->ref_flag, ref->path_consumed); } /* @@ -283,7 +284,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) struct vfsmount *mnt; struct tcon_link *tlink; - cFYI(1, "in %s", __func__); + cifs_dbg(FYI, "in %s\n", __func__); BUG_ON(IS_ROOT(mntpt)); /* @@ -320,15 +321,15 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) /* connect to a node */ len = strlen(referrals[i].node_name); if (len < 2) { - cERROR(1, "%s: Net Address path too short: %s", - __func__, referrals[i].node_name); + cifs_dbg(VFS, "%s: Net Address path too short: %s\n", + __func__, referrals[i].node_name); mnt = ERR_PTR(-EINVAL); break; } mnt = cifs_dfs_do_refmount(cifs_sb, full_path, referrals + i); - cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, - referrals[i].node_name, mnt); + cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", + __func__, referrals[i].node_name, mnt); if (!IS_ERR(mnt)) goto success; } @@ -343,7 +344,7 @@ success: free_full_path: kfree(full_path); cdda_exit: - cFYI(1, "leaving %s" , __func__); + cifs_dbg(FYI, "leaving %s\n" , __func__); return mnt; } @@ -354,11 +355,11 @@ struct vfsmount *cifs_dfs_d_automount(struct path *path) { struct vfsmount *newmnt; - cFYI(1, "in %s", __func__); + cifs_dbg(FYI, "in %s\n", __func__); newmnt = cifs_dfs_do_automount(path->dentry); if (IS_ERR(newmnt)) { - cFYI(1, "leaving %s [automount failed]" , __func__); + 
cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__); return newmnt; } @@ -366,7 +367,7 @@ struct vfsmount *cifs_dfs_d_automount(struct path *path) mnt_set_expiry(newmnt, &cifs_dfs_automount_list); schedule_delayed_work(&cifs_dfs_automount_task, cifs_dfs_mountpoint_expiry_timeout); - cFYI(1, "leaving %s [ok]" , __func__); + cifs_dbg(FYI, "leaving %s [ok]\n" , __func__); return newmnt; } diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 10e77476129..a3e93254761 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -37,12 +37,11 @@ cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) int ret; ret = -ENOMEM; - payload = kmalloc(prep->datalen, GFP_KERNEL); + payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); if (!payload) goto error; /* attach the data */ - memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; ret = 0; @@ -164,7 +163,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) dp = description + strlen(description); sprintf(dp, ";pid=0x%x", current->pid); - cFYI(1, "key description = %s", description); + cifs_dbg(FYI, "key description = %s\n", description); spnego_key = request_key(&cifs_spnego_key_type, description, ""); #ifdef CONFIG_CIFS_DEBUG2 diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 71d5d0a5f6b..0227b45ef00 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -227,8 +227,8 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len, for (i = 0; len && *from; i++, from += charlen, len -= charlen) { charlen = codepage->char2uni(from, len, &wchar_to); if (charlen < 1) { - cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d", - *from, charlen); + cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", + *from, charlen); /* A question mark */ wchar_to = 0x003f; charlen = 1; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index f1e3f25fe00..51f5e0ee723 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -63,11 +63,10 @@ cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) key->datalen = prep->datalen; return 0; } - payload = kmalloc(prep->datalen, GFP_KERNEL); + payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); if (!payload) return -ENOMEM; - memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; key->datalen = prep->datalen; return 0; @@ -219,13 +218,13 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) sidkey = request_key(&cifs_idmap_key_type, desc, ""); if (IS_ERR(sidkey)) { rc = -EINVAL; - cFYI(1, "%s: Can't map %cid %u to a SID", __func__, - sidtype == SIDOWNER ? 'u' : 'g', cid); + cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n", + __func__, sidtype == SIDOWNER ? 
'u' : 'g', cid); goto out_revert_creds; } else if (sidkey->datalen < CIFS_SID_BASE_SIZE) { rc = -EIO; - cFYI(1, "%s: Downcall contained malformed key " - "(datalen=%hu)", __func__, sidkey->datalen); + cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n", + __func__, sidkey->datalen); goto invalidate_key; } @@ -241,8 +240,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); if (ksid_size > sidkey->datalen) { rc = -EIO; - cFYI(1, "%s: Downcall contained malformed key (datalen=%hu, " - "ksid_size=%u)", __func__, sidkey->datalen, ksid_size); + cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu, ksid_size=%u)\n", + __func__, sidkey->datalen, ksid_size); goto invalidate_key; } @@ -274,8 +273,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, * Just return an error. */ if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) { - cFYI(1, "%s: %u subauthorities is too many!", __func__, - psid->num_subauth); + cifs_dbg(FYI, "%s: %u subauthorities is too many!\n", + __func__, psid->num_subauth); return -EIO; } @@ -287,8 +286,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); if (IS_ERR(sidkey)) { rc = -EINVAL; - cFYI(1, "%s: Can't map SID %s to a %cid", __func__, sidstr, - sidtype == SIDOWNER ? 'u' : 'g'); + cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n", + __func__, sidstr, sidtype == SIDOWNER ? 'u' : 'g'); goto out_revert_creds; } @@ -300,8 +299,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t)); if (sidkey->datalen != sizeof(uid_t)) { rc = -EIO; - cFYI(1, "%s: Downcall contained malformed key " - "(datalen=%hu)", __func__, sidkey->datalen); + cifs_dbg(FYI, "%s: Downcall contained malformed key (datalen=%hu)\n", + __func__, sidkey->datalen); key_invalidate(sidkey); goto out_key_put; } @@ -346,7 +345,8 @@ init_cifs_idmap(void) struct key *keyring; int ret; - cFYI(1, "Registering the %s key type", cifs_idmap_key_type.name); + cifs_dbg(FYI, "Registering the %s key type\n", + cifs_idmap_key_type.name); /* create an override credential set with a special thread keyring in * which requests are cached @@ -379,7 +379,7 @@ init_cifs_idmap(void) cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; root_cred = cred; - cFYI(1, "cifs idmap keyring: %d", key_serial(keyring)); + cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring)); return 0; failed_put_key: @@ -395,7 +395,7 @@ exit_cifs_idmap(void) key_revoke(root_cred->thread_keyring); unregister_key_type(&cifs_idmap_key_type); put_cred(root_cred); - cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name); + cifs_dbg(FYI, "Unregistered %s key type\n", cifs_idmap_key_type.name); } /* copy ntsd, owner sid, and group sid from a security descriptor to another */ @@ -462,14 +462,14 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, *pbits_to_set &= ~S_IXUGO; return; } else if (type != ACCESS_ALLOWED) { - cERROR(1, "unknown access control type %d", type); + cifs_dbg(VFS, "unknown access control type %d\n", type); return; } /* else ACCESS_ALLOWED type */ if (flags & GENERIC_ALL) { *pmode |= (S_IRWXUGO & (*pbits_to_set)); - cFYI(DBG2, "all perms"); + cifs_dbg(NOISY, "all perms\n"); return; } if ((flags & GENERIC_WRITE) || @@ -482,7 +482,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == 
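/*
 * [Editor's sketch] The id_to_sid()/sid_to_id() hunks above validate a
 * keyctl downcall before trusting it: the payload must be at least a
 * SID header, and the sub-authority array it declares must fit inside
 * datalen.  Condensed (helper name is illustrative; CIFS_SID_BASE_SIZE
 * and struct cifs_sid come from the patch):
 */
static int sid_downcall_ok(const struct key *sidkey)
{
	const struct cifs_sid *ksid = sidkey->payload.data;
	unsigned int need;

	if (sidkey->datalen < CIFS_SID_BASE_SIZE)
		return -EIO;		/* shorter than a SID header */
	need = CIFS_SID_BASE_SIZE + ksid->num_subauth * sizeof(__le32);
	if (need > sidkey->datalen)
		return -EIO;		/* declared sub-auths overrun payload */
	return 0;
}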
FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cFYI(DBG2, "access flags 0x%x mode now 0x%x", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); return; } @@ -511,7 +511,8 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cFYI(DBG2, "mode: 0x%x, access flags now 0x%x", mode, *pace_flags); + cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + mode, *pace_flags); return; } @@ -551,24 +552,24 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl) /* validate that we do not go past end of acl */ if (le16_to_cpu(pace->size) < 16) { - cERROR(1, "ACE too small %d", le16_to_cpu(pace->size)); + cifs_dbg(VFS, "ACE too small %d\n", le16_to_cpu(pace->size)); return; } if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) { - cERROR(1, "ACL too small to parse ACE"); + cifs_dbg(VFS, "ACL too small to parse ACE\n"); return; } num_subauth = pace->sid.num_subauth; if (num_subauth) { int i; - cFYI(1, "ACE revision %d num_auth %d type %d flags %d size %d", - pace->sid.revision, pace->sid.num_subauth, pace->type, - pace->flags, le16_to_cpu(pace->size)); + cifs_dbg(FYI, "ACE revision %d num_auth %d type %d flags %d size %d\n", + pace->sid.revision, pace->sid.num_subauth, pace->type, + pace->flags, le16_to_cpu(pace->size)); for (i = 0; i < num_subauth; ++i) { - cFYI(1, "ACE sub_auth[%d]: 0x%x", i, - le32_to_cpu(pace->sid.sub_auth[i])); + cifs_dbg(FYI, "ACE sub_auth[%d]: 0x%x\n", + i, le32_to_cpu(pace->sid.sub_auth[i])); } /* BB add length check to make sure that we do not have huge @@ -601,13 +602,13 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, /* validate that we do not go past end of acl */ if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { - cERROR(1, "ACL too small to parse DACL"); + cifs_dbg(VFS, "ACL too small to parse DACL\n"); return; } - cFYI(DBG2, "DACL revision %d size %d num aces %d", - le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), - le32_to_cpu(pdacl->num_aces)); + cifs_dbg(NOISY, "DACL revision %d size %d num aces %d\n", + le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), + le32_to_cpu(pdacl->num_aces)); /* reset rwx permissions for user/group/other. Also, if num_aces is 0 i.e. DACL has no ACEs, @@ -627,10 +628,8 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, return; ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); - if (!ppace) { - cERROR(1, "DACL memory allocation error"); + if (!ppace) return; - } for (i = 0; i < num_aces; ++i) { ppace[i] = (struct cifs_ace *) (acl_base + acl_size); @@ -703,25 +702,25 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) /* validate that we do not go past end of ACL - sid must be at least 8 bytes long (assuming no sub-auths - e.g. 
the null SID */ if (end_of_acl < (char *)psid + 8) { - cERROR(1, "ACL too small to parse SID %p", psid); + cifs_dbg(VFS, "ACL too small to parse SID %p\n", psid); return -EINVAL; } #ifdef CONFIG_CIFS_DEBUG2 if (psid->num_subauth) { int i; - cFYI(1, "SID revision %d num_auth %d", - psid->revision, psid->num_subauth); + cifs_dbg(FYI, "SID revision %d num_auth %d\n", + psid->revision, psid->num_subauth); for (i = 0; i < psid->num_subauth; i++) { - cFYI(1, "SID sub_auth[%d]: 0x%x ", i, - le32_to_cpu(psid->sub_auth[i])); + cifs_dbg(FYI, "SID sub_auth[%d]: 0x%x\n", + i, le32_to_cpu(psid->sub_auth[i])); } /* BB add length check to make sure that we do not have huge num auths and therefore go off the end */ - cFYI(1, "RID 0x%x", - le32_to_cpu(psid->sub_auth[psid->num_subauth-1])); + cifs_dbg(FYI, "RID 0x%x\n", + le32_to_cpu(psid->sub_auth[psid->num_subauth-1])); } #endif @@ -748,31 +747,33 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, le32_to_cpu(pntsd->gsidoffset)); dacloffset = le32_to_cpu(pntsd->dacloffset); dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); - cFYI(DBG2, "revision %d type 0x%x ooffset 0x%x goffset 0x%x " - "sacloffset 0x%x dacloffset 0x%x", + cifs_dbg(NOISY, "revision %d type 0x%x ooffset 0x%x goffset 0x%x sacloffset 0x%x dacloffset 0x%x\n", pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), le32_to_cpu(pntsd->gsidoffset), le32_to_cpu(pntsd->sacloffset), dacloffset); /* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ rc = parse_sid(owner_sid_ptr, end_of_acl); if (rc) { - cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc); + cifs_dbg(FYI, "%s: Error %d parsing Owner SID\n", __func__, rc); return rc; } rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER); if (rc) { - cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc); + cifs_dbg(FYI, "%s: Error %d mapping Owner SID to uid\n", + __func__, rc); return rc; } rc = parse_sid(group_sid_ptr, end_of_acl); if (rc) { - cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc); + cifs_dbg(FYI, "%s: Error %d mapping Owner SID to gid\n", + __func__, rc); return rc; } rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP); if (rc) { - cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc); + cifs_dbg(FYI, "%s: Error %d mapping Group SID to gid\n", + __func__, rc); return rc; } @@ -780,7 +781,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb, parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr, fattr); else - cFYI(1, "no ACL"); /* BB grant all or default perms? */ + cifs_dbg(FYI, "no ACL\n"); /* BB grant all or default perms? 
*/ return rc; } @@ -830,8 +831,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, id = from_kuid(&init_user_ns, uid); rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr); if (rc) { - cFYI(1, "%s: Mapping error %d for owner id %d", - __func__, rc, id); + cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n", + __func__, rc, id); kfree(nowner_sid_ptr); return rc; } @@ -850,8 +851,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, id = from_kgid(&init_user_ns, gid); rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr); if (rc) { - cFYI(1, "%s: Mapping error %d for group id %d", - __func__, rc, id); + cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n", + __func__, rc, id); kfree(ngroup_sid_ptr); return rc; } @@ -881,7 +882,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, cifs_put_tlink(tlink); - cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); + cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, rc, *pacllen); if (rc) return ERR_PTR(rc); return pntsd; @@ -918,7 +919,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, cifs_put_tlink(tlink); free_xid(xid); - cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); + cifs_dbg(FYI, "%s: rc = %d ACL len %d\n", __func__, rc, *pacllen); if (rc) return ERR_PTR(rc); return pntsd; @@ -972,12 +973,12 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, create_options, &fid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { - cERROR(1, "Unable to open file to set ACL"); + cifs_dbg(VFS, "Unable to open file to set ACL\n"); goto out; } rc = CIFSSMBSetCIFSACL(xid, tcon, fid, pnntsd, acllen, aclflag); - cFYI(DBG2, "SetCIFSACL rc = %d", rc); + cifs_dbg(NOISY, "SetCIFSACL rc = %d\n", rc); CIFSSMBClose(xid, tcon, fid); out: @@ -995,7 +996,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, u32 acllen = 0; int rc = 0; - cFYI(DBG2, "converting ACL to mode for %s", path); + cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); if (pfid) pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); @@ -1005,12 +1006,12 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); - cERROR(1, "%s: error %d getting sec desc", __func__, rc); + cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); } else { rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr); kfree(pntsd); if (rc) - cERROR(1, "parse sec desc failed rc = %d", rc); + cifs_dbg(VFS, "parse sec desc failed rc = %d\n", rc); } return rc; @@ -1027,13 +1028,13 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ - cFYI(DBG2, "set ACL from mode for %s", path); + cifs_dbg(NOISY, "set ACL from mode for %s\n", path); /* Get the security descriptor */ pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); - cERROR(1, "%s: error %d getting sec desc", __func__, rc); + cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); goto out; } @@ -1046,7 +1047,6 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN); pnntsd = kmalloc(secdesclen, GFP_KERNEL); if (!pnntsd) { - cERROR(1, "Unable to 
allocate security descriptor"); kfree(pntsd); return -ENOMEM; } @@ -1054,12 +1054,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid, &aclflag); - cFYI(DBG2, "build_sec_desc rc: %d", rc); + cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); if (!rc) { /* Set the security descriptor */ rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag); - cFYI(DBG2, "set_cifs_acl rc: %d", rc); + cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } kfree(pnntsd); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 652f5051be0..71436d1fca1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -50,20 +50,20 @@ static int cifs_calc_signature(struct smb_rqst *rqst, return -EINVAL; if (!server->secmech.sdescmd5) { - cERROR(1, "%s: Can't generate signature", __func__); + cifs_dbg(VFS, "%s: Can't generate signature\n", __func__); return -1; } rc = crypto_shash_init(&server->secmech.sdescmd5->shash); if (rc) { - cERROR(1, "%s: Could not init md5", __func__); + cifs_dbg(VFS, "%s: Could not init md5\n", __func__); return rc; } rc = crypto_shash_update(&server->secmech.sdescmd5->shash, server->session_key.response, server->session_key.len); if (rc) { - cERROR(1, "%s: Could not update with response", __func__); + cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } @@ -71,7 +71,7 @@ static int cifs_calc_signature(struct smb_rqst *rqst, if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { - cERROR(1, "null iovec entry"); + cifs_dbg(VFS, "null iovec entry\n"); return -EIO; } /* The first entry includes a length field (which does not get @@ -88,8 +88,8 @@ static int cifs_calc_signature(struct smb_rqst *rqst, iov[i].iov_base, iov[i].iov_len); } if (rc) { - cERROR(1, "%s: Could not update with payload", - __func__); + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); return rc; } } @@ -106,7 +106,7 @@ static int cifs_calc_signature(struct smb_rqst *rqst, rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); if (rc) - cERROR(1, "%s: Could not generate md5 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); return rc; } @@ -135,8 +135,8 @@ int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, cpu_to_le32(server->sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - *pexpected_response_sequence_number = server->sequence_number++; - server->sequence_number++; + *pexpected_response_sequence_number = ++server->sequence_number; + ++server->sequence_number; rc = cifs_calc_signature(rqst, server, smb_signature); if (rc) @@ -196,8 +196,8 @@ int cifs_verify_signature(struct smb_rqst *rqst, /* Do not need to verify session setups with signature "BSRSPYL " */ if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0) - cFYI(1, "dummy signature received for smb command 0x%x", - cifs_pdu->Command); + cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n", + cifs_pdu->Command); /* save off the origiginal signature so we can modify the smb and check its signature against what the server sent */ @@ -235,30 +235,30 @@ int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp) return -EINVAL; ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL); - if (!ses->auth_key.response) { - cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len); + if (!ses->auth_key.response) return -ENOMEM; - } + ses->auth_key.len = temp_len; rc = SMBNTencrypt(ses->password, 
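/*
 * [Editor's note] Besides the message conversion, the cifs_sign_rqst()
 * hunk above switches from post- to pre-increment when recording the
 * expected response sequence number.  Each signed exchange consumes two
 * sequence numbers (request + response); starting from n:
 *
 *	old: expected = n     (the request's number), seq ends at n + 2,
 *	     so the verifier had to check against expected + 1;
 *	new: expected = n + 1 (the response's number), seq ends at n + 2,
 *	     and the "+ 1" disappears from the verify site (see the
 *	     cifs_readv_callback() hunk further down in this diff).
 *
 * Minimal model of the new behaviour (illustrative helper):
 */
static __u32 advance_sequence(__u32 *seq)
{
	__u32 expected = ++(*seq);	/* the response's sequence number */

	++(*seq);			/* next request starts after it */
	return expected;
}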
ses->server->cryptkey, ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp); if (rc) { - cFYI(1, "%s Can't generate NTLM response, error: %d", - __func__, rc); + cifs_dbg(FYI, "%s Can't generate NTLM response, error: %d\n", + __func__, rc); return rc; } rc = E_md4hash(ses->password, temp_key, nls_cp); if (rc) { - cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); + cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n", + __func__, rc); return rc; } rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE); if (rc) - cFYI(1, "%s Can't generate NTLM session key, error: %d", - __func__, rc); + cifs_dbg(FYI, "%s Can't generate NTLM session key, error: %d\n", + __func__, rc); return rc; } @@ -334,7 +334,6 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { ses->auth_key.len = 0; - cERROR(1, "Challenge target info allocation failure"); return -ENOMEM; } @@ -420,7 +419,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, wchar_t *server; if (!ses->server->secmech.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash"); + cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); return -1; } @@ -430,13 +429,13 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, CIFS_NTHASH_SIZE); if (rc) { - cERROR(1, "%s: Could not set NT Hash as a key", __func__); + cifs_dbg(VFS, "%s: Could not set NT Hash as a key\n", __func__); return rc; } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { - cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5"); + cifs_dbg(VFS, "%s: could not init hmacmd5\n", __func__); return rc; } @@ -444,7 +443,6 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = ses->user_name ? 
strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { - cERROR(1, "calc_ntlmv2_hash: user mem alloc failure"); rc = -ENOMEM; return rc; } @@ -460,7 +458,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)user, 2 * len); kfree(user); if (rc) { - cERROR(1, "%s: Could not update with user", __func__); + cifs_dbg(VFS, "%s: Could not update with user\n", __func__); return rc; } @@ -470,7 +468,6 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, domain = kmalloc(2 + (len * 2), GFP_KERNEL); if (domain == NULL) { - cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); rc = -ENOMEM; return rc; } @@ -481,8 +478,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)domain, 2 * len); kfree(domain); if (rc) { - cERROR(1, "%s: Could not update with domain", - __func__); + cifs_dbg(VFS, "%s: Could not update with domain\n", + __func__); return rc; } } else if (ses->serverName) { @@ -490,7 +487,6 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, server = kmalloc(2 + (len * 2), GFP_KERNEL); if (server == NULL) { - cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); rc = -ENOMEM; return rc; } @@ -501,8 +497,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, (char *)server, 2 * len); kfree(server); if (rc) { - cERROR(1, "%s: Could not update with server", - __func__); + cifs_dbg(VFS, "%s: Could not update with server\n", + __func__); return rc; } } @@ -510,7 +506,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ntlmv2_hash); if (rc) - cERROR(1, "%s: Could not generate md5 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); return rc; } @@ -522,20 +518,21 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) unsigned int offset = CIFS_SESS_KEY_SIZE + 8; if (!ses->server->secmech.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash"); + cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); return -1; } rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); + cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", + __func__); return rc; } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { - cERROR(1, "CalcNTLMv2_response: could not init hmacmd5"); + cifs_dbg(VFS, "%s: could not init hmacmd5\n", __func__); return rc; } @@ -548,14 +545,14 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + offset, ses->auth_key.len - offset); if (rc) { - cERROR(1, "%s: Could not update with response", __func__); + cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + CIFS_SESS_KEY_SIZE); if (rc) - cERROR(1, "%s: Could not generate md5 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); return rc; } @@ -575,14 +572,15 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) if (!ses->domainName) { rc = find_domain_name(ses, nls_cp); if (rc) { - cERROR(1, "error %d finding domain name", rc); + cifs_dbg(VFS, "error %d finding domain name\n", + rc); goto setup_ntlmv2_rsp_ret; } } } else { rc = 
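/*
 * [Editor's sketch] calc_ntlmv2_hash() and CalcNTLMv2_response() above
 * follow the standard crypto_shash sequence: setkey on the tfm, then
 * init/update.../final on its shash_desc.  One-shot form, assuming
 * desc->tfm already points at hmacmd5 (buffer names are placeholders,
 * not cifs symbols):
 */
static int hmac_md5_digest(struct crypto_shash *hmacmd5,
			   struct shash_desc *desc,
			   const u8 *key, unsigned int keylen,
			   const u8 *data, unsigned int datalen,
			   u8 *out)	/* receives the 16-byte MD5 digest */
{
	int rc;

	rc = crypto_shash_setkey(hmacmd5, key, keylen);	/* key first */
	if (rc)
		return rc;
	rc = crypto_shash_init(desc);			/* then init */
	if (rc)
		return rc;
	rc = crypto_shash_update(desc, data, datalen);	/* any number of updates */
	if (rc)
		return rc;
	return crypto_shash_final(desc, out);		/* emit the digest */
}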
build_avpair_blob(ses, nls_cp); if (rc) { - cERROR(1, "error %d building av pair blob", rc); + cifs_dbg(VFS, "error %d building av pair blob\n", rc); goto setup_ntlmv2_rsp_ret; } } @@ -595,7 +593,6 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) if (!ses->auth_key.response) { rc = ENOMEM; ses->auth_key.len = 0; - cERROR(1, "%s: Can't allocate auth blob", __func__); goto setup_ntlmv2_rsp_ret; } ses->auth_key.len += baselen; @@ -613,14 +610,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) /* calculate ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); if (rc) { - cERROR(1, "could not get v2 hash rc %d", rc); + cifs_dbg(VFS, "could not get v2 hash rc %d\n", rc); goto setup_ntlmv2_rsp_ret; } /* calculate first part of the client response (CR1) */ rc = CalcNTLMv2_response(ses, ntlmv2_hash); if (rc) { - cERROR(1, "Could not calculate CR1 rc: %d", rc); + cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc); goto setup_ntlmv2_rsp_ret; } @@ -628,13 +625,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); + cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", + __func__); goto setup_ntlmv2_rsp_ret; } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { - cERROR(1, "%s: Could not init hmacmd5", __func__); + cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__); goto setup_ntlmv2_rsp_ret; } @@ -642,14 +640,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response + CIFS_SESS_KEY_SIZE, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { - cERROR(1, "%s: Could not update with response", __func__); + cifs_dbg(VFS, "%s: Could not update with response\n", __func__); goto setup_ntlmv2_rsp_ret; } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response); if (rc) - cERROR(1, "%s: Could not generate md5 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); setup_ntlmv2_rsp_ret: kfree(tiblob); @@ -671,7 +669,7 @@ calc_seckey(struct cifs_ses *ses) tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_arc4)) { rc = PTR_ERR(tfm_arc4); - cERROR(1, "could not allocate crypto API arc4"); + cifs_dbg(VFS, "could not allocate crypto API arc4\n"); return rc; } @@ -680,7 +678,8 @@ calc_seckey(struct cifs_ses *ses) rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); if (rc) { - cERROR(1, "%s: Could not set response as a key", __func__); + cifs_dbg(VFS, "%s: Could not set response as a key\n", + __func__); return rc; } @@ -689,7 +688,7 @@ calc_seckey(struct cifs_ses *ses) rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); if (rc) { - cERROR(1, "could not encrypt session key rc: %d", rc); + cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc); crypto_free_blkcipher(tfm_arc4); return rc; } @@ -731,20 +730,20 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); if (IS_ERR(server->secmech.hmacmd5)) { - cERROR(1, "could not allocate crypto hmacmd5"); + cifs_dbg(VFS, "could not allocate crypto hmacmd5\n"); return PTR_ERR(server->secmech.hmacmd5); } server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(server->secmech.md5)) { - cERROR(1, "could not allocate crypto md5"); + cifs_dbg(VFS, 
"could not allocate crypto md5\n"); rc = PTR_ERR(server->secmech.md5); goto crypto_allocate_md5_fail; } server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); if (IS_ERR(server->secmech.hmacsha256)) { - cERROR(1, "could not allocate crypto hmacsha256\n"); + cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); rc = PTR_ERR(server->secmech.hmacsha256); goto crypto_allocate_hmacsha256_fail; } @@ -753,7 +752,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) crypto_shash_descsize(server->secmech.hmacmd5); server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); if (!server->secmech.sdeschmacmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5"); rc = -ENOMEM; goto crypto_allocate_hmacmd5_sdesc_fail; } @@ -764,7 +762,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) crypto_shash_descsize(server->secmech.md5); server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); if (!server->secmech.sdescmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5"); rc = -ENOMEM; goto crypto_allocate_md5_sdesc_fail; } @@ -775,7 +772,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) crypto_shash_descsize(server->secmech.hmacsha256); server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); if (!server->secmech.sdeschmacsha256) { - cERROR(1, "%s: Can't alloc hmacsha256\n", __func__); rc = -ENOMEM; goto crypto_allocate_hmacsha256_sdesc_fail; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 345fc89c428..72e4efee138 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -161,7 +161,7 @@ cifs_read_super(struct super_block *sb) #ifdef CONFIG_CIFS_NFSD_EXPORT if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { - cFYI(1, "export ops supported"); + cifs_dbg(FYI, "export ops supported\n"); sb->s_export_op = &cifs_export_ops; } #endif /* CONFIG_CIFS_NFSD_EXPORT */ @@ -169,7 +169,7 @@ cifs_read_super(struct super_block *sb) return 0; out_no_root: - cERROR(1, "cifs_read_super: get root inode failed"); + cifs_dbg(VFS, "%s: get root inode failed\n", __func__); return rc; } @@ -502,7 +502,7 @@ static void cifs_umount_begin(struct super_block *sb) /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if (tcon->ses && tcon->ses->server) { - cFYI(1, "wake up tasks now - umount begin not complete"); + cifs_dbg(FYI, "wake up tasks now - umount begin not complete\n"); wake_up_all(&tcon->ses->server->request_q); wake_up_all(&tcon->ses->server->response_q); msleep(1); /* yield */ @@ -573,7 +573,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) if (full_path == NULL) return ERR_PTR(-ENOMEM); - cFYI(1, "Get root dentry for %s", full_path); + cifs_dbg(FYI, "Get root dentry for %s\n", full_path); sep = CIFS_DIR_SEP(cifs_sb); dentry = dget(sb->s_root); @@ -632,7 +632,7 @@ cifs_do_mount(struct file_system_type *fs_type, struct cifs_mnt_data mnt_data; struct dentry *root; - cFYI(1, "Devname: %s flags: %d ", dev_name, flags); + cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags); volume_info = cifs_get_volume_info((char *)data, dev_name); if (IS_ERR(volume_info)) @@ -655,7 +655,8 @@ cifs_do_mount(struct file_system_type *fs_type, rc = cifs_mount(cifs_sb, volume_info); if (rc) { if (!(flags & MS_SILENT)) - cERROR(1, "cifs_mount failed w/return code = %d", rc); + cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", + rc); root = ERR_PTR(rc); goto out_mountdata; } @@ -675,7 +676,7 @@ cifs_do_mount(struct file_system_type *fs_type, } if (sb->s_root) { - cFYI(1, "Use existing 
superblock"); + cifs_dbg(FYI, "Use existing superblock\n"); cifs_umount(cifs_sb); } else { rc = cifs_read_super(sb); @@ -691,7 +692,7 @@ cifs_do_mount(struct file_system_type *fs_type, if (IS_ERR(root)) goto out_super; - cFYI(1, "dentry root is: %p", root); + cifs_dbg(FYI, "dentry root is: %p\n", root); goto out; out_super: @@ -723,7 +724,8 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, rc = filemap_fdatawrite(inode->i_mapping); if (rc) - cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode); + cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", + rc, inode); return written; } @@ -1030,7 +1032,10 @@ cifs_init_request_bufs(void) } else { CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/ } -/* cERROR(1, "CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize); */ +/* + cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", + CIFSMaxBufSize, CIFSMaxBufSize); +*/ cifs_req_cachep = kmem_cache_create("cifs_request", CIFSMaxBufSize + max_hdr_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1041,7 +1046,7 @@ cifs_init_request_bufs(void) cifs_min_rcv = 1; else if (cifs_min_rcv > 64) { cifs_min_rcv = 64; - cERROR(1, "cifs_min_rcv set to maximum (64)"); + cifs_dbg(VFS, "cifs_min_rcv set to maximum (64)\n"); } cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, @@ -1072,7 +1077,7 @@ cifs_init_request_bufs(void) cifs_min_small = 2; else if (cifs_min_small > 256) { cifs_min_small = 256; - cFYI(1, "cifs_min_small set to maximum (256)"); + cifs_dbg(FYI, "cifs_min_small set to maximum (256)\n"); } cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, @@ -1163,10 +1168,11 @@ init_cifs(void) if (cifs_max_pending < 2) { cifs_max_pending = 2; - cFYI(1, "cifs_max_pending set to min of 2"); + cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); } else if (cifs_max_pending > CIFS_MAX_REQ) { cifs_max_pending = CIFS_MAX_REQ; - cFYI(1, "cifs_max_pending set to max of %u", CIFS_MAX_REQ); + cifs_dbg(FYI, "cifs_max_pending set to max of %u\n", + CIFS_MAX_REQ); } cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); @@ -1235,7 +1241,7 @@ out_clean_proc: static void __exit exit_cifs(void) { - cFYI(DBG2, "exit_cifs"); + cifs_dbg(NOISY, "exit_cifs\n"); unregister_filesystem(&cifs_fs_type); cifs_dfs_release_automount_timer(); #ifdef CONFIG_CIFS_ACL diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f450f0683dd..dda188a9433 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -45,17 +45,17 @@ extern void _free_xid(unsigned int); #define get_xid() \ ({ \ unsigned int __xid = _get_xid(); \ - cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \ - __func__, __xid, \ - from_kuid(&init_user_ns, current_fsuid())); \ + cifs_dbg(FYI, "CIFS VFS: in %s as Xid: %u with uid: %d\n", \ + __func__, __xid, \ + from_kuid(&init_user_ns, current_fsuid())); \ __xid; \ }) #define free_xid(curr_xid) \ do { \ _free_xid(curr_xid); \ - cFYI(1, "CIFS VFS: leaving %s (xid = %u) rc = %d", \ - __func__, curr_xid, (int)rc); \ + cifs_dbg(FYI, "CIFS VFS: leaving %s (xid = %u) rc = %d\n", \ + __func__, curr_xid, (int)rc); \ } while (0) extern int init_cifs_idmap(void); extern void exit_cifs_idmap(void); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 8e2e799e7a2..a58dc77cc44 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -139,8 +139,8 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (smb_command != SMB_COM_WRITE_ANDX && smb_command != SMB_COM_OPEN_ANDX && smb_command != SMB_COM_TREE_DISCONNECT) { - cFYI(1, "can not send cmd %d while 
umounting", - smb_command); + cifs_dbg(FYI, "can not send cmd %d while umounting\n", + smb_command); return -ENODEV; } } @@ -163,7 +163,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * back on-line */ if (!tcon->retry) { - cFYI(1, "gave up waiting on reconnect in smb_init"); + cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); return -EHOSTDOWN; } } @@ -191,7 +191,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) cifs_mark_open_files_invalid(tcon); rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); mutex_unlock(&ses->session_mutex); - cFYI(1, "reconnect tcon rc = %d", rc); + cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) goto out; @@ -396,7 +396,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) else /* if override flags set only sign/seal OR them with global auth */ secFlags = global_secflags | ses->overrideSecFlg; - cFYI(1, "secFlags 0x%x", secFlags); + cifs_dbg(FYI, "secFlags 0x%x\n", secFlags); pSMB->hdr.Mid = get_next_mid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); @@ -404,12 +404,12 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { - cFYI(1, "Kerberos only mechanism, enable extended security"); + cifs_dbg(FYI, "Kerberos only mechanism, enable extended security\n"); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; } else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { - cFYI(1, "NTLMSSP only mechanism, enable extended security"); + cifs_dbg(FYI, "NTLMSSP only mechanism, enable extended security\n"); pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; } @@ -428,7 +428,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) goto neg_err_exit; server->dialect = le16_to_cpu(pSMBr->DialectIndex); - cFYI(1, "Dialect: %d", server->dialect); + cifs_dbg(FYI, "Dialect: %d\n", server->dialect); /* Check wct = 1 error case */ if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) { /* core returns wct = 1, but we do not ask for core - otherwise @@ -447,8 +447,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) (secFlags & CIFSSEC_MAY_PLNTXT)) server->secType = LANMAN; else { - cERROR(1, "mount failed weak security disabled" - " in /proc/fs/cifs/SecurityFlags"); + cifs_dbg(VFS, "mount failed weak security disabled in /proc/fs/cifs/SecurityFlags\n"); rc = -EOPNOTSUPP; goto neg_err_exit; } @@ -482,9 +481,9 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) utc = CURRENT_TIME; ts = cnvrtDosUnixTm(rsp->SrvTime.Date, rsp->SrvTime.Time, 0); - cFYI(1, "SrvTime %d sec since 1970 (utc: %d) diff: %d", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec)); + cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", + (int)ts.tv_sec, (int)utc.tv_sec, + (int)(utc.tv_sec - ts.tv_sec)); val = (int)(utc.tv_sec - ts.tv_sec); seconds = abs(val); result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; @@ -498,7 +497,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) server->timeAdj = (int)tmp; server->timeAdj *= 60; /* also in seconds */ } - cFYI(1, "server->timeAdj: %d seconds", server->timeAdj); + cifs_dbg(FYI, "server->timeAdj: %d seconds\n", server->timeAdj); /* BB get server time for time conversions and add @@ -513,14 +512,13 @@ CIFSSMBNegotiate(const unsigned int xid, 
struct cifs_ses *ses) goto neg_err_exit; } - cFYI(1, "LANMAN negotiated"); + cifs_dbg(FYI, "LANMAN negotiated\n"); /* we will not end up setting signing flags - as no signing was in LANMAN and server did not return the flags on */ goto signing_check; #else /* weak security disabled */ } else if (pSMBr->hdr.WordCount == 13) { - cERROR(1, "mount failed, cifs module not built " - "with CIFS_WEAK_PW_HASH support"); + cifs_dbg(VFS, "mount failed, cifs module not built with CIFS_WEAK_PW_HASH support\n"); rc = -EOPNOTSUPP; #endif /* WEAK_PW_HASH */ goto neg_err_exit; @@ -532,14 +530,13 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) /* else wct == 17 NTLM */ server->sec_mode = pSMBr->SecurityMode; if ((server->sec_mode & SECMODE_USER) == 0) - cFYI(1, "share mode security"); + cifs_dbg(FYI, "share mode security\n"); if ((server->sec_mode & SECMODE_PW_ENCRYPT) == 0) #ifdef CONFIG_CIFS_WEAK_PW_HASH if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) #endif /* CIFS_WEAK_PW_HASH */ - cERROR(1, "Server requests plain text password" - " but client support disabled"); + cifs_dbg(VFS, "Server requests plain text password but client support disabled\n"); if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) server->secType = NTLMv2; @@ -555,7 +552,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) server->secType = LANMAN; else { rc = -EOPNOTSUPP; - cERROR(1, "Invalid security type"); + cifs_dbg(VFS, "Invalid security type\n"); goto neg_err_exit; } /* else ... any others ...? */ @@ -568,7 +565,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) /* probably no need to store and check maxvcs */ server->maxBuf = le32_to_cpu(pSMBr->MaxBufferSize); server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); - cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); + cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf); server->capabilities = le32_to_cpu(pSMBr->Capabilities); server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); server->timeAdj *= 60; @@ -590,7 +587,7 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses) if (memcmp(server->server_GUID, pSMBr->u.extended_response. 
GUID, 16) != 0) { - cFYI(1, "server UID changed"); + cifs_dbg(FYI, "server UID changed\n"); memcpy(server->server_GUID, pSMBr->u.extended_response.GUID, 16); @@ -633,21 +630,19 @@ signing_check: if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { /* MUST_SIGN already includes the MAY_SIGN FLAG so if this is zero it means that signing is disabled */ - cFYI(1, "Signing disabled"); + cifs_dbg(FYI, "Signing disabled\n"); if (server->sec_mode & SECMODE_SIGN_REQUIRED) { - cERROR(1, "Server requires " - "packet signing to be enabled in " - "/proc/fs/cifs/SecurityFlags."); + cifs_dbg(VFS, "Server requires packet signing to be enabled in /proc/fs/cifs/SecurityFlags\n"); rc = -EOPNOTSUPP; } server->sec_mode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { /* signing required */ - cFYI(1, "Must sign - secFlags 0x%x", secFlags); + cifs_dbg(FYI, "Must sign - secFlags 0x%x\n", secFlags); if ((server->sec_mode & (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { - cERROR(1, "signing required but server lacks support"); + cifs_dbg(VFS, "signing required but server lacks support\n"); rc = -EOPNOTSUPP; } else server->sec_mode |= SECMODE_SIGN_REQUIRED; @@ -661,7 +656,7 @@ signing_check: neg_err_exit: cifs_buf_release(pSMB); - cFYI(1, "negprot rc %d", rc); + cifs_dbg(FYI, "negprot rc %d\n", rc); return rc; } @@ -671,7 +666,7 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) struct smb_hdr *smb_buffer; int rc = 0; - cFYI(1, "In tree disconnect"); + cifs_dbg(FYI, "In tree disconnect\n"); /* BB: do we need to check this? These should never be NULL. */ if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) @@ -693,7 +688,7 @@ CIFSSMBTDis(const unsigned int xid, struct cifs_tcon *tcon) rc = SendReceiveNoRsp(xid, tcon->ses, (char *)smb_buffer, 0); if (rc) - cFYI(1, "Tree disconnect failed %d", rc); + cifs_dbg(FYI, "Tree disconnect failed %d\n", rc); /* No need to return error on this operation if tid invalidated and closed on server already e.g. due to tcp session crashing */ @@ -728,7 +723,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - cFYI(1, "In echo request"); + cifs_dbg(FYI, "In echo request\n"); rc = small_smb_init(SMB_COM_ECHO, 0, NULL, (void **)&smb); if (rc) @@ -747,7 +742,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server) rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback, server, CIFS_ASYNC_OP | CIFS_ECHO_OP); if (rc) - cFYI(1, "Echo request failed: %d", rc); + cifs_dbg(FYI, "Echo request failed: %d\n", rc); cifs_small_buf_release(smb); @@ -760,7 +755,7 @@ CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses) LOGOFF_ANDX_REQ *pSMB; int rc = 0; - cFYI(1, "In SMBLogoff for session disconnect"); + cifs_dbg(FYI, "In SMBLogoff for session disconnect\n"); /* * BB: do we need to check validity of ses and server? 
They should @@ -814,7 +809,7 @@ CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, param_offset, offset, byte_count; - cFYI(1, "In POSIX delete"); + cifs_dbg(FYI, "In POSIX delete\n"); PsxDelete: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -866,7 +861,7 @@ PsxDelete: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "Posix delete returned %d", rc); + cifs_dbg(FYI, "Posix delete returned %d\n", rc); cifs_buf_release(pSMB); cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes); @@ -914,7 +909,7 @@ DelFileRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_deletes); if (rc) - cFYI(1, "Error in RMFile = %d", rc); + cifs_dbg(FYI, "Error in RMFile = %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -934,7 +929,7 @@ CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, int name_len; int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; - cFYI(1, "In CIFSSMBRmDir"); + cifs_dbg(FYI, "In CIFSSMBRmDir\n"); RmDirRetry: rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -960,7 +955,7 @@ RmDirRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_rmdirs); if (rc) - cFYI(1, "Error in RMDir = %d", rc); + cifs_dbg(FYI, "Error in RMDir = %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -979,7 +974,7 @@ CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, int name_len; int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; - cFYI(1, "In CIFSSMBMkDir"); + cifs_dbg(FYI, "In CIFSSMBMkDir\n"); MkDirRetry: rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -1005,7 +1000,7 @@ MkDirRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_mkdirs); if (rc) - cFYI(1, "Error in Mkdir = %d", rc); + cifs_dbg(FYI, "Error in Mkdir = %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -1029,7 +1024,7 @@ CIFSPOSIXCreate(const unsigned int xid, struct cifs_tcon *tcon, OPEN_PSX_REQ *pdata; OPEN_PSX_RSP *psx_rsp; - cFYI(1, "In POSIX Create"); + cifs_dbg(FYI, "In POSIX Create\n"); PsxCreat: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -1083,11 +1078,11 @@ PsxCreat: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Posix create returned %d", rc); + cifs_dbg(FYI, "Posix create returned %d\n", rc); goto psx_create_err; } - cFYI(1, "copying inode info"); + cifs_dbg(FYI, "copying inode info\n"); rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) { @@ -1109,11 +1104,11 @@ PsxCreat: /* check to make sure response data is there */ if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) { pRetData->Type = cpu_to_le32(-1); /* unknown */ - cFYI(DBG2, "unknown type"); + cifs_dbg(NOISY, "unknown type\n"); } else { if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP) + sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1, "Open response data too small"); + cifs_dbg(VFS, "Open response data too small\n"); pRetData->Type = cpu_to_le32(-1); goto psx_create_err; } @@ -1160,7 +1155,7 @@ static __u16 convert_disposition(int disposition) ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC; break; default: - cFYI(1, "unknown disposition %d", 
disposition); + cifs_dbg(FYI, "unknown disposition %d\n", disposition); ofun = SMBOPEN_OAPPEND; /* regular open */ } return ofun; @@ -1251,7 +1246,7 @@ OldOpenRetry: (struct smb_hdr *)pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { - cFYI(1, "Error in Open = %d", rc); + cifs_dbg(FYI, "Error in Open = %d\n", rc); } else { /* BB verify if wct == 15 */ @@ -1364,7 +1359,7 @@ openRetry: (struct smb_hdr *)pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_opens); if (rc) { - cFYI(1, "Error in Open = %d", rc); + cifs_dbg(FYI, "Error in Open = %d\n", rc); } else { *pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */ *netfid = pSMBr->Fid; /* cifs fid stays in le */ @@ -1425,8 +1420,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) char *buf = server->smallbuf; unsigned int buflen = get_rfc1002_length(buf) + 4; - cFYI(1, "%s: mid=%llu offset=%llu bytes=%u", __func__, - mid->mid, rdata->offset, rdata->bytes); + cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", + __func__, mid->mid, rdata->offset, rdata->bytes); /* * read the rest of READ_RSP header (sans Data array), or whatever we @@ -1447,16 +1442,16 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { - cFYI(1, "%s: server returned error %d", __func__, - rdata->result); + cifs_dbg(FYI, "%s: server returned error %d\n", + __func__, rdata->result); return cifs_readv_discard(server, mid); } /* Is there enough to get to the rest of the READ_RSP header? */ if (server->total_read < server->vals->read_rsp_size) { - cFYI(1, "%s: server returned short header. got=%u expected=%zu", - __func__, server->total_read, - server->vals->read_rsp_size); + cifs_dbg(FYI, "%s: server returned short header. got=%u expected=%zu\n", + __func__, server->total_read, + server->vals->read_rsp_size); rdata->result = -EIO; return cifs_readv_discard(server, mid); } @@ -1468,19 +1463,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) * is beyond the EOF. Treat it as if the data starts just after * the header. */ - cFYI(1, "%s: data offset (%u) inside read response header", - __func__, data_offset); + cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n", + __func__, data_offset); data_offset = server->total_read; } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) { /* data_offset is beyond the end of smallbuf */ - cFYI(1, "%s: data offset (%u) beyond end of smallbuf", - __func__, data_offset); + cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", + __func__, data_offset); rdata->result = -EIO; return cifs_readv_discard(server, mid); } - cFYI(1, "%s: total_read=%u data_offset=%u", __func__, - server->total_read, data_offset); + cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n", + __func__, server->total_read, data_offset); len = data_offset - server->total_read; if (len > 0) { @@ -1496,8 +1491,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first iov for signature check */ rdata->iov.iov_base = buf; rdata->iov.iov_len = server->total_read; - cFYI(1, "0: iov_base=%p iov_len=%zu", - rdata->iov.iov_base, rdata->iov.iov_len); + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov.iov_base, rdata->iov.iov_len); /* how much data is in the response? 
*/ data_len = server->ops->read_data_length(buf); @@ -1514,8 +1509,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) server->total_read += length; rdata->bytes = length; - cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, - buflen, data_len); + cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", + server->total_read, buflen, data_len); /* discard anything left over */ if (server->total_read < buflen) @@ -1538,8 +1533,9 @@ cifs_readv_callback(struct mid_q_entry *mid) .rq_pagesz = rdata->pagesz, .rq_tailsz = rdata->tailsz }; - cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, - mid->mid, mid->mid_state, rdata->result, rdata->bytes); + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", + __func__, mid->mid, mid->mid_state, rdata->result, + rdata->bytes); switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: @@ -1549,10 +1545,10 @@ cifs_readv_callback(struct mid_q_entry *mid) int rc = 0; rc = cifs_verify_signature(&rqst, server, - mid->sequence_number + 1); + mid->sequence_number); if (rc) - cERROR(1, "SMB signature verification returned " - "error = %d", rc); + cifs_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); } /* FIXME: should this be counted toward the initiating task? */ task_io_account_read(rdata->bytes); @@ -1582,8 +1578,8 @@ cifs_async_readv(struct cifs_readdata *rdata) struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; - cFYI(1, "%s: offset=%llu bytes=%u", __func__, - rdata->offset, rdata->bytes); + cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", + __func__, rdata->offset, rdata->bytes); if (tcon->ses->capabilities & CAP_LARGE_FILES) wct = 12; @@ -1653,7 +1649,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, struct cifs_tcon *tcon = io_parms->tcon; unsigned int count = io_parms->length; - cFYI(1, "Reading %d bytes on fid %d", count, netfid); + cifs_dbg(FYI, "Reading %d bytes on fid %d\n", count, netfid); if (tcon->ses->capabilities & CAP_LARGE_FILES) wct = 12; else { @@ -1701,7 +1697,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); pSMBr = (READ_RSP *)iov[0].iov_base; if (rc) { - cERROR(1, "Send error in read = %d", rc); + cifs_dbg(VFS, "Send error in read = %d\n", rc); } else { int data_length = le16_to_cpu(pSMBr->DataLengthHigh); data_length = data_length << 16; @@ -1711,7 +1707,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, /*check that DataLength would not go beyond end of SMB */ if ((data_length > CIFSMaxBufSize) || (data_length > count)) { - cFYI(1, "bad length %d for count %d", + cifs_dbg(FYI, "bad length %d for count %d\n", data_length, count); rc = -EIO; *nbytes = 0; @@ -1719,7 +1715,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms, pReadData = (char *) (&pSMBr->hdr.Protocol) + le16_to_cpu(pSMBr->DataOffset); /* if (rc = copy_to_user(buf, pReadData, data_length)) { - cERROR(1, "Faulting on read rc = %d",rc); + cifs_dbg(VFS, "Faulting on read rc = %d\n",rc); rc = -EFAULT; }*/ /* can not use copy_to_user when using page cache*/ if (*buf) @@ -1767,7 +1763,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, *nbytes = 0; - /* cFYI(1, "write at %lld %d bytes", offset, count);*/ + /* cifs_dbg(FYI, "write at %lld %d bytes\n", offset, count);*/ if (tcon->ses == NULL) return -ECONNABORTED; @@ -1852,7 +1848,7 @@ CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms, (struct smb_hdr *) pSMBr, 
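/*
 * [Editor's note] The cifs_verify_signature() call in the
 * cifs_readv_callback() hunk above is the verify-side half of the
 * sequence-number change made in cifs_sign_rqst() earlier in this
 * diff: the mid now stores the response's sequence number itself, so
 * the old `mid->sequence_number + 1` becomes plain
 * `mid->sequence_number`.  Net wire behaviour is unchanged; the offset
 * just moved to where the number is recorded.
 */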
&bytes_returned, long_op); cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { - cFYI(1, "Send error in write = %d", rc); + cifs_dbg(FYI, "Send error in write = %d\n", rc); } else { *nbytes = le16_to_cpu(pSMBr->CountHigh); *nbytes = (*nbytes) << 16; @@ -1959,7 +1955,7 @@ cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete) /* this would overflow */ if (nr_pages == 0) { - cERROR(1, "%s: called with nr_pages == 0!", __func__); + cifs_dbg(VFS, "%s: called with nr_pages == 0!\n", __func__); return NULL; } @@ -2075,7 +2071,8 @@ cifs_async_writev(struct cifs_writedata *wdata) rqst.rq_pagesz = wdata->pagesz; rqst.rq_tailsz = wdata->tailsz; - cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); + cifs_dbg(FYI, "async write at %llu %u bytes\n", + wdata->offset, wdata->bytes); smb->DataLengthLow = cpu_to_le16(wdata->bytes & 0xFFFF); smb->DataLengthHigh = cpu_to_le16(wdata->bytes >> 16); @@ -2123,7 +2120,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, *nbytes = 0; - cFYI(1, "write2 at %lld %d bytes", (long long)offset, count); + cifs_dbg(FYI, "write2 at %lld %d bytes\n", (long long)offset, count); if (tcon->ses->capabilities & CAP_LARGE_FILES) { wct = 14; @@ -2182,7 +2179,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); if (rc) { - cFYI(1, "Send error Write2 = %d", rc); + cifs_dbg(FYI, "Send error Write2 = %d\n", rc); } else if (resp_buf_type == 0) { /* presumably this can not happen, but best to be safe */ rc = -EIO; @@ -2223,7 +2220,8 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, int resp_buf_type; __u16 count; - cFYI(1, "cifs_lockv num lock %d num unlock %d", num_lock, num_unlock); + cifs_dbg(FYI, "cifs_lockv num lock %d num unlock %d\n", + num_lock, num_unlock); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); if (rc) @@ -2249,7 +2247,7 @@ int cifs_lockv(const unsigned int xid, struct cifs_tcon *tcon, cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); if (rc) - cFYI(1, "Send error in cifs_lockv = %d", rc); + cifs_dbg(FYI, "Send error in cifs_lockv = %d\n", rc); return rc; } @@ -2268,7 +2266,8 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, int flags = 0; __u16 count; - cFYI(1, "CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock); + cifs_dbg(FYI, "CIFSSMBLock timeout %d numLock %d\n", + (int)waitFlag, numLock); rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); if (rc) @@ -2317,7 +2316,7 @@ CIFSSMBLock(const unsigned int xid, struct cifs_tcon *tcon, } cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); if (rc) - cFYI(1, "Send error in Lock = %d", rc); + cifs_dbg(FYI, "Send error in Lock = %d\n", rc); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -2341,7 +2340,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, __u16 params, param_offset, offset, byte_count, count; struct kvec iov[1]; - cFYI(1, "Posix Lock"); + cifs_dbg(FYI, "Posix Lock\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); @@ -2408,7 +2407,7 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon, } if (rc) { - cFYI(1, "Send error in Posix Lock = %d", rc); + cifs_dbg(FYI, "Send error in Posix Lock = %d\n", rc); } else if (pLockData) { /* lock 
structure can be returned on get */ __u16 data_offset; @@ -2465,7 +2464,7 @@ CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; CLOSE_REQ *pSMB = NULL; - cFYI(1, "In CIFSSMBClose"); + cifs_dbg(FYI, "In CIFSSMBClose\n"); /* do not retry on dead session on close */ rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB); @@ -2482,7 +2481,7 @@ CIFSSMBClose(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) if (rc) { if (rc != -EINTR) { /* EINTR is expected when user ctl-c to kill app */ - cERROR(1, "Send error in Close = %d", rc); + cifs_dbg(VFS, "Send error in Close = %d\n", rc); } } @@ -2498,7 +2497,7 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) { int rc = 0; FLUSH_REQ *pSMB = NULL; - cFYI(1, "In CIFSSMBFlush"); + cifs_dbg(FYI, "In CIFSSMBFlush\n"); rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB); if (rc) @@ -2509,7 +2508,7 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id) rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_flushes); if (rc) - cERROR(1, "Send error in Flush = %d", rc); + cifs_dbg(VFS, "Send error in Flush = %d\n", rc); return rc; } @@ -2527,7 +2526,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, __u16 count; int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; - cFYI(1, "In CIFSSMBRename"); + cifs_dbg(FYI, "In CIFSSMBRename\n"); renameRetry: rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -2574,7 +2573,7 @@ renameRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_renames); if (rc) - cFYI(1, "Send error in rename = %d", rc); + cifs_dbg(FYI, "Send error in rename = %d\n", rc); cifs_buf_release(pSMB); @@ -2598,7 +2597,7 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, int len_of_str; __u16 params, param_offset, offset, count, byte_count; - cFYI(1, "Rename to File by handle"); + cifs_dbg(FYI, "Rename to File by handle\n"); rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2655,7 +2654,8 @@ int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *pTcon, (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&pTcon->stats.cifs_stats.num_t2renames); if (rc) - cFYI(1, "Send error in Rename (by file handle) = %d", rc); + cifs_dbg(FYI, "Send error in Rename (by file handle) = %d\n", + rc); cifs_buf_release(pSMB); @@ -2677,7 +2677,7 @@ CIFSSMBCopy(const unsigned int xid, struct cifs_tcon *tcon, int name_len, name_len2; __u16 count; - cFYI(1, "In CIFSSMBCopy"); + cifs_dbg(FYI, "In CIFSSMBCopy\n"); copyRetry: rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -2722,8 +2722,8 @@ copyRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in copy = %d with %d files copied", - rc, le16_to_cpu(pSMBr->CopyCount)); + cifs_dbg(FYI, "Send error in copy = %d with %d files copied\n", + rc, le16_to_cpu(pSMBr->CopyCount)); } cifs_buf_release(pSMB); @@ -2747,7 +2747,7 @@ CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, param_offset, offset, byte_count; - cFYI(1, "In Symlink Unix style"); + cifs_dbg(FYI, "In Symlink Unix style\n"); createSymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -2812,7 +2812,8 
@@ createSymLinkRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_symlinks); if (rc) - cFYI(1, "Send error in SetPathInfo create symlink = %d", rc); + cifs_dbg(FYI, "Send error in SetPathInfo create symlink = %d\n", + rc); cifs_buf_release(pSMB); @@ -2836,7 +2837,7 @@ CIFSUnixCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, param_offset, offset, byte_count; - cFYI(1, "In Create Hard link Unix style"); + cifs_dbg(FYI, "In Create Hard link Unix style\n"); createHardLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -2898,7 +2899,8 @@ createHardLinkRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) - cFYI(1, "Send error in SetPathInfo (hard link) = %d", rc); + cifs_dbg(FYI, "Send error in SetPathInfo (hard link) = %d\n", + rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -2920,7 +2922,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 count; int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR; - cFYI(1, "In CIFSCreateHardLink"); + cifs_dbg(FYI, "In CIFSCreateHardLink\n"); winCreateHardLinkRetry: rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB, @@ -2972,7 +2974,7 @@ winCreateHardLinkRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_hardlinks); if (rc) - cFYI(1, "Send error in hard link (NT rename) = %d", rc); + cifs_dbg(FYI, "Send error in hard link (NT rename) = %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -2995,7 +2997,7 @@ CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 params, byte_count; char *data_start; - cFYI(1, "In QPathSymLinkInfo (Unix) for path %s", searchName); + cifs_dbg(FYI, "In QPathSymLinkInfo (Unix) for path %s\n", searchName); querySymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3042,7 +3044,7 @@ querySymLinkRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QuerySymLinkInfo = %d", rc); + cifs_dbg(FYI, "Send error in QuerySymLinkInfo = %d\n", rc); } else { /* decode response */ @@ -3097,7 +3099,8 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, struct smb_com_transaction_ioctl_req *pSMB; struct smb_com_transaction_ioctl_rsp *pSMBr; - cFYI(1, "In Windows reparse style QueryLink for path %s", searchName); + cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n", + searchName); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -3125,7 +3128,7 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QueryReparseLinkInfo = %d", rc); + cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); } else { /* decode response */ __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); __u32 data_count = le32_to_cpu(pSMBr->DataCount); @@ -3149,7 +3152,7 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, if ((reparse_buf->LinkNamesBuf + reparse_buf->TargetNameOffset + reparse_buf->TargetNameLen) > end_of_smb) { - cFYI(1, "reparse buf beyond SMB"); + cifs_dbg(FYI, "reparse buf beyond SMB\n"); rc = -EIO; goto qreparse_out; } @@ -3170,12 +3173,11 @@ 
CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon, } } else { rc = -EIO; - cFYI(1, "Invalid return data count on " - "get reparse info ioctl"); + cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); } symlinkinfo[buflen] = 0; /* just in case so the caller does not go off the end of the buffer */ - cFYI(1, "readlink result - %s", symlinkinfo); + cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo); } qreparse_out: @@ -3198,7 +3200,10 @@ static void cifs_convert_ace(posix_acl_xattr_entry *ace, ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm); ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag); ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid)); - /* cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id); */ +/* + cifs_dbg(FYI, "perm %d tag %d id %d\n", + ace->e_perm, ace->e_tag, ace->e_id); +*/ return; } @@ -3224,8 +3229,8 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, size += sizeof(struct cifs_posix_ace) * count; /* check if we would go beyond end of SMB */ if (size_of_data_area < size) { - cFYI(1, "bad CIFS POSIX ACL size %d vs. %d", - size_of_data_area, size); + cifs_dbg(FYI, "bad CIFS POSIX ACL size %d vs. %d\n", + size_of_data_area, size); return -EINVAL; } } else if (acl_type & ACL_TYPE_DEFAULT) { @@ -3272,7 +3277,10 @@ static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, cifs_ace->cifs_uid = cpu_to_le64(-1); } else cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id)); - /*cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id);*/ +/* + cifs_dbg(FYI, "perm %d tag %d id %d\n", + ace->e_perm, ace->e_tag, ace->e_id); +*/ return rc; } @@ -3290,12 +3298,11 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; count = posix_acl_xattr_count((size_t)buflen); - cFYI(1, "setting acl with %d entries from buf of length %d and " - "version of %d", - count, buflen, le32_to_cpu(local_acl->a_version)); + cifs_dbg(FYI, "setting acl with %d entries from buf of length %d and version of %d\n", + count, buflen, le32_to_cpu(local_acl->a_version)); if (le32_to_cpu(local_acl->a_version) != 2) { - cFYI(1, "unknown POSIX ACL version %d", - le32_to_cpu(local_acl->a_version)); + cifs_dbg(FYI, "unknown POSIX ACL version %d\n", + le32_to_cpu(local_acl->a_version)); return 0; } cifs_acl->version = cpu_to_le16(1); @@ -3304,7 +3311,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, else if (acl_type == ACL_TYPE_DEFAULT) cifs_acl->default_entry_count = cpu_to_le16(count); else { - cFYI(1, "unknown ACL type %d", acl_type); + cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } for (i = 0; i < count; i++) { @@ -3337,7 +3344,7 @@ CIFSSMBGetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, int name_len; __u16 params, byte_count; - cFYI(1, "In GetPosixACL (Unix) for path %s", searchName); + cifs_dbg(FYI, "In GetPosixACL (Unix) for path %s\n", searchName); queryAclRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -3390,7 +3397,7 @@ queryAclRetry: (struct smb_hdr *) pSMBr, &bytes_returned, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get); if (rc) { - cFYI(1, "Send error in Query POSIX ACL = %d", rc); + cifs_dbg(FYI, "Send error in Query POSIX ACL = %d\n", rc); } else { /* decode response */ @@ -3427,7 +3434,7 @@ CIFSSMBSetPosixACL(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, byte_count, data_count, param_offset, offset; - cFYI(1, "In SetPosixACL (Unix) for path %s", fileName); 
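Every hunk in this stretch is the same mechanical rewrite: cFYI(1, ...) becomes cifs_dbg(FYI, ...), cERROR(1, ...) becomes cifs_dbg(VFS, ...), and each format string gains an explicit trailing \n. A rough, self-contained sketch of how such a two-level macro can be structured (illustrative only — this is not the actual fs/cifs/cifs_debug.h definition, and example_dbg/example_fyi_enabled are invented names):

#include <stdio.h>

enum example_dbg_level { FYI, VFS };	/* FYI: developer chatter, VFS: real error */

static int example_fyi_enabled = 1;	/* stand-in for the cifsFYI module toggle */

/* One entry point; the caller picks the level instead of picking a macro. */
#define example_dbg(level, fmt, ...)					\
do {									\
	if ((level) == VFS)						\
		fprintf(stderr, "CIFS VFS: " fmt, ##__VA_ARGS__);	\
	else if (example_fyi_enabled)					\
		fprintf(stderr, fmt, ##__VA_ARGS__);			\
} while (0)

int main(void)
{
	int rc = -5;

	example_dbg(FYI, "Send error in rename = %d\n", rc);	/* old cFYI(1, ...) */
	example_dbg(VFS, "Send error in Flush = %d\n", rc);	/* old cERROR(1, ...) */
	return 0;
}

Funneling both through one macro is also why the trailing \n moves into the format string in these hunks: the macro no longer appends a newline itself, so the usual printk-style conventions apply uniformly.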
+ cifs_dbg(FYI, "In SetPosixACL (Unix) for path %s\n", fileName); setAclRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -3482,7 +3489,7 @@ setAclRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "Set POSIX ACL returned %d", rc); + cifs_dbg(FYI, "Set POSIX ACL returned %d\n", rc); setACLerrorExit: cifs_buf_release(pSMB); @@ -3502,7 +3509,7 @@ CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; __u16 params, byte_count; - cFYI(1, "In GetExtAttr"); + cifs_dbg(FYI, "In GetExtAttr\n"); if (tcon == NULL) return -ENODEV; @@ -3541,7 +3548,7 @@ GetExtAttrRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "error %d in GetExtAttr", rc); + cifs_dbg(FYI, "error %d in GetExtAttr\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -3556,7 +3563,7 @@ GetExtAttrRetry: struct file_chattr_info *pfinfo; /* BB Do we need a cast or hash here ? */ if (count != 16) { - cFYI(1, "Illegal size ret in GetExtAttr"); + cifs_dbg(FYI, "Illegal size ret in GetExtAttr\n"); rc = -EIO; goto GetExtAttrOut; } @@ -3644,21 +3651,21 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata, /* should we also check that parm and data areas do not overlap? */ if (*ppparm > end_of_smb) { - cFYI(1, "parms start after end of smb"); + cifs_dbg(FYI, "parms start after end of smb\n"); return -EINVAL; } else if (parm_count + *ppparm > end_of_smb) { - cFYI(1, "parm end after end of smb"); + cifs_dbg(FYI, "parm end after end of smb\n"); return -EINVAL; } else if (*ppdata > end_of_smb) { - cFYI(1, "data starts after end of smb"); + cifs_dbg(FYI, "data starts after end of smb\n"); return -EINVAL; } else if (data_count + *ppdata > end_of_smb) { - cFYI(1, "data %p + count %d (%p) past smb end %p start %p", - *ppdata, data_count, (data_count + *ppdata), - end_of_smb, pSMBr); + cifs_dbg(FYI, "data %p + count %d (%p) past smb end %p start %p\n", + *ppdata, data_count, (data_count + *ppdata), + end_of_smb, pSMBr); return -EINVAL; } else if (parm_count + data_count > bcc) { - cFYI(1, "parm count and data count larger than SMB"); + cifs_dbg(FYI, "parm count and data count larger than SMB\n"); return -EINVAL; } *pdatalen = data_count; @@ -3676,7 +3683,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, QUERY_SEC_DESC_REQ *pSMB; struct kvec iov[1]; - cFYI(1, "GetCifsACL"); + cifs_dbg(FYI, "GetCifsACL\n"); *pbuflen = 0; *acl_inf = NULL; @@ -3701,7 +3708,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, 0); cifs_stats_inc(&tcon->stats.cifs_stats.num_acl_get); if (rc) { - cFYI(1, "Send error in QuerySecDesc = %d", rc); + cifs_dbg(FYI, "Send error in QuerySecDesc = %d\n", rc); } else { /* decode response */ __le32 *parm; __u32 parm_len; @@ -3716,7 +3723,8 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, goto qsec_out; pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base; - cFYI(1, "smb %p parm %p data %p", pSMBr, parm, *acl_inf); + cifs_dbg(FYI, "smb %p parm %p data %p\n", + pSMBr, parm, *acl_inf); if (le32_to_cpu(pSMBr->ParameterCount) != 4) { rc = -EIO; /* bad smb */ @@ -3728,8 +3736,8 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, acl_len = le32_to_cpu(*parm); if (acl_len != *pbuflen) { - cERROR(1, "acl length %d does not match %d", - acl_len, *pbuflen); 
+ cifs_dbg(VFS, "acl length %d does not match %d\n", + acl_len, *pbuflen); if (*pbuflen > acl_len) *pbuflen = acl_len; } @@ -3738,16 +3746,15 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, header followed by the smallest SID */ if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) || (*pbuflen >= 64 * 1024)) { - cERROR(1, "bad acl length %d", *pbuflen); + cifs_dbg(VFS, "bad acl length %d\n", *pbuflen); rc = -EINVAL; *pbuflen = 0; } else { - *acl_inf = kmalloc(*pbuflen, GFP_KERNEL); + *acl_inf = kmemdup(pdata, *pbuflen, GFP_KERNEL); if (*acl_inf == NULL) { *pbuflen = 0; rc = -ENOMEM; } - memcpy(*acl_inf, pdata, *pbuflen); } } qsec_out: @@ -3809,9 +3816,10 @@ setCifsAclRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); - cFYI(1, "SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc); + cifs_dbg(FYI, "SetCIFSACL bytes_returned: %d, rc: %d\n", + bytes_returned, rc); if (rc) - cFYI(1, "Set CIFS ACL returned %d", rc); + cifs_dbg(FYI, "Set CIFS ACL returned %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -3835,7 +3843,7 @@ SMBQueryInformation(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned; int name_len; - cFYI(1, "In SMBQPath path %s", search_name); + cifs_dbg(FYI, "In SMBQPath path %s\n", search_name); QInfRetry: rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -3862,7 +3870,7 @@ QInfRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QueryInfo = %d", rc); + cifs_dbg(FYI, "Send error in QueryInfo = %d\n", rc); } else if (data) { struct timespec ts; __u32 time = le32_to_cpu(pSMBr->last_write_time); @@ -3936,7 +3944,7 @@ QFileInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QPathInfo = %d", rc); + cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -3973,7 +3981,7 @@ CIFSSMBQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, int name_len; __u16 params, byte_count; - /* cFYI(1, "In QPathInfo path %s", search_name); */ + /* cifs_dbg(FYI, "In QPathInfo path %s\n", search_name); */ QPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -4023,7 +4031,7 @@ QPathInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QPathInfo = %d", rc); + cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4104,14 +4112,12 @@ UnixQFileInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QPathInfo = %d", rc); + cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. " - "Unix Extensions can be disabled on mount " - "by specifying the nosfu mount option."); + cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. 
Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n"); rc = -EIO; /* bad smb */ } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); @@ -4143,7 +4149,7 @@ CIFSSMBUnixQPathInfo(const unsigned int xid, struct cifs_tcon *tcon, int name_len; __u16 params, byte_count; - cFYI(1, "In QPathInfo (Unix) the path %s", searchName); + cifs_dbg(FYI, "In QPathInfo (Unix) the path %s\n", searchName); UnixQPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -4190,14 +4196,12 @@ UnixQPathInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QPathInfo = %d", rc); + cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) { - cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response. " - "Unix Extensions can be disabled on mount " - "by specifying the nosfu mount option."); + cifs_dbg(VFS, "Malformed FILE_UNIX_BASIC_INFO response. Unix Extensions can be disabled on mount by specifying the nosfu mount option.\n"); rc = -EIO; /* bad smb */ } else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); @@ -4231,7 +4235,7 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, __u16 params, byte_count; struct nls_table *nls_codepage; - cFYI(1, "In FindFirst for %s", searchName); + cifs_dbg(FYI, "In FindFirst for %s\n", searchName); findFirstRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -4314,7 +4318,7 @@ findFirstRetry: if (rc) {/* BB add logic to retry regular search if Unix search rejected unexpectedly by server */ /* BB Add code to handle unsupported level rc */ - cFYI(1, "Error in FindFirst = %d", rc); + cifs_dbg(FYI, "Error in FindFirst = %d\n", rc); cifs_buf_release(pSMB); @@ -4352,7 +4356,7 @@ findFirstRetry: psrch_inf->entries_in_buffer; lnoff = le16_to_cpu(parms->LastNameOffset); if (CIFSMaxBufSize < lnoff) { - cERROR(1, "ignoring corrupt resume name"); + cifs_dbg(VFS, "ignoring corrupt resume name\n"); psrch_inf->last_entry = NULL; return rc; } @@ -4383,7 +4387,7 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, unsigned int name_len; __u16 params, byte_count; - cFYI(1, "In FindNext"); + cifs_dbg(FYI, "In FindNext\n"); if (psrch_inf->endOfSearch) return -ENOENT; @@ -4444,7 +4448,7 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, cifs_buf_release(pSMB); rc = 0; /* search probably was closed at end of search*/ } else - cFYI(1, "FindNext returned = %d", rc); + cifs_dbg(FYI, "FindNext returned = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4479,15 +4483,15 @@ int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, psrch_inf->entries_in_buffer; lnoff = le16_to_cpu(parms->LastNameOffset); if (CIFSMaxBufSize < lnoff) { - cERROR(1, "ignoring corrupt resume name"); + cifs_dbg(VFS, "ignoring corrupt resume name\n"); psrch_inf->last_entry = NULL; return rc; } else psrch_inf->last_entry = psrch_inf->srch_entries_start + lnoff; -/* cFYI(1, "fnxt2 entries in buf %d index_of_last %d", - psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */ +/* cifs_dbg(FYI, "fnxt2 entries in buf %d index_of_last %d\n", + psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */ /* BB fixme add unlock here */ } @@ -4512,7 +4516,7 @@ CIFSFindClose(const unsigned int xid, struct 
cifs_tcon *tcon, int rc = 0; FINDCLOSE_REQ *pSMB = NULL; - cFYI(1, "In CIFSSMBFindClose"); + cifs_dbg(FYI, "In CIFSSMBFindClose\n"); rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB); /* no sense returning error if session restarted @@ -4526,7 +4530,7 @@ CIFSFindClose(const unsigned int xid, struct cifs_tcon *tcon, pSMB->ByteCount = 0; rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); if (rc) - cERROR(1, "Send error in FindClose = %d", rc); + cifs_dbg(VFS, "Send error in FindClose = %d\n", rc); cifs_stats_inc(&tcon->stats.cifs_stats.num_fclose); @@ -4548,7 +4552,7 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, int name_len, bytes_returned; __u16 params, byte_count; - cFYI(1, "In GetSrvInodeNum for %s", search_name); + cifs_dbg(FYI, "In GetSrvInodeNum for %s\n", search_name); if (tcon == NULL) return -ENODEV; @@ -4599,7 +4603,7 @@ GetInodeNumberRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "error %d in QueryInternalInfo", rc); + cifs_dbg(FYI, "error %d in QueryInternalInfo\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4614,7 +4618,7 @@ GetInodeNumberRetry: struct file_internal_info *pfinfo; /* BB Do we need a cast or hash here ? */ if (count < 8) { - cFYI(1, "Illegal size ret in QryIntrnlInf"); + cifs_dbg(FYI, "Illegal size ret in QryIntrnlInf\n"); rc = -EIO; goto GetInodeNumOut; } @@ -4655,16 +4659,16 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); if (*num_of_nodes < 1) { - cERROR(1, "num_referrals: must be at least > 0," - "but we get num_referrals = %d", *num_of_nodes); + cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", + *num_of_nodes); rc = -EINVAL; goto parse_DFS_referrals_exit; } ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); if (ref->VersionNumber != cpu_to_le16(3)) { - cERROR(1, "Referrals of V%d version are not supported," - "should be V3", le16_to_cpu(ref->VersionNumber)); + cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", + le16_to_cpu(ref->VersionNumber)); rc = -EINVAL; goto parse_DFS_referrals_exit; } @@ -4673,14 +4677,12 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, data_end = (char *)(&(pSMBr->PathConsumed)) + le16_to_cpu(pSMBr->t2.DataCount); - cFYI(1, "num_referrals: %d dfs flags: 0x%x ...", - *num_of_nodes, - le32_to_cpu(pSMBr->DFSFlags)); + cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n", + *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags)); - *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * - *num_of_nodes, GFP_KERNEL); + *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param), + GFP_KERNEL); if (*target_nodes == NULL) { - cERROR(1, "Failed to allocate buffer for target_nodes"); rc = -ENOMEM; goto parse_DFS_referrals_exit; } @@ -4759,7 +4761,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, *num_of_nodes = 0; *target_nodes = NULL; - cFYI(1, "In GetDFSRefer the path %s", search_name); + cifs_dbg(FYI, "In GetDFSRefer the path %s\n", search_name); if (ses == NULL) return -ENODEV; getDFSRetry: @@ -4827,7 +4829,7 @@ getDFSRetry: rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in GetDFSRefer = %d", rc); + cifs_dbg(FYI, "Send error in GetDFSRefer = %d\n", rc); goto GetDFSRefExit; } rc = validate_t2((struct smb_t2_rsp 
*)pSMBr); @@ -4838,9 +4840,8 @@ getDFSRetry: goto GetDFSRefExit; } - cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", - get_bcc(&pSMBr->hdr), - le16_to_cpu(pSMBr->t2.DataOffset)); + cifs_dbg(FYI, "Decoding GetDFSRefer response BCC: %d Offset %d\n", + get_bcc(&pSMBr->hdr), le16_to_cpu(pSMBr->t2.DataOffset)); /* parse returned result into more usable form */ rc = parse_DFS_referrals(pSMBr, num_of_nodes, @@ -4869,7 +4870,7 @@ SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "OldQFSInfo"); + cifs_dbg(FYI, "OldQFSInfo\n"); oldQFSInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -4902,7 +4903,7 @@ oldQFSInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QFSInfo = %d", rc); + cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -4910,7 +4911,7 @@ oldQFSInfoRetry: rc = -EIO; /* bad smb */ else { __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); - cFYI(1, "qfsinf resp BCC: %d Offset %d", + cifs_dbg(FYI, "qfsinf resp BCC: %d Offset %d\n", get_bcc(&pSMBr->hdr), data_offset); response_data = (FILE_SYSTEM_ALLOC_INFO *) @@ -4923,10 +4924,10 @@ oldQFSInfoRetry: le32_to_cpu(response_data->TotalAllocationUnits); FSData->f_bfree = FSData->f_bavail = le32_to_cpu(response_data->FreeAllocationUnits); - cFYI(1, "Blocks: %lld Free: %lld Block size %ld", - (unsigned long long)FSData->f_blocks, - (unsigned long long)FSData->f_bfree, - FSData->f_bsize); + cifs_dbg(FYI, "Blocks: %lld Free: %lld Block size %ld\n", + (unsigned long long)FSData->f_blocks, + (unsigned long long)FSData->f_bfree, + FSData->f_bsize); } } cifs_buf_release(pSMB); @@ -4949,7 +4950,7 @@ CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "In QFSInfo"); + cifs_dbg(FYI, "In QFSInfo\n"); QFSInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -4982,7 +4983,7 @@ QFSInfoRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QFSInfo = %d", rc); + cifs_dbg(FYI, "Send error in QFSInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -5003,10 +5004,10 @@ QFSInfoRetry: le64_to_cpu(response_data->TotalAllocationUnits); FSData->f_bfree = FSData->f_bavail = le64_to_cpu(response_data->FreeAllocationUnits); - cFYI(1, "Blocks: %lld Free: %lld Block size %ld", - (unsigned long long)FSData->f_blocks, - (unsigned long long)FSData->f_bfree, - FSData->f_bsize); + cifs_dbg(FYI, "Blocks: %lld Free: %lld Block size %ld\n", + (unsigned long long)FSData->f_blocks, + (unsigned long long)FSData->f_bfree, + FSData->f_bsize); } } cifs_buf_release(pSMB); @@ -5028,7 +5029,7 @@ CIFSSMBQFSAttributeInfo(const unsigned int xid, struct cifs_tcon *tcon) int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "In QFSAttributeInfo"); + cifs_dbg(FYI, "In QFSAttributeInfo\n"); QFSAttributeRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5062,7 +5063,7 @@ QFSAttributeRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cERROR(1, "Send error in QFSAttributeInfo = %d", rc); + cifs_dbg(VFS, "Send error in 
QFSAttributeInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -5098,7 +5099,7 @@ CIFSSMBQFSDeviceInfo(const unsigned int xid, struct cifs_tcon *tcon) int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "In QFSDeviceInfo"); + cifs_dbg(FYI, "In QFSDeviceInfo\n"); QFSDeviceRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5133,7 +5134,7 @@ QFSDeviceRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QFSDeviceInfo = %d", rc); + cifs_dbg(FYI, "Send error in QFSDeviceInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -5169,7 +5170,7 @@ CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon) int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "In QFSUnixInfo"); + cifs_dbg(FYI, "In QFSUnixInfo\n"); QFSUnixRetry: rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5203,7 +5204,7 @@ QFSUnixRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cERROR(1, "Send error in QFSUnixInfo = %d", rc); + cifs_dbg(VFS, "Send error in QFSUnixInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -5238,7 +5239,7 @@ CIFSSMBSetFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon, __u64 cap) int bytes_returned = 0; __u16 params, param_offset, offset, byte_count; - cFYI(1, "In SETFSUnixInfo"); + cifs_dbg(FYI, "In SETFSUnixInfo\n"); SETFSUnixRetry: /* BB switch to small buf init to save memory */ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, @@ -5286,7 +5287,7 @@ SETFSUnixRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cERROR(1, "Send error in SETFSUnixInfo = %d", rc); + cifs_dbg(VFS, "Send error in SETFSUnixInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); if (rc) @@ -5314,7 +5315,7 @@ CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, byte_count; - cFYI(1, "In QFSPosixInfo"); + cifs_dbg(FYI, "In QFSPosixInfo\n"); QFSPosixRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5348,7 +5349,7 @@ QFSPosixRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QFSUnixInfo = %d", rc); + cifs_dbg(FYI, "Send error in QFSUnixInfo = %d\n", rc); } else { /* decode response */ rc = validate_t2((struct smb_t2_rsp *)pSMBr); @@ -5410,7 +5411,7 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, __u16 params, byte_count, data_count, param_offset, offset; - cFYI(1, "In SetEOF"); + cifs_dbg(FYI, "In SetEOF\n"); SetEOFRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5476,7 +5477,7 @@ SetEOFRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "SetPathInfo (file size) returned %d", rc); + cifs_dbg(FYI, "SetPathInfo (file size) returned %d\n", rc); cifs_buf_release(pSMB); @@ -5495,8 +5496,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; __u16 params, param_offset, offset, byte_count, count; - cFYI(1, "SetFileSize (via SetFileInfo) %lld", - 
(long long)size); + cifs_dbg(FYI, "SetFileSize (via SetFileInfo) %lld\n", + (long long)size); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -5553,7 +5554,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); if (rc) { - cFYI(1, "Send error in SetFileInfo (SetFileSize) = %d", rc); + cifs_dbg(FYI, "Send error in SetFileInfo (SetFileSize) = %d\n", + rc); } /* Note: On -EAGAIN error only caller can retry on handle based calls @@ -5577,7 +5579,7 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; __u16 params, param_offset, offset, byte_count, count; - cFYI(1, "Set Times (via SetFileInfo)"); + cifs_dbg(FYI, "Set Times (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -5623,7 +5625,8 @@ CIFSSMBSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); if (rc) - cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc); + cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n", + rc); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -5640,7 +5643,7 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; __u16 params, param_offset, offset, byte_count, count; - cFYI(1, "Set File Disposition (via SetFileInfo)"); + cifs_dbg(FYI, "Set File Disposition (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -5682,7 +5685,7 @@ CIFSSMBSetFileDisposition(const unsigned int xid, struct cifs_tcon *tcon, *data_offset = delete_file ? 
1 : 0; rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); if (rc) - cFYI(1, "Send error in SetFileDisposition = %d", rc); + cifs_dbg(FYI, "Send error in SetFileDisposition = %d\n", rc); return rc; } @@ -5700,7 +5703,7 @@ CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, char *data_offset; __u16 params, param_offset, offset, byte_count, count; - cFYI(1, "In SetTimes"); + cifs_dbg(FYI, "In SetTimes\n"); SetTimesRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, @@ -5756,7 +5759,7 @@ SetTimesRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "SetPathInfo (times) returned %d", rc); + cifs_dbg(FYI, "SetPathInfo (times) returned %d\n", rc); cifs_buf_release(pSMB); @@ -5781,7 +5784,7 @@ CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, char *fileName, int bytes_returned; int name_len; - cFYI(1, "In SetAttrLegacy"); + cifs_dbg(FYI, "In SetAttrLegacy\n"); SetAttrLgcyRetry: rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, @@ -5807,7 +5810,7 @@ SetAttrLgcyRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "Error in LegacySetAttr = %d", rc); + cifs_dbg(FYI, "Error in LegacySetAttr = %d\n", rc); cifs_buf_release(pSMB); @@ -5875,7 +5878,7 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, int rc = 0; u16 params, param_offset, offset, byte_count, count; - cFYI(1, "Set Unix Info (via SetFileInfo)"); + cifs_dbg(FYI, "Set Unix Info (via SetFileInfo)\n"); rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); if (rc) @@ -5921,7 +5924,8 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceiveNoRsp(xid, tcon->ses, (char *) pSMB, 0); if (rc) - cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc); + cifs_dbg(FYI, "Send error in Set Time (SetFileInfo) = %d\n", + rc); /* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@ -5943,7 +5947,7 @@ CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, FILE_UNIX_BASIC_INFO *data_offset; __u16 params, param_offset, offset, count, byte_count; - cFYI(1, "In SetUID/GID/Mode"); + cifs_dbg(FYI, "In SetUID/GID/Mode\n"); setPermsRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -5999,7 +6003,7 @@ setPermsRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "SetPathInfo (perms) returned %d", rc); + cifs_dbg(FYI, "SetPathInfo (perms) returned %d\n", rc); cifs_buf_release(pSMB); if (rc == -EAGAIN) @@ -6036,7 +6040,7 @@ CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, __u16 params, byte_count, data_offset; unsigned int ea_name_len = ea_name ? 
strlen(ea_name) : 0; - cFYI(1, "In Query All EAs path %s", searchName); + cifs_dbg(FYI, "In Query All EAs path %s\n", searchName); QAllEAsRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -6083,7 +6087,7 @@ QAllEAsRetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1, "Send error in QueryAllEAs = %d", rc); + cifs_dbg(FYI, "Send error in QueryAllEAs = %d\n", rc); goto QAllEAsOut; } @@ -6111,16 +6115,16 @@ QAllEAsRetry: (((char *) &pSMBr->hdr.Protocol) + data_offset); list_len = le32_to_cpu(ea_response_data->list_len); - cFYI(1, "ea length %d", list_len); + cifs_dbg(FYI, "ea length %d\n", list_len); if (list_len <= 8) { - cFYI(1, "empty EA list returned from server"); + cifs_dbg(FYI, "empty EA list returned from server\n"); goto QAllEAsOut; } /* make sure list_len doesn't go past end of SMB */ end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr); if ((char *)ea_response_data + list_len > end_of_smb) { - cFYI(1, "EA list appears to go beyond SMB"); + cifs_dbg(FYI, "EA list appears to go beyond SMB\n"); rc = -EIO; goto QAllEAsOut; } @@ -6137,7 +6141,7 @@ QAllEAsRetry: temp_ptr += 4; /* make sure we can read name_len and value_len */ if (list_len < 0) { - cFYI(1, "EA entry goes beyond length of list"); + cifs_dbg(FYI, "EA entry goes beyond length of list\n"); rc = -EIO; goto QAllEAsOut; } @@ -6146,7 +6150,7 @@ QAllEAsRetry: value_len = le16_to_cpu(temp_fea->value_len); list_len -= name_len + 1 + value_len; if (list_len < 0) { - cFYI(1, "EA entry goes beyond length of list"); + cifs_dbg(FYI, "EA entry goes beyond length of list\n"); rc = -EIO; goto QAllEAsOut; } @@ -6214,7 +6218,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, int bytes_returned = 0; __u16 params, param_offset, byte_count, offset, count; - cFYI(1, "In SetEA"); + cifs_dbg(FYI, "In SetEA\n"); SetEARetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -6296,7 +6300,7 @@ SetEARetry: rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) - cFYI(1, "SetPathInfo (EA) returned %d", rc); + cifs_dbg(FYI, "SetPathInfo (EA) returned %d\n", rc); cifs_buf_release(pSMB); @@ -6339,7 +6343,7 @@ int CIFSSMBNotify(const unsigned int xid, struct cifs_tcon *tcon, struct dir_notify_req *dnotify_req; int bytes_returned; - cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid); + cifs_dbg(FYI, "In CIFSSMBNotify for file handle %d\n", (int)netfid); rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -6368,7 +6372,7 @@ int CIFSSMBNotify(const unsigned int xid, struct cifs_tcon *tcon, (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_ASYNC_OP); if (rc) { - cFYI(1, "Error in Notify = %d", rc); + cifs_dbg(FYI, "Error in Notify = %d\n", rc); } else { /* Add file to outstanding requests */ /* BB change to kmem cache alloc */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 21b3a291c32..99eeaa17ee0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -95,9 +95,7 @@ enum { /* Mount options which take string value */ Opt_user, Opt_pass, Opt_ip, - Opt_unc, Opt_domain, - Opt_srcaddr, Opt_prefixpath, - Opt_iocharset, + Opt_domain, Opt_srcaddr, Opt_iocharset, Opt_netbiosname, Opt_servern, Opt_ver, Opt_vers, Opt_sec, Opt_cache, @@ -193,14 +191,14 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_blank_ip, "addr=" }, { Opt_ip, "ip=%s" }, { Opt_ip, "addr=%s" 
}, - { Opt_unc, "unc=%s" }, - { Opt_unc, "target=%s" }, - { Opt_unc, "path=%s" }, + { Opt_ignore, "unc=%s" }, + { Opt_ignore, "target=%s" }, + { Opt_ignore, "path=%s" }, { Opt_domain, "dom=%s" }, { Opt_domain, "domain=%s" }, { Opt_domain, "workgroup=%s" }, { Opt_srcaddr, "srcaddr=%s" }, - { Opt_prefixpath, "prefixpath=%s" }, + { Opt_ignore, "prefixpath=%s" }, { Opt_iocharset, "iocharset=%s" }, { Opt_netbiosname, "netbiosname=%s" }, { Opt_servern, "servern=%s" }, @@ -318,11 +316,12 @@ cifs_reconnect(struct TCP_Server_Info *server) server->max_read = 0; #endif - cFYI(1, "Reconnecting tcp session"); + cifs_dbg(FYI, "Reconnecting tcp session\n"); /* before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they are not used until reconnected */ - cFYI(1, "%s: marking sessions and tcons for reconnect", __func__); + cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", + __func__); spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifs_ses, smb_ses_list); @@ -336,15 +335,14 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_unlock(&cifs_tcp_ses_lock); /* do not want to be sending data on a socket we are freeing */ - cFYI(1, "%s: tearing down socket", __func__); + cifs_dbg(FYI, "%s: tearing down socket\n", __func__); mutex_lock(&server->srv_mutex); if (server->ssocket) { - cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state, - server->ssocket->flags); + cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", + server->ssocket->state, server->ssocket->flags); kernel_sock_shutdown(server->ssocket, SHUT_WR); - cFYI(1, "Post shutdown state: 0x%x Flags: 0x%lx", - server->ssocket->state, - server->ssocket->flags); + cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", + server->ssocket->state, server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; } @@ -358,7 +356,7 @@ cifs_reconnect(struct TCP_Server_Info *server) /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); - cFYI(1, "%s: moving mids to private list", __func__); + cifs_dbg(FYI, "%s: moving mids to private list\n", __func__); spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); @@ -368,7 +366,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } spin_unlock(&GlobalMid_Lock); - cFYI(1, "%s: issuing mid callbacks", __func__); + cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); list_del_init(&mid_entry->qhead); @@ -381,7 +379,7 @@ cifs_reconnect(struct TCP_Server_Info *server) /* we should try only the port we connected to before */ rc = generic_ip_connect(server); if (rc) { - cFYI(1, "reconnect error %d", rc); + cifs_dbg(FYI, "reconnect error %d\n", rc); msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); @@ -415,8 +413,8 @@ cifs_echo_request(struct work_struct *work) rc = server->ops->echo ? 
server->ops->echo(server) : -ENOSYS; if (rc) - cFYI(1, "Unable to send echo request to server: %s", - server->hostname); + cifs_dbg(FYI, "Unable to send echo request to server: %s\n", + server->hostname); requeue_echo: queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL); @@ -428,7 +426,7 @@ allocate_buffers(struct TCP_Server_Info *server) if (!server->bigbuf) { server->bigbuf = (char *)cifs_buf_get(); if (!server->bigbuf) { - cERROR(1, "No memory for large SMB response"); + cifs_dbg(VFS, "No memory for large SMB response\n"); msleep(3000); /* retry will check if exiting */ return false; @@ -441,7 +439,7 @@ allocate_buffers(struct TCP_Server_Info *server) if (!server->smallbuf) { server->smallbuf = (char *)cifs_small_buf_get(); if (!server->smallbuf) { - cERROR(1, "No memory for SMB response"); + cifs_dbg(VFS, "No memory for SMB response\n"); msleep(1000); /* retry will check if exiting */ return false; @@ -471,9 +469,8 @@ server_unresponsive(struct TCP_Server_Info *server) */ if (server->tcpStatus == CifsGood && time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) { - cERROR(1, "Server %s has not responded in %d seconds. " - "Reconnecting...", server->hostname, - (2 * SMB_ECHO_INTERVAL) / HZ); + cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n", + server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ); cifs_reconnect(server); wake_up(&server->response_q); return true; @@ -584,8 +581,8 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig, length = 0; continue; } else if (length <= 0) { - cFYI(1, "Received no data or error: expecting %d " - "got %d", to_read, length); + cifs_dbg(FYI, "Received no data or error: expecting %d\n" + "got %d", to_read, length); cifs_reconnect(server); total_read = -EAGAIN; break; @@ -619,17 +616,17 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) /* Regular SMB response */ return true; case RFC1002_SESSION_KEEP_ALIVE: - cFYI(1, "RFC 1002 session keep alive"); + cifs_dbg(FYI, "RFC 1002 session keep alive\n"); break; case RFC1002_POSITIVE_SESSION_RESPONSE: - cFYI(1, "RFC 1002 positive session response"); + cifs_dbg(FYI, "RFC 1002 positive session response\n"); break; case RFC1002_NEGATIVE_SESSION_RESPONSE: /* * We get this from Windows 98 instead of an error on * SMB negprot response. 
*/ - cFYI(1, "RFC 1002 negative session response"); + cifs_dbg(FYI, "RFC 1002 negative session response\n"); /* give server a second to clean up */ msleep(1000); /* @@ -643,7 +640,7 @@ is_smb_response(struct TCP_Server_Info *server, unsigned char type) wake_up(&server->response_q); break; default: - cERROR(1, "RFC 1002 unknown response type 0x%x", type); + cifs_dbg(VFS, "RFC 1002 unknown response type 0x%x\n", type); cifs_reconnect(server); } @@ -729,7 +726,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cFYI(1, "Clearing mid 0x%llx", mid_entry->mid); + cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid); mid_entry->mid_state = MID_SHUTDOWN; list_move(&mid_entry->qhead, &dispose_list); } @@ -738,7 +735,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) /* now walk dispose list and issue callbacks */ list_for_each_safe(tmp, tmp2, &dispose_list) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - cFYI(1, "Callback mid 0x%llx", mid_entry->mid); + cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid); list_del_init(&mid_entry->qhead); mid_entry->callback(mid_entry); } @@ -755,7 +752,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) * least 45 seconds before giving up on a request getting a * response and going ahead and killing cifsd. */ - cFYI(1, "Wait for exit from demultiplex thread"); + cifs_dbg(FYI, "Wait for exit from demultiplex thread\n"); msleep(46000); /* * If threads still have not exited they are probably never @@ -782,8 +779,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* make sure this will fit in a large buffer */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - 4) { - cERROR(1, "SMB response too long (%u bytes)", - pdu_length); + cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server); wake_up(&server->response_q); return -EAGAIN; @@ -841,7 +837,7 @@ cifs_demultiplex_thread(void *p) struct mid_q_entry *mid_entry; current->flags |= PF_MEMALLOC; - cFYI(1, "Demultiplex PID: %d", task_pid_nr(current)); + cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current)); length = atomic_inc_return(&tcpSesAllocCount); if (length > 1) @@ -871,14 +867,14 @@ cifs_demultiplex_thread(void *p) */ pdu_length = get_rfc1002_length(buf); - cFYI(1, "RFC1002 header 0x%x", pdu_length); + cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length); if (!is_smb_response(server, buf[0])) continue; /* make sure we have enough to get to the MID */ if (pdu_length < HEADER_SIZE(server) - 1 - 4) { - cERROR(1, "SMB response too short (%u bytes)", - pdu_length); + cifs_dbg(VFS, "SMB response too short (%u bytes)\n", + pdu_length); cifs_reconnect(server); wake_up(&server->response_q); continue; @@ -910,8 +906,8 @@ cifs_demultiplex_thread(void *p) mid_entry->callback(mid_entry); } else if (!server->ops->is_oplock_break || !server->ops->is_oplock_break(buf, server)) { - cERROR(1, "No task to wake, unknown frame received! " - "NumMids %d", atomic_read(&midCount)); + cifs_dbg(VFS, "No task to wake, unknown frame received! 
NumMids %d\n", + atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, HEADER_SIZE(server)); #ifdef CONFIG_CIFS_DEBUG2 @@ -1037,7 +1033,7 @@ static int cifs_parse_security_flavors(char *value, break; case Opt_sec_krb5p: /* vol->secFlg |= CIFSSEC_MUST_SEAL | CIFSSEC_MAY_KRB5; */ - cERROR(1, "Krb5 cifs privacy not supported"); + cifs_dbg(VFS, "Krb5 cifs privacy not supported\n"); break; case Opt_sec_ntlmssp: vol->secFlg |= CIFSSEC_MAY_NTLMSSP; @@ -1067,7 +1063,7 @@ static int cifs_parse_security_flavors(char *value, vol->nullauth = 1; break; default: - cERROR(1, "bad security option: %s", value); + cifs_dbg(VFS, "bad security option: %s\n", value); return 1; } @@ -1093,7 +1089,7 @@ cifs_parse_cache_flavor(char *value, struct smb_vol *vol) vol->strict_io = false; break; default: - cERROR(1, "bad cache= option: %s", value); + cifs_dbg(VFS, "bad cache= option: %s\n", value); return 1; } return 0; @@ -1124,7 +1120,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) break; #endif default: - cERROR(1, "Unknown vers= option specified: %s", value); + cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); return 1; } return 0; @@ -1255,7 +1251,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, separator[0] = options[4]; options += 5; } else { - cFYI(1, "Null separator not allowed"); + cifs_dbg(FYI, "Null separator not allowed\n"); } } vol->backupuid_specified = false; /* no backup intent for a user */ @@ -1440,8 +1436,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, break; case Opt_fsc: #ifndef CONFIG_CIFS_FSCACHE - cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE " - "kernel config option set"); + cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n"); goto cifs_parse_mount_err; #endif vol->fsc = true; @@ -1459,55 +1454,55 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* Numeric Values */ case Opt_backupuid: if (get_option_uid(args, &vol->backupuid)) { - cERROR(1, "%s: Invalid backupuid value", - __func__); + cifs_dbg(VFS, "%s: Invalid backupuid value\n", + __func__); goto cifs_parse_mount_err; } vol->backupuid_specified = true; break; case Opt_backupgid: if (get_option_gid(args, &vol->backupgid)) { - cERROR(1, "%s: Invalid backupgid value", - __func__); + cifs_dbg(VFS, "%s: Invalid backupgid value\n", + __func__); goto cifs_parse_mount_err; } vol->backupgid_specified = true; break; case Opt_uid: if (get_option_uid(args, &vol->linux_uid)) { - cERROR(1, "%s: Invalid uid value", - __func__); + cifs_dbg(VFS, "%s: Invalid uid value\n", + __func__); goto cifs_parse_mount_err; } uid_specified = true; break; case Opt_cruid: if (get_option_uid(args, &vol->cred_uid)) { - cERROR(1, "%s: Invalid cruid value", - __func__); + cifs_dbg(VFS, "%s: Invalid cruid value\n", + __func__); goto cifs_parse_mount_err; } break; case Opt_gid: if (get_option_gid(args, &vol->linux_gid)) { - cERROR(1, "%s: Invalid gid value", - __func__); + cifs_dbg(VFS, "%s: Invalid gid value\n", + __func__); goto cifs_parse_mount_err; } gid_specified = true; break; case Opt_file_mode: if (get_option_ul(args, &option)) { - cERROR(1, "%s: Invalid file_mode value", - __func__); + cifs_dbg(VFS, "%s: Invalid file_mode value\n", + __func__); goto cifs_parse_mount_err; } vol->file_mode = option; break; case Opt_dirmode: if (get_option_ul(args, &option)) { - cERROR(1, "%s: Invalid dir_mode value", - __func__); + cifs_dbg(VFS, "%s: Invalid dir_mode value\n", + __func__); goto cifs_parse_mount_err; } vol->dir_mode 
= option; @@ -1515,37 +1510,37 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_port: if (get_option_ul(args, &option) || option > USHRT_MAX) { - cERROR(1, "%s: Invalid port value", __func__); + cifs_dbg(VFS, "%s: Invalid port value\n", + __func__); goto cifs_parse_mount_err; } port = (unsigned short)option; break; case Opt_rsize: if (get_option_ul(args, &option)) { - cERROR(1, "%s: Invalid rsize value", - __func__); + cifs_dbg(VFS, "%s: Invalid rsize value\n", + __func__); goto cifs_parse_mount_err; } vol->rsize = option; break; case Opt_wsize: if (get_option_ul(args, &option)) { - cERROR(1, "%s: Invalid wsize value", - __func__); + cifs_dbg(VFS, "%s: Invalid wsize value\n", + __func__); goto cifs_parse_mount_err; } vol->wsize = option; break; case Opt_actimeo: if (get_option_ul(args, &option)) { - cERROR(1, "%s: Invalid actimeo value", - __func__); + cifs_dbg(VFS, "%s: Invalid actimeo value\n", + __func__); goto cifs_parse_mount_err; } vol->actimeo = HZ * option; if (vol->actimeo > CIFS_MAX_ACTIMEO) { - cERROR(1, "CIFS: attribute cache" - "timeout too large"); + cifs_dbg(VFS, "attribute cache timeout too large\n"); goto cifs_parse_mount_err; } break; @@ -1568,11 +1563,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } vol->username = kstrdup(string, GFP_KERNEL); - if (!vol->username) { - printk(KERN_WARNING "CIFS: no memory " - "for username\n"); + if (!vol->username) goto cifs_parse_mount_err; - } break; case Opt_blank_pass: /* passwords have to be handled differently @@ -1660,30 +1652,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } got_ip = true; break; - case Opt_unc: - string = vol->UNC; - vol->UNC = match_strdup(args); - if (vol->UNC == NULL) - goto out_nomem; - - convert_delimiter(vol->UNC, '\\'); - if (vol->UNC[0] != '\\' || vol->UNC[1] != '\\') { - printk(KERN_ERR "CIFS: UNC Path does not " - "begin with // or \\\\\n"); - goto cifs_parse_mount_err; - } - - /* Compare old unc= option to new one */ - if (!string || strcmp(string, vol->UNC)) - printk(KERN_WARNING "CIFS: the value of the " - "unc= mount option does not match the " - "device string. Using the unc= option " - "for now. In 3.10, that option will " - "be ignored and the contents of the " - "device string will be used " - "instead. (%s != %s)\n", string, - vol->UNC); - break; case Opt_domain: string = match_strdup(args); if (string == NULL) @@ -1701,7 +1669,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, "for domainname\n"); goto cifs_parse_mount_err; } - cFYI(1, "Domain name set"); + cifs_dbg(FYI, "Domain name set\n"); break; case Opt_srcaddr: string = match_strdup(args); @@ -1716,26 +1684,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } break; - case Opt_prefixpath: - /* skip over any leading delimiter */ - if (*args[0].from == '/' || *args[0].from == '\\') - args[0].from++; - - string = vol->prepath; - vol->prepath = match_strdup(args); - if (vol->prepath == NULL) - goto out_nomem; - /* Compare old prefixpath= option to new one */ - if (!string || strcmp(string, vol->prepath)) - printk(KERN_WARNING "CIFS: the value of the " - "prefixpath= mount option does not " - "match the device string. Using the " - "prefixpath= option for now. 
In 3.10, " - "that option will be ignored and the " - "contents of the device string will be " - "used instead.(%s != %s)\n", string, - vol->prepath); - break; case Opt_iocharset: string = match_strdup(args); if (string == NULL) @@ -1759,7 +1707,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* if iocharset not set then load_nls_default * is used by caller */ - cFYI(1, "iocharset set to %s", string); + cifs_dbg(FYI, "iocharset set to %s\n", string); break; case Opt_netbiosname: string = match_strdup(args); @@ -1873,20 +1821,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { - cERROR(1, "Multiuser mounts require kernels with " - "CONFIG_KEYS enabled."); + cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n"); goto cifs_parse_mount_err; } #endif if (!vol->UNC) { - cERROR(1, "CIFS mount error: No usable UNC path provided in " - "device string or in unc= option!"); + cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n"); goto cifs_parse_mount_err; } /* make sure UNC has a share name */ if (!strchr(vol->UNC + 3, '\\')) { - cERROR(1, "Malformed UNC. Unable to find share name."); + cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n"); goto cifs_parse_mount_err; } @@ -2107,7 +2053,7 @@ cifs_find_tcp_session(struct smb_vol *vol) ++server->srv_count; spin_unlock(&cifs_tcp_ses_lock); - cFYI(1, "Existing tcp session with server found"); + cifs_dbg(FYI, "Existing tcp session with server found\n"); return server; } spin_unlock(&cifs_tcp_ses_lock); @@ -2154,7 +2100,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) struct TCP_Server_Info *tcp_ses = NULL; int rc; - cFYI(1, "UNC: %s", volume_info->UNC); + cifs_dbg(FYI, "UNC: %s\n", volume_info->UNC); /* see if we already have a matching tcp_ses */ tcp_ses = cifs_find_tcp_session(volume_info); @@ -2169,7 +2115,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) rc = cifs_crypto_shash_allocate(tcp_ses); if (rc) { - cERROR(1, "could not setup hash structures rc %d", rc); + cifs_dbg(VFS, "could not setup hash structures rc %d\n", rc); goto out_err; } @@ -2216,7 +2162,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) rc = ip_connect(tcp_ses); if (rc < 0) { - cERROR(1, "Error connecting to socket. Aborting operation"); + cifs_dbg(VFS, "Error connecting to socket. 
Aborting operation.\n"); goto out_err_crypto_release; } @@ -2229,7 +2175,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses, "cifsd"); if (IS_ERR(tcp_ses->tsk)) { rc = PTR_ERR(tcp_ses->tsk); - cERROR(1, "error %d create cifsd thread", rc); + cifs_dbg(VFS, "error %d create cifsd thread\n", rc); module_put(THIS_MODULE); goto out_err_crypto_release; } @@ -2316,7 +2262,7 @@ cifs_put_smb_ses(struct cifs_ses *ses) unsigned int xid; struct TCP_Server_Info *server = ses->server; - cFYI(1, "%s: ses_count=%d", __func__, ses->ses_count); + cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); spin_lock(&cifs_tcp_ses_lock); if (--ses->ses_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -2368,23 +2314,24 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); break; default: - cFYI(1, "Bad ss_family (%hu)", server->dstaddr.ss_family); + cifs_dbg(FYI, "Bad ss_family (%hu)\n", + server->dstaddr.ss_family); rc = -EINVAL; goto out_err; } - cFYI(1, "%s: desc=%s", __func__, desc); + cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); key = request_key(&key_type_logon, desc, ""); if (IS_ERR(key)) { if (!ses->domainName) { - cFYI(1, "domainName is NULL"); + cifs_dbg(FYI, "domainName is NULL\n"); rc = PTR_ERR(key); goto out_err; } /* didn't work, try to find a domain key */ sprintf(desc, "cifs:d:%s", ses->domainName); - cFYI(1, "%s: desc=%s", __func__, desc); + cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc); key = request_key(&key_type_logon, desc, ""); if (IS_ERR(key)) { rc = PTR_ERR(key); @@ -2402,32 +2349,34 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) /* find first : in payload */ payload = (char *)upayload->data; delim = strnchr(payload, upayload->datalen, ':'); - cFYI(1, "payload=%s", payload); + cifs_dbg(FYI, "payload=%s\n", payload); if (!delim) { - cFYI(1, "Unable to find ':' in payload (datalen=%d)", - upayload->datalen); + cifs_dbg(FYI, "Unable to find ':' in payload (datalen=%d)\n", + upayload->datalen); rc = -EINVAL; goto out_key_put; } len = delim - payload; if (len > MAX_USERNAME_SIZE || len <= 0) { - cFYI(1, "Bad value from username search (len=%zd)", len); + cifs_dbg(FYI, "Bad value from username search (len=%zd)\n", + len); rc = -EINVAL; goto out_key_put; } vol->username = kstrndup(payload, len, GFP_KERNEL); if (!vol->username) { - cFYI(1, "Unable to allocate %zd bytes for username", len); + cifs_dbg(FYI, "Unable to allocate %zd bytes for username\n", + len); rc = -ENOMEM; goto out_key_put; } - cFYI(1, "%s: username=%s", __func__, vol->username); + cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username); len = key->datalen - (len + 1); if (len > MAX_PASSWORD_SIZE || len <= 0) { - cFYI(1, "Bad len for password search (len=%zd)", len); + cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len); rc = -EINVAL; kfree(vol->username); vol->username = NULL; @@ -2437,7 +2386,8 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) ++delim; vol->password = kstrndup(delim, len, GFP_KERNEL); if (!vol->password) { - cFYI(1, "Unable to allocate %zd bytes for password", len); + cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n", + len); rc = -ENOMEM; kfree(vol->username); vol->username = NULL; @@ -2449,7 +2399,7 @@ out_key_put: key_put(key); out_err: kfree(desc); - cFYI(1, "%s: returning %d", __func__, rc); + cifs_dbg(FYI, "%s: returning %d\n", __func__, rc); return rc; } #else /* ! 
CONFIG_KEYS */ @@ -2474,7 +2424,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) ses = cifs_find_smb_ses(server, volume_info); if (ses) { - cFYI(1, "Existing smb sess found (status=%d)", ses->status); + cifs_dbg(FYI, "Existing smb sess found (status=%d)\n", + ses->status); mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); @@ -2486,7 +2437,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) return ERR_PTR(rc); } if (ses->need_reconnect) { - cFYI(1, "Session needs reconnect"); + cifs_dbg(FYI, "Session needs reconnect\n"); rc = cifs_setup_session(xid, ses, volume_info->local_nls); if (rc) { @@ -2505,7 +2456,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) return ses; } - cFYI(1, "Existing smb sess not found"); + cifs_dbg(FYI, "Existing smb sess not found\n"); ses = sesInfoAlloc(); if (ses == NULL) goto get_ses_fail; @@ -2595,7 +2546,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) unsigned int xid; struct cifs_ses *ses = tcon->ses; - cFYI(1, "%s: tc_count=%d", __func__, tcon->tc_count); + cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); if (--tcon->tc_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -2623,12 +2574,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon = cifs_find_tcon(ses, volume_info->UNC); if (tcon) { - cFYI(1, "Found match on UNC path"); + cifs_dbg(FYI, "Found match on UNC path\n"); /* existing tcon already has a reference */ cifs_put_smb_ses(ses); if (tcon->seal != volume_info->seal) - cERROR(1, "transport encryption setting " - "conflicts with existing tid"); + cifs_dbg(VFS, "transport encryption setting conflicts with existing tid\n"); return tcon; } @@ -2660,13 +2610,13 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon, volume_info->local_nls); free_xid(xid); - cFYI(1, "Tcon rc = %d", rc); + cifs_dbg(FYI, "Tcon rc = %d\n", rc); if (rc) goto out_fail; if (volume_info->nodfs) { tcon->Flags &= ~SMB_SHARE_IS_IN_DFS; - cFYI(1, "DFS disabled (%d)", tcon->Flags); + cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags); } tcon->seal = volume_info->seal; /* @@ -2820,7 +2770,7 @@ get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$"); rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL, nls_codepage); - cFYI(1, "Tcon rc = %d ipc_tid = %d", rc, ses->ipc_tid); + cifs_dbg(FYI, "Tcon rc = %d ipc_tid = %d\n", rc, ses->ipc_tid); kfree(temp_unc); } if (rc == 0) @@ -2898,13 +2848,11 @@ bind_socket(struct TCP_Server_Info *server) saddr4 = (struct sockaddr_in *)&server->srcaddr; saddr6 = (struct sockaddr_in6 *)&server->srcaddr; if (saddr6->sin6_family == AF_INET6) - cERROR(1, "cifs: " - "Failed to bind to: %pI6c, error: %d", - &saddr6->sin6_addr, rc); + cifs_dbg(VFS, "Failed to bind to: %pI6c, error: %d\n", + &saddr6->sin6_addr, rc); else - cERROR(1, "cifs: " - "Failed to bind to: %pI4, error: %d", - &saddr4->sin_addr.s_addr, rc); + cifs_dbg(VFS, "Failed to bind to: %pI4, error: %d\n", + &saddr4->sin_addr.s_addr, rc); } } return rc; @@ -3009,13 +2957,13 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM, IPPROTO_TCP, &socket, 1); if (rc < 0) { - cERROR(1, "Error %d creating socket", rc); + cifs_dbg(VFS, "Error %d creating socket\n", rc); server->ssocket = NULL; return rc; } /* 
BB other socket options to set KEEPALIVE, NODELAY? */ - cFYI(1, "Socket created"); + cifs_dbg(FYI, "Socket created\n"); server->ssocket = socket; socket->sk->sk_allocation = GFP_NOFS; if (sfamily == AF_INET6) @@ -3049,16 +2997,17 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); if (rc) - cFYI(1, "set TCP_NODELAY socket option error %d", rc); + cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n", + rc); } - cFYI(1, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx", + cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n", socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); rc = socket->ops->connect(socket, saddr, slen, 0); if (rc < 0) { - cFYI(1, "Error %d connecting to server", rc); + cifs_dbg(FYI, "Error %d connecting to server\n", rc); sock_release(socket); server->ssocket = NULL; return rc; @@ -3116,19 +3065,19 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (vol_info && vol_info->no_linux_ext) { tcon->fsUnixInfo.Capability = 0; tcon->unix_ext = 0; /* Unix Extensions disabled */ - cFYI(1, "Linux protocol extensions disabled"); + cifs_dbg(FYI, "Linux protocol extensions disabled\n"); return; } else if (vol_info) tcon->unix_ext = 1; /* Unix Extensions supported */ if (tcon->unix_ext == 0) { - cFYI(1, "Unix extensions disabled so not set on reconnect"); + cifs_dbg(FYI, "Unix extensions disabled so not set on reconnect\n"); return; } if (!CIFSSMBQFSUnixInfo(xid, tcon)) { __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability); - cFYI(1, "unix caps which server supports %lld", cap); + cifs_dbg(FYI, "unix caps which server supports %lld\n", cap); /* check for reconnect case in which we do not want to change the mount behavior if we can avoid it */ if (vol_info == NULL) { @@ -3138,22 +3087,22 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, cap &= ~CIFS_UNIX_POSIX_ACL_CAP; if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) - cERROR(1, "POSIXPATH support change"); + cifs_dbg(VFS, "POSIXPATH support change\n"); cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { - cERROR(1, "possible reconnect error"); - cERROR(1, "server disabled POSIX path support"); + cifs_dbg(VFS, "possible reconnect error\n"); + cifs_dbg(VFS, "server disabled POSIX path support\n"); } } if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP) - cERROR(1, "per-share encryption not supported yet"); + cifs_dbg(VFS, "per-share encryption not supported yet\n"); cap &= CIFS_UNIX_CAP_MASK; if (vol_info && vol_info->no_psx_acl) cap &= ~CIFS_UNIX_POSIX_ACL_CAP; else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { - cFYI(1, "negotiated posix acl support"); + cifs_dbg(FYI, "negotiated posix acl support\n"); if (cifs_sb) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIXACL; @@ -3162,43 +3111,38 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, if (vol_info && vol_info->posix_paths == 0) cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { - cFYI(1, "negotiate posix pathnames"); + cifs_dbg(FYI, "negotiate posix pathnames\n"); if (cifs_sb) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; } - cFYI(1, "Negotiate caps 0x%x", (int)cap); + cifs_dbg(FYI, "Negotiate caps 0x%x\n", (int)cap); #ifdef CONFIG_CIFS_DEBUG2 if (cap & CIFS_UNIX_FCNTL_CAP) - cFYI(1, "FCNTL cap"); + cifs_dbg(FYI, "FCNTL cap\n"); if (cap & CIFS_UNIX_EXTATTR_CAP) - cFYI(1, "EXTATTR cap"); + cifs_dbg(FYI, 
"EXTATTR cap\n"); if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) - cFYI(1, "POSIX path cap"); + cifs_dbg(FYI, "POSIX path cap\n"); if (cap & CIFS_UNIX_XATTR_CAP) - cFYI(1, "XATTR cap"); + cifs_dbg(FYI, "XATTR cap\n"); if (cap & CIFS_UNIX_POSIX_ACL_CAP) - cFYI(1, "POSIX ACL cap"); + cifs_dbg(FYI, "POSIX ACL cap\n"); if (cap & CIFS_UNIX_LARGE_READ_CAP) - cFYI(1, "very large read cap"); + cifs_dbg(FYI, "very large read cap\n"); if (cap & CIFS_UNIX_LARGE_WRITE_CAP) - cFYI(1, "very large write cap"); + cifs_dbg(FYI, "very large write cap\n"); if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP) - cFYI(1, "transport encryption cap"); + cifs_dbg(FYI, "transport encryption cap\n"); if (cap & CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP) - cFYI(1, "mandatory transport encryption cap"); + cifs_dbg(FYI, "mandatory transport encryption cap\n"); #endif /* CIFS_DEBUG2 */ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { if (vol_info == NULL) { - cFYI(1, "resetting capabilities failed"); + cifs_dbg(FYI, "resetting capabilities failed\n"); } else - cERROR(1, "Negotiating Unix capabilities " - "with the server failed. Consider " - "mounting with the Unix Extensions " - "disabled if problems are found " - "by specifying the nounix mount " - "option."); + cifs_dbg(VFS, "Negotiating Unix capabilities with the server failed. Consider mounting with the Unix Extensions disabled if problems are found by specifying the nounix mount option.\n"); } } @@ -3223,8 +3167,8 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cFYI(1, "file mode: 0x%hx dir mode: 0x%hx", - cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); + cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; cifs_sb->local_nls = pvolume_info->local_nls; @@ -3273,21 +3217,19 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, if (pvolume_info->strict_io) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; if (pvolume_info->direct_io) { - cFYI(1, "mounting share using direct i/o"); + cifs_dbg(FYI, "mounting share using direct i/o\n"); cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; } if (pvolume_info->mfsymlinks) { if (pvolume_info->sfu_emul) { - cERROR(1, "mount option mfsymlinks ignored if sfu " - "mount option is used"); + cifs_dbg(VFS, "mount option mfsymlinks ignored if sfu mount option is used\n"); } else { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; } } if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) - cERROR(1, "mount option dynperm ignored if cifsacl " - "mount option supported"); + cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); } static void @@ -3339,7 +3281,7 @@ build_unc_path_to_root(const struct smb_vol *vol, *pos = '\0'; /* add trailing null */ convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); - cFYI(1, "%s: full_path=%s", __func__, full_path); + cifs_dbg(FYI, "%s: full_path=%s\n", __func__, full_path); return full_path; } @@ -3410,14 +3352,14 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, return -EINVAL; if (volume_info->nullauth) { - cFYI(1, "Anonymous login"); + cifs_dbg(FYI, "Anonymous login\n"); kfree(volume_info->username); volume_info->username = NULL; } else if (volume_info->username) { /* BB fixme parse for domain name here */ - cFYI(1, "Username: %s", volume_info->username); + cifs_dbg(FYI, "Username: %s\n", volume_info->username); } 
else { - cifserror("No username specified"); + cifs_dbg(VFS, "No username specified\n"); /* In userspace mount helper we can get user name from alternate locations such as env variables and files on disk */ return -EINVAL; @@ -3430,7 +3372,7 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, } else { volume_info->local_nls = load_nls(volume_info->iocharset); if (volume_info->local_nls == NULL) { - cERROR(1, "CIFS mount error: iocharset %s not found", + cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n", volume_info->iocharset); return -ELIBACC; } @@ -3780,13 +3722,13 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, if (length == 3) { if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && (bcc_ptr[2] == 'C')) { - cFYI(1, "IPC connection"); + cifs_dbg(FYI, "IPC connection\n"); tcon->ipc = 1; } } else if (length == 2) { if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) { /* the most common case */ - cFYI(1, "disk share connection"); + cifs_dbg(FYI, "disk share connection\n"); } } bcc_ptr += length + 1; @@ -3799,7 +3741,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, bytes_left, is_unicode, nls_codepage); - cFYI(1, "nativeFileSystem=%s", tcon->nativeFileSystem); + cifs_dbg(FYI, "nativeFileSystem=%s\n", tcon->nativeFileSystem); if ((smb_buffer_response->WordCount == 3) || (smb_buffer_response->WordCount == 7)) @@ -3807,7 +3749,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); else tcon->Flags = 0; - cFYI(1, "Tcon flags: 0x%x ", tcon->Flags); + cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags); } else if ((rc == 0) && tcon == NULL) { /* all we need to save for IPC$ connection */ ses->ipc_tid = smb_buffer_response->Tid; @@ -3885,16 +3827,16 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (linuxExtEnabled == 0) ses->capabilities &= (~server->vals->cap_unix); - cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", + cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n", server->sec_mode, server->capabilities, server->timeAdj); if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); if (rc) { - cERROR(1, "Send error in SessSetup = %d", rc); + cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc); } else { - mutex_lock(&ses->server->srv_mutex); + mutex_lock(&server->srv_mutex); if (!server->session_estab) { server->session_key.response = ses->auth_key.response; server->session_key.len = ses->auth_key.len; @@ -3904,7 +3846,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, } mutex_unlock(&server->srv_mutex); - cFYI(1, "CIFS Session Established successfully"); + cifs_dbg(FYI, "CIFS Session Established successfully\n"); spin_lock(&GlobalMid_Lock); ses->status = CifsGood; ses->need_reconnect = false; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1cd01621744..5699b5036ed 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -102,7 +102,7 @@ cifs_bp_rename_retry: namelen += (1 + temp->d_name.len); temp = temp->d_parent; if (temp == NULL) { - cERROR(1, "corrupt dentry"); + cifs_dbg(VFS, "corrupt dentry\n"); rcu_read_unlock(); return NULL; } @@ -124,12 +124,12 @@ cifs_bp_rename_retry: full_path[namelen] = dirsep; strncpy(full_path + namelen + 1, temp->d_name.name, temp->d_name.len); - cFYI(0, "name: %s", full_path + namelen); + cifs_dbg(FYI, "name: %s\n", full_path + namelen); } spin_unlock(&temp->d_lock); temp = temp->d_parent; if (temp == NULL) { - cERROR(1, "corrupt dentry"); + cifs_dbg(VFS, "corrupt dentry\n"); 
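/*
 * Every hunk in this patch funnels the old cFYI(1, ...) and cERROR(1, ...)
 * calls into a single cifs_dbg(type, fmt, ...) macro. A minimal sketch of
 * such a dispatcher follows; the FYI/VFS constant values, the cifsFYI
 * enable flag, and the message prefixes are assumptions for illustration,
 * and the real definition in fs/cifs/cifs_debug.h may differ in detail:
 *
 *	#define FYI	1
 *	#define VFS	2
 *
 *	extern int cifsFYI;
 *
 *	#define cifs_dbg(type, fmt, ...)				\
 *	do {								\
 *		if ((type) == FYI) {					\
 *			if (cifsFYI)					\
 *				printk(KERN_DEBUG "%s: " fmt,		\
 *				       __FILE__, ##__VA_ARGS__);	\
 *		} else if ((type) == VFS) {				\
 *			printk(KERN_ERR "CIFS VFS: " fmt,		\
 *			       ##__VA_ARGS__);				\
 *		}							\
 *	} while (0)
 *
 * Two conventions visible throughout the diff follow from centralizing
 * this: each new format string carries an explicit trailing \n (the old
 * macros appended it themselves), and messages that used to be wrapped
 * across concatenated string literals are merged onto one line so the log
 * output stays greppable.
 */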
rcu_read_unlock(); kfree(full_path); return NULL; @@ -137,8 +137,8 @@ cifs_bp_rename_retry: } rcu_read_unlock(); if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { - cFYI(1, "did not end path lookup where expected. namelen=%d " - "dfsplen=%d", namelen, dfsplen); + cifs_dbg(FYI, "did not end path lookup where expected. namelen=%ddfsplen=%d\n", + namelen, dfsplen); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ @@ -178,7 +178,7 @@ check_name(struct dentry *direntry) if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { - cFYI(1, "Invalid file name"); + cifs_dbg(FYI, "Invalid file name\n"); return -EINVAL; } } @@ -291,7 +291,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, else if ((oflags & O_CREAT) == O_CREAT) disposition = FILE_OPEN_IF; else - cFYI(1, "Create flag not set in create function"); + cifs_dbg(FYI, "Create flag not set in create function\n"); /* * BB add processing to set equivalent of mode - e.g. via CreateX with @@ -323,7 +323,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, desired_access, create_options, fid, oplock, buf, cifs_sb); if (rc) { - cFYI(1, "cifs_create returned 0x%x", rc); + cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); goto out; } @@ -389,7 +389,8 @@ cifs_create_get_file_info: cifs_create_set_dentry: if (rc != 0) { - cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); + cifs_dbg(FYI, "Create worked, get_inode_info failed rc = %d\n", + rc); if (server->ops->close) server->ops->close(xid, tcon, fid); goto out; @@ -452,12 +453,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, xid = get_xid(); - cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", + inode, direntry->d_name.name, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); - if (IS_ERR(tlink)) + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); goto out_free_xid; + } tcon = tlink_tcon(tlink); server = tcon->ses->server; @@ -518,8 +521,8 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, __u32 oplock; int created = FILE_CREATED; - cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", + inode, direntry->d_name.name, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); @@ -613,7 +616,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, goto mknod_out; - cFYI(1, "sfu compat create special file"); + cifs_dbg(FYI, "sfu compat create special file\n"); buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { @@ -688,8 +691,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, xid = get_xid(); - cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", - parent_dir_inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", + parent_dir_inode, direntry->d_name.name, direntry); /* check whether path exists */ @@ -715,11 +718,12 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } if (direntry->d_inode != NULL) { - cFYI(1, "non-NULL inode in 
lookup"); + cifs_dbg(FYI, "non-NULL inode in lookup\n"); } else { - cFYI(1, "NULL inode in lookup"); + cifs_dbg(FYI, "NULL inode in lookup\n"); } - cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode); + cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", + full_path, direntry->d_inode); if (pTcon->unix_ext) { rc = cifs_get_inode_info_unix(&newInode, full_path, @@ -742,7 +746,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ } else if (rc != -EACCES) { - cERROR(1, "Unexpected lookup error %d", rc); + cifs_dbg(VFS, "Unexpected lookup error %d\n", rc); /* We special case check for Access Denied - since that is a common return code */ } @@ -807,7 +811,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { int rc = 0; - cFYI(1, "In cifs d_delete, name = %s", direntry->d_name.name); + cifs_dbg(FYI, "In cifs d_delete, name = %s\n", direntry->d_name.name); return rc; } */ diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1d2d91d9bf6..e7512e49761 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -55,7 +55,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) len = strlen(unc); if (len < 3) { - cFYI(1, "%s: unc is too short: %s", __func__, unc); + cifs_dbg(FYI, "%s: unc is too short: %s\n", __func__, unc); return -EINVAL; } @@ -68,8 +68,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) if (sep) len = sep - hostname; else - cFYI(1, "%s: probably server name is whole unc: %s", - __func__, unc); + cifs_dbg(FYI, "%s: probably server name is whole unc: %s\n", + __func__, unc); /* Try to interpret hostname as an IPv4 or IPv6 address */ rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len); @@ -79,11 +79,11 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) /* Perform the upcall */ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); if (rc < 0) - cFYI(1, "%s: unable to resolve: %*.*s", - __func__, len, len, hostname); + cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", + __func__, len, len, hostname); else - cFYI(1, "%s: resolved: %*.*s to %s", - __func__, len, len, hostname, *ip_addr); + cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n", + __func__, len, len, hostname, *ip_addr); return rc; name_is_IP_address: @@ -92,7 +92,8 @@ name_is_IP_address: return -ENOMEM; memcpy(name, hostname, len); name[len] = 0; - cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name); + cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %s\n", + __func__, name); *ip_addr = name; return 0; } diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 9c7ecdccf2f..ce8b7f677c5 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -49,7 +49,7 @@ static struct dentry *cifs_get_parent(struct dentry *dentry) { /* BB need to add code here eventually to enable export via NFSD */ - cFYI(1, "get parent for %p", dentry); + cifs_dbg(FYI, "get parent for %p\n", dentry); return ERR_PTR(-EACCES); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2d4a231dd70..48b29d24c9f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -78,9 +78,8 @@ static u32 cifs_posix_convert_flags(unsigned int flags) if (flags & O_EXCL) posix_flags |= SMB_O_EXCL; } else if (flags & O_EXCL) - cFYI(1, "Application %s pid %d has incorrectly set O_EXCL flag" - "but not O_CREAT on file open. 
Ignoring O_EXCL", - current->comm, current->tgid); + cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n", + current->comm, current->tgid); if (flags & O_TRUNC) posix_flags |= SMB_O_TRUNC; @@ -123,7 +122,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, struct tcon_link *tlink; struct cifs_tcon *tcon; - cFYI(1, "posix open %s", full_path); + cifs_dbg(FYI, "posix open %s\n", full_path); presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); if (presp_data == NULL) @@ -308,7 +307,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, */ if (oplock == server->vals->oplock_read && cifs_has_mand_locks(cinode)) { - cFYI(1, "Reset oplock val from read to None due to mand locks"); + cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); oplock = 0; } @@ -374,8 +373,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) list_del(&cifs_file->tlist); if (list_empty(&cifsi->openFileList)) { - cFYI(1, "closing last open instance for inode %p", - cifs_file->dentry->d_inode); + cifs_dbg(FYI, "closing last open instance for inode %p\n", + cifs_file->dentry->d_inode); /* * In strict cache mode we need invalidate mapping on the last * close because it may cause a error when we open this file @@ -454,7 +453,7 @@ int cifs_open(struct inode *inode, struct file *file) goto out; } - cFYI(1, "inode = 0x%p file flags are 0x%x for %s", + cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); if (server->oplocks) @@ -470,16 +469,13 @@ int cifs_open(struct inode *inode, struct file *file) cifs_sb->mnt_file_mode /* ignored */, file->f_flags, &oplock, &fid.netfid, xid); if (rc == 0) { - cFYI(1, "posix open succeeded"); + cifs_dbg(FYI, "posix open succeeded\n"); posix_open_ok = true; } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { if (tcon->ses->serverNOS) - cERROR(1, "server %s of type %s returned" - " unexpected error on SMB posix open" - ", disabling posix open support." - " Check if server update available.", - tcon->ses->serverName, - tcon->ses->serverNOS); + cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. 
Check if server update available.\n", + tcon->ses->serverName, + tcon->ses->serverNOS); tcon->broken_posix_open = true; } else if ((rc != -EIO) && (rc != -EREMOTE) && (rc != -EOPNOTSUPP)) /* path not found or net err */ @@ -621,8 +617,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) return rc; } - cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags, - full_path); + cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", + inode, cfile->f_flags, full_path); if (tcon->ses->server->oplocks) oplock = REQ_OPLOCK; @@ -643,7 +639,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) cifs_sb->mnt_file_mode /* ignored */, oflags, &oplock, &fid.netfid, xid); if (rc == 0) { - cFYI(1, "posix reopen succeeded"); + cifs_dbg(FYI, "posix reopen succeeded\n"); goto reopen_success; } /* @@ -672,8 +668,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) NULL, cifs_sb); if (rc) { mutex_unlock(&cfile->fh_mutex); - cFYI(1, "cifs_reopen returned 0x%x", rc); - cFYI(1, "oplock: %d", oplock); + cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); + cifs_dbg(FYI, "oplock: %d\n", oplock); goto reopen_error_exit; } @@ -729,7 +725,7 @@ int cifs_closedir(struct inode *inode, struct file *file) struct TCP_Server_Info *server; char *buf; - cFYI(1, "Closedir inode = 0x%p", inode); + cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); if (cfile == NULL) return rc; @@ -738,7 +734,7 @@ int cifs_closedir(struct inode *inode, struct file *file) tcon = tlink_tcon(cfile->tlink); server = tcon->ses->server; - cFYI(1, "Freeing private data in close dir"); + cifs_dbg(FYI, "Freeing private data in close dir\n"); spin_lock(&cifs_file_list_lock); if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { cfile->invalidHandle = true; @@ -747,7 +743,7 @@ int cifs_closedir(struct inode *inode, struct file *file) rc = server->ops->close_dir(xid, tcon, &cfile->fid); else rc = -ENOSYS; - cFYI(1, "Closing uncompleted readdir with rc %d", rc); + cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); /* not much we can do if it fails anyway, ignore rc */ rc = 0; } else @@ -755,7 +751,7 @@ int cifs_closedir(struct inode *inode, struct file *file) buf = cfile->srch_inf.ntwrk_buf_start; if (buf) { - cFYI(1, "closedir free smb buf in srch struct"); + cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); cfile->srch_inf.ntwrk_buf_start = NULL; if (cfile->srch_inf.smallBuf) cifs_small_buf_release(buf); @@ -1140,7 +1136,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) * The list ended. We don't have enough allocated * structures - something is really wrong. 
*/ - cERROR(1, "Can't push all brlocks!"); + cifs_dbg(VFS, "Can't push all brlocks!\n"); break; } length = 1 + flock->fl_end - flock->fl_start; @@ -1213,47 +1209,46 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, bool *wait_flag, struct TCP_Server_Info *server) { if (flock->fl_flags & FL_POSIX) - cFYI(1, "Posix"); + cifs_dbg(FYI, "Posix\n"); if (flock->fl_flags & FL_FLOCK) - cFYI(1, "Flock"); + cifs_dbg(FYI, "Flock\n"); if (flock->fl_flags & FL_SLEEP) { - cFYI(1, "Blocking lock"); + cifs_dbg(FYI, "Blocking lock\n"); *wait_flag = true; } if (flock->fl_flags & FL_ACCESS) - cFYI(1, "Process suspended by mandatory locking - " - "not implemented yet"); + cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); if (flock->fl_flags & FL_LEASE) - cFYI(1, "Lease on file - not implemented yet"); + cifs_dbg(FYI, "Lease on file - not implemented yet\n"); if (flock->fl_flags & (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE | FL_CLOSE))) - cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); + cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); *type = server->vals->large_lock_type; if (flock->fl_type == F_WRLCK) { - cFYI(1, "F_WRLCK "); + cifs_dbg(FYI, "F_WRLCK\n"); *type |= server->vals->exclusive_lock_type; *lock = 1; } else if (flock->fl_type == F_UNLCK) { - cFYI(1, "F_UNLCK"); + cifs_dbg(FYI, "F_UNLCK\n"); *type |= server->vals->unlock_lock_type; *unlock = 1; /* Check if unlock includes more than one lock range */ } else if (flock->fl_type == F_RDLCK) { - cFYI(1, "F_RDLCK"); + cifs_dbg(FYI, "F_RDLCK\n"); *type |= server->vals->shared_lock_type; *lock = 1; } else if (flock->fl_type == F_EXLCK) { - cFYI(1, "F_EXLCK"); + cifs_dbg(FYI, "F_EXLCK\n"); *type |= server->vals->exclusive_lock_type; *lock = 1; } else if (flock->fl_type == F_SHLCK) { - cFYI(1, "F_SHLCK"); + cifs_dbg(FYI, "F_SHLCK\n"); *type |= server->vals->shared_lock_type; *lock = 1; } else - cFYI(1, "Unknown type of lock"); + cifs_dbg(FYI, "Unknown type of lock\n"); } static int @@ -1296,8 +1291,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, type, 0, 1, false); flock->fl_type = F_UNLCK; if (rc != 0) - cERROR(1, "Error unlocking previously locked " - "range %d during test of lock", rc); + cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", + rc); return 0; } @@ -1316,8 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, type | server->vals->shared_lock_type, 0, 1, false); flock->fl_type = F_RDLCK; if (rc != 0) - cERROR(1, "Error unlocking previously locked " - "range %d during test of lock", rc); + cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", + rc); } else flock->fl_type = F_WRLCK; @@ -1508,8 +1503,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, if (!CIFS_I(inode)->clientCanCacheAll && CIFS_I(inode)->clientCanCacheRead) { cifs_invalidate_mapping(inode); - cFYI(1, "Set no oplock for inode=%p due to mand locks", - inode); + cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", + inode); CIFS_I(inode)->clientCanCacheRead = false; } @@ -1546,9 +1541,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) rc = -EACCES; xid = get_xid(); - cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld " - "end: %lld", cmd, flock->fl_flags, flock->fl_type, - flock->fl_start, flock->fl_end); + cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n", + cmd, 
flock->fl_flags, flock->fl_type, + flock->fl_start, flock->fl_end); cfile = (struct cifsFileInfo *)file->private_data; tcon = tlink_tcon(cfile->tlink); @@ -1620,8 +1615,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, cifs_sb = CIFS_SB(dentry->d_sb); - cFYI(1, "write %zd bytes to offset %lld of %s", write_size, - *offset, dentry->d_name.name); + cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n", + write_size, *offset, dentry->d_name.name); tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -1736,7 +1731,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, it being zero) during stress testcases so we need to check for it */ if (cifs_inode == NULL) { - cERROR(1, "Null inode passed to cifs_writeable_file"); + cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n"); dump_stack(); return NULL; } @@ -1848,7 +1843,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) else if (bytes_written < 0) rc = bytes_written; } else { - cFYI(1, "No writeable filehandles for inode"); + cifs_dbg(FYI, "No writeable filehandles for inode\n"); rc = -EIO; } @@ -2015,7 +2010,7 @@ retry: wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); if (!wdata->cfile) { - cERROR(1, "No writable handles for inode"); + cifs_dbg(VFS, "No writable handles for inode\n"); rc = -EBADF; break; } @@ -2076,7 +2071,7 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc) /* BB add check for wbc flags */ page_cache_get(page); if (!PageUptodate(page)) - cFYI(1, "ppw - page not up to date"); + cifs_dbg(FYI, "ppw - page not up to date\n"); /* * Set the "writeback" flag, and clear "dirty" in the radix tree. @@ -2127,7 +2122,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, else pid = current->tgid; - cFYI(1, "write_end for page %p from pos %lld with %d bytes", + cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", page, pos, copied); if (PageChecked(page)) { @@ -2191,13 +2186,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, xid = get_xid(); - cFYI(1, "Sync file - name: %s datasync: 0x%x", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", + file->f_path.dentry->d_name.name, datasync); if (!CIFS_I(inode)->clientCanCacheRead) { rc = cifs_invalidate_mapping(inode); if (rc) { - cFYI(1, "rc: %d during invalidate phase", rc); + cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); rc = 0; /* don't care about it in fsync */ } } @@ -2233,8 +2228,8 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) xid = get_xid(); - cFYI(1, "Sync file - name: %s datasync: 0x%x", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", + file->f_path.dentry->d_name.name, datasync); tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { @@ -2262,7 +2257,7 @@ int cifs_flush(struct file *file, fl_owner_t id) if (file->f_mode & FMODE_WRITE) rc = filemap_write_and_wait(inode->i_mapping); - cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc); + cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); return rc; } @@ -2579,8 +2574,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * an old data. 
*/ cifs_invalidate_mapping(inode); - cFYI(1, "Set no oplock for inode=%p after a write operation", - inode); + cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", + inode); cinode->clientCanCacheRead = false; } return written; @@ -2756,15 +2751,15 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, /* enough data to fill the page */ iov.iov_base = kmap(page); iov.iov_len = PAGE_SIZE; - cFYI(1, "%u: iov_base=%p iov_len=%zu", - i, iov.iov_base, iov.iov_len); + cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n", + i, iov.iov_base, iov.iov_len); len -= PAGE_SIZE; } else if (len > 0) { /* enough for partial page, fill and zero the rest */ iov.iov_base = kmap(page); iov.iov_len = len; - cFYI(1, "%u: iov_base=%p iov_len=%zu", - i, iov.iov_base, iov.iov_len); + cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n", + i, iov.iov_base, iov.iov_len); memset(iov.iov_base + len, '\0', PAGE_SIZE - len); rdata->tailsz = len; len = 0; @@ -2824,7 +2819,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, pid = current->tgid; if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cFYI(1, "attempting read on write only file instance"); + cifs_dbg(FYI, "attempting read on write only file instance\n"); do { cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); @@ -3003,7 +2998,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) pid = current->tgid; if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cFYI(1, "attempting read on write only file instance"); + cifs_dbg(FYI, "attempting read on write only file instance\n"); for (total_read = 0, cur_offset = read_data; read_size > total_read; total_read += bytes_read, cur_offset += bytes_read) { @@ -3094,7 +3089,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) xid = get_xid(); rc = cifs_revalidate_file(file); if (rc) { - cFYI(1, "Validation prior to mmap failed, error=%d", rc); + cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", + rc); free_xid(xid); return rc; } @@ -3147,7 +3143,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, /* determine the eof that the server (probably) has */ eof = CIFS_I(rdata->mapping->host)->server_eof; eof_index = eof ? 
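/* page index covering the last byte of that (probable) eof */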
(eof - 1) >> PAGE_CACHE_SHIFT : 0; - cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); + cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); rdata->tailsz = PAGE_CACHE_SIZE; for (i = 0; i < nr_pages; i++) { @@ -3157,15 +3153,15 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, /* enough data to fill the page */ iov.iov_base = kmap(page); iov.iov_len = PAGE_CACHE_SIZE; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - i, page->index, iov.iov_base, iov.iov_len); + cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n", + i, page->index, iov.iov_base, iov.iov_len); len -= PAGE_CACHE_SIZE; } else if (len > 0) { /* enough for partial page, fill and zero the rest */ iov.iov_base = kmap(page); iov.iov_len = len; - cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", - i, page->index, iov.iov_base, iov.iov_len); + cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n", + i, page->index, iov.iov_base, iov.iov_len); memset(iov.iov_base + len, '\0', PAGE_CACHE_SIZE - len); rdata->tailsz = len; @@ -3245,8 +3241,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rc = 0; INIT_LIST_HEAD(&tmplist); - cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file, - mapping, num_pages); + cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", + __func__, file, mapping, num_pages); /* * Start with the page at end of list and move it to private @@ -3376,7 +3372,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, if (rc < 0) goto io_error; else - cFYI(1, "Bytes read %d", rc); + cifs_dbg(FYI, "Bytes read %d\n", rc); file_inode(file)->i_atime = current_fs_time(file_inode(file)->i_sb); @@ -3414,7 +3410,7 @@ static int cifs_readpage(struct file *file, struct page *page) return rc; } - cFYI(1, "readpage %p at offset %d 0x%x", + cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n", page, (int)offset, (int)offset); rc = cifs_readpage_worker(file, page, &offset); @@ -3481,7 +3477,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, struct page *page; int rc = 0; - cFYI(1, "write_begin from %lld len %d", (long long)pos, len); + cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { @@ -3570,7 +3566,7 @@ static int cifs_launder_page(struct page *page) .range_end = range_end, }; - cFYI(1, "Launder page: %p", page); + cifs_dbg(FYI, "Launder page: %p\n", page); if (clear_page_dirty_for_io(page)) rc = cifs_writepage_locked(page, &wbc); @@ -3590,8 +3586,8 @@ void cifs_oplock_break(struct work_struct *work) if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead && cifs_has_mand_locks(cinode)) { - cFYI(1, "Reset oplock to None for inode=%p due to mand locks", - inode); + cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", + inode); cinode->clientCanCacheRead = false; } @@ -3606,12 +3602,12 @@ void cifs_oplock_break(struct work_struct *work) mapping_set_error(inode->i_mapping, rc); cifs_invalidate_mapping(inode); } - cFYI(1, "Oplock flush inode %p rc %d", inode, rc); + cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); } rc = cifs_push_locks(cfile); if (rc) - cERROR(1, "Push locks rc = %d", rc); + cifs_dbg(VFS, "Push locks rc = %d\n", rc); /* * releasing stale oplock after recent reconnect of smb session using @@ -3622,7 +3618,7 @@ void cifs_oplock_break(struct work_struct *work) if (!cfile->oplock_break_cancelled) { rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, cinode); - cFYI(1, "Oplock release rc 
= %d", rc); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } } diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 42e5363b410..2f4bc5a5805 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -28,14 +28,14 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, &cifs_fscache_server_index_def, server); - cFYI(1, "%s: (0x%p/0x%p)", __func__, server, - server->fscache); + cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", + __func__, server, server->fscache); } void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) { - cFYI(1, "%s: (0x%p/0x%p)", __func__, server, - server->fscache); + cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", + __func__, server, server->fscache); fscache_relinquish_cookie(server->fscache, 0); server->fscache = NULL; } @@ -47,13 +47,13 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) tcon->fscache = fscache_acquire_cookie(server->fscache, &cifs_fscache_super_index_def, tcon); - cFYI(1, "%s: (0x%p/0x%p)", __func__, server->fscache, - tcon->fscache); + cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", + __func__, server->fscache, tcon->fscache); } void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) { - cFYI(1, "%s: (0x%p)", __func__, tcon->fscache); + cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache); fscache_relinquish_cookie(tcon->fscache, 0); tcon->fscache = NULL; } @@ -70,8 +70,8 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { cifsi->fscache = fscache_acquire_cookie(tcon->fscache, &cifs_fscache_inode_object_def, cifsi); - cFYI(1, "%s: got FH cookie (0x%p/0x%p)", __func__, - tcon->fscache, cifsi->fscache); + cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", + __func__, tcon->fscache, cifsi->fscache); } } @@ -80,7 +80,7 @@ void cifs_fscache_release_inode_cookie(struct inode *inode) struct cifsInodeInfo *cifsi = CIFS_I(inode); if (cifsi->fscache) { - cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); fscache_relinquish_cookie(cifsi->fscache, 0); cifsi->fscache = NULL; } @@ -91,7 +91,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) struct cifsInodeInfo *cifsi = CIFS_I(inode); if (cifsi->fscache) { - cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); fscache_uncache_all_inode_pages(cifsi->fscache, inode); fscache_relinquish_cookie(cifsi->fscache, 1); cifsi->fscache = NULL; @@ -120,8 +120,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) cifs_sb_master_tcon(cifs_sb)->fscache, &cifs_fscache_inode_object_def, cifsi); - cFYI(1, "%s: new cookie 0x%p oldcookie 0x%p", - __func__, cifsi->fscache, old); + cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n", + __func__, cifsi->fscache, old); } } @@ -131,8 +131,8 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp) struct inode *inode = page->mapping->host; struct cifsInodeInfo *cifsi = CIFS_I(inode); - cFYI(1, "%s: (0x%p/0x%p)", __func__, page, - cifsi->fscache); + cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", + __func__, page, cifsi->fscache); if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) return 0; } @@ -143,7 +143,7 @@ int cifs_fscache_release_page(struct page *page, gfp_t gfp) static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, int error) { - cFYI(1, "%s: (0x%p/%d)", __func__, page, error); + cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error); if (!error) 
SetPageUptodate(page); unlock_page(page); @@ -156,8 +156,8 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) { int ret; - cFYI(1, "%s: (fsc:%p, p:%p, i:0x%p", __func__, - CIFS_I(inode)->fscache, page, inode); + cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", + __func__, CIFS_I(inode)->fscache, page, inode); ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, cifs_readpage_from_fscache_complete, NULL, @@ -165,15 +165,15 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) switch (ret) { case 0: /* page found in fscache, read submitted */ - cFYI(1, "%s: submitted", __func__); + cifs_dbg(FYI, "%s: submitted\n", __func__); return ret; case -ENOBUFS: /* page won't be cached */ case -ENODATA: /* page not in cache */ - cFYI(1, "%s: %d", __func__, ret); + cifs_dbg(FYI, "%s: %d\n", __func__, ret); return 1; default: - cERROR(1, "unknown error ret = %d", ret); + cifs_dbg(VFS, "unknown error ret = %d\n", ret); } return ret; } @@ -188,8 +188,8 @@ int __cifs_readpages_from_fscache(struct inode *inode, { int ret; - cFYI(1, "%s: (0x%p/%u/0x%p)", __func__, - CIFS_I(inode)->fscache, *nr_pages, inode); + cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n", + __func__, CIFS_I(inode)->fscache, *nr_pages, inode); ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, pages, nr_pages, cifs_readpage_from_fscache_complete, @@ -197,16 +197,16 @@ int __cifs_readpages_from_fscache(struct inode *inode, mapping_gfp_mask(mapping)); switch (ret) { case 0: /* read submitted to the cache for all pages */ - cFYI(1, "%s: submitted", __func__); + cifs_dbg(FYI, "%s: submitted\n", __func__); return ret; case -ENOBUFS: /* some pages are not cached and can't be */ case -ENODATA: /* some pages are not cached */ - cFYI(1, "%s: no page", __func__); + cifs_dbg(FYI, "%s: no page\n", __func__); return 1; default: - cFYI(1, "unknown error ret = %d", ret); + cifs_dbg(FYI, "unknown error ret = %d\n", ret); } return ret; @@ -216,8 +216,8 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) { int ret; - cFYI(1, "%s: (fsc: %p, p: %p, i: %p)", __func__, - CIFS_I(inode)->fscache, page, inode); + cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n", + __func__, CIFS_I(inode)->fscache, page, inode); ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL); if (ret != 0) fscache_uncache_page(CIFS_I(inode)->fscache, page); @@ -228,7 +228,7 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) struct cifsInodeInfo *cifsi = CIFS_I(inode); struct fscache_cookie *cookie = cifsi->fscache; - cFYI(1, "%s: (0x%p/0x%p)", __func__, page, cookie); + cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); fscache_wait_on_page_write(cookie, page); fscache_uncache_page(cookie, page); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 20887bf6312..fc3025199cb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -91,30 +91,32 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) { struct cifsInodeInfo *cifs_i = CIFS_I(inode); - cFYI(1, "%s: revalidating inode %llu", __func__, cifs_i->uniqueid); + cifs_dbg(FYI, "%s: revalidating inode %llu\n", + __func__, cifs_i->uniqueid); if (inode->i_state & I_NEW) { - cFYI(1, "%s: inode %llu is new", __func__, cifs_i->uniqueid); + cifs_dbg(FYI, "%s: inode %llu is new\n", + __func__, cifs_i->uniqueid); return; } /* don't bother with revalidation if we have an oplock */ if (cifs_i->clientCanCacheRead) { - cFYI(1, "%s: inode %llu is oplocked", __func__, - cifs_i->uniqueid); + cifs_dbg(FYI, "%s: inode 
%llu is oplocked\n", + __func__, cifs_i->uniqueid); return; } /* revalidate if mtime or size have changed */ if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) && cifs_i->server_eof == fattr->cf_eof) { - cFYI(1, "%s: inode %llu is unchanged", __func__, - cifs_i->uniqueid); + cifs_dbg(FYI, "%s: inode %llu is unchanged\n", + __func__, cifs_i->uniqueid); return; } - cFYI(1, "%s: invalidating inode %llu mapping", __func__, - cifs_i->uniqueid); + cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n", + __func__, cifs_i->uniqueid); cifs_i->invalid_mapping = true; } @@ -240,7 +242,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, /* safest to call it a file if we do not know */ fattr->cf_mode |= S_IFREG; fattr->cf_dtype = DT_REG; - cFYI(1, "unknown type %d", le32_to_cpu(info->Type)); + cifs_dbg(FYI, "unknown type %d\n", le32_to_cpu(info->Type)); break; } @@ -279,7 +281,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) { struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - cFYI(1, "creating fake fattr for DFS referral"); + cifs_dbg(FYI, "creating fake fattr for DFS referral\n"); memset(fattr, 0, sizeof(*fattr)); fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; @@ -329,7 +331,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, struct tcon_link *tlink; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - cFYI(1, "Getting info on %s", full_path); + cifs_dbg(FYI, "Getting info on %s\n", full_path); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -355,7 +357,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); if (tmprc) - cFYI(1, "CIFSCheckMFSymlink: %d", tmprc); + cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); } if (*pinode == NULL) { @@ -422,7 +424,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, &buf_type); if ((rc == 0) && (bytes_read >= 8)) { if (memcmp("IntxBLK", pbuf, 8) == 0) { - cFYI(1, "Block device"); + cifs_dbg(FYI, "Block device\n"); fattr->cf_mode |= S_IFBLK; fattr->cf_dtype = DT_BLK; if (bytes_read == 24) { @@ -434,7 +436,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxCHR", pbuf, 8) == 0) { - cFYI(1, "Char device"); + cifs_dbg(FYI, "Char device\n"); fattr->cf_mode |= S_IFCHR; fattr->cf_dtype = DT_CHR; if (bytes_read == 24) { @@ -446,7 +448,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, fattr->cf_rdev = MKDEV(mjr, mnr); } } else if (memcmp("IntxLNK", pbuf, 7) == 0) { - cFYI(1, "Symlink"); + cifs_dbg(FYI, "Symlink\n"); fattr->cf_mode |= S_IFLNK; fattr->cf_dtype = DT_LNK; } else { @@ -497,10 +499,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, else if (rc > 3) { mode = le32_to_cpu(*((__le32 *)ea_value)); fattr->cf_mode &= ~SFBITS_MASK; - cFYI(1, "special bits 0%o org mode 0%o", mode, - fattr->cf_mode); + cifs_dbg(FYI, "special bits 0%o org mode 0%o\n", + mode, fattr->cf_mode); fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode; - cFYI(1, "special mode bits 0%o", mode); + cifs_dbg(FYI, "special mode bits 0%o\n", mode); } return 0; @@ -635,11 +637,11 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, tcon = tlink_tcon(tlink); server = tcon->ses->server; - cFYI(1, "Getting info on %s", full_path); + cifs_dbg(FYI, "Getting info on %s\n", full_path); if ((data == NULL) && (*inode != NULL)) { if 
(CIFS_I(*inode)->clientCanCacheRead) { - cFYI(1, "No need to revalidate cached inode sizes"); + cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); goto cgii_exit; } } @@ -714,7 +716,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, tcon, cifs_sb, full_path, &fattr.cf_uniqueid, data); if (tmprc) { - cFYI(1, "GetSrvInodeNum rc %d", tmprc); + cifs_dbg(FYI, "GetSrvInodeNum rc %d\n", + tmprc); fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); } @@ -729,7 +732,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid); if (tmprc) - cFYI(1, "cifs_sfu_type failed: %d", tmprc); + cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc); } #ifdef CONFIG_CIFS_ACL @@ -737,8 +740,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, full_path, fid); if (rc) { - cFYI(1, "%s: Getting ACL failed with error: %d", - __func__, rc); + cifs_dbg(FYI, "%s: Getting ACL failed with error: %d\n", + __func__, rc); goto cgii_exit; } } @@ -752,7 +755,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); if (tmprc) - cFYI(1, "CIFSCheckMFSymlink: %d", tmprc); + cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc); } if (!*inode) { @@ -836,7 +839,7 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) struct inode *inode; retry_iget5_locked: - cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid); + cifs_dbg(FYI, "looking for uniqueid=%llu\n", fattr->cf_uniqueid); /* hash down to 32-bits on 32-bit arch */ hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); @@ -899,7 +902,7 @@ struct inode *cifs_root_iget(struct super_block *sb) #endif if (rc && tcon->ipc) { - cFYI(1, "ipc connection - fake read inode"); + cifs_dbg(FYI, "ipc connection - fake read inode\n"); spin_lock(&inode->i_lock); inode->i_mode |= S_IFDIR; set_nlink(inode, 2); @@ -958,7 +961,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, * server times. 
*/ if (set_time && (attrs->ia_valid & ATTR_CTIME)) { - cFYI(1, "CIFS - CTIME changed"); + cifs_dbg(FYI, "CIFS - CTIME changed\n"); info_buf.ChangeTime = cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); } else @@ -1127,7 +1130,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) struct iattr *attrs = NULL; __u32 dosattr = 0, origattr = 0; - cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry); + cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -1150,7 +1153,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) rc = CIFSPOSIXDelFile(xid, tcon, full_path, SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, "posix del rc %d", rc); + cifs_dbg(FYI, "posix del rc %d\n", rc); if ((rc == 0) || (rc == -ENOENT)) goto psx_del_no_retry; } @@ -1320,7 +1323,7 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, if (rc == -EOPNOTSUPP) goto posix_mkdir_out; else if (rc) { - cFYI(1, "posix mkdir returned 0x%x", rc); + cifs_dbg(FYI, "posix mkdir returned 0x%x\n", rc); d_drop(dentry); goto posix_mkdir_out; } @@ -1342,11 +1345,12 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, d_instantiate(dentry, newinode); #ifdef CONFIG_CIFS_DEBUG2 - cFYI(1, "instantiated dentry %p %s to inode %p", dentry, - dentry->d_name.name, newinode); + cifs_dbg(FYI, "instantiated dentry %p %s to inode %p\n", + dentry, dentry->d_name.name, newinode); if (newinode->i_nlink != 2) - cFYI(1, "unexpected number of links %d", newinode->i_nlink); + cifs_dbg(FYI, "unexpected number of links %d\n", + newinode->i_nlink); #endif posix_mkdir_out: @@ -1368,7 +1372,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode); + cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + mode, inode); cifs_sb = CIFS_SB(inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); @@ -1402,7 +1407,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) /* BB add setting the equivalent of mode via CreateX w/ACLs */ rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb); if (rc) { - cFYI(1, "cifs_mkdir returned 0x%x", rc); + cifs_dbg(FYI, "cifs_mkdir returned 0x%x\n", rc); d_drop(direntry); goto mkdir_out; } @@ -1432,7 +1437,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) char *full_path = NULL; struct cifsInodeInfo *cifsInode; - cFYI(1, "cifs_rmdir, inode = 0x%p", inode); + cifs_dbg(FYI, "cifs_rmdir, inode = 0x%p\n", inode); xid = get_xid(); @@ -1681,8 +1686,8 @@ cifs_invalidate_mapping(struct inode *inode) if (inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = invalidate_inode_pages2(inode->i_mapping); if (rc) { - cERROR(1, "%s: could not invalidate inode %p", __func__, - inode); + cifs_dbg(VFS, "%s: could not invalidate inode %p\n", + __func__, inode); cifs_i->invalid_mapping = true; } } @@ -1732,8 +1737,8 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) goto out; } - cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time " - "%ld jiffies %ld", full_path, inode, inode->i_count.counter, + cifs_dbg(FYI, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time %ld jiffies %ld\n", + full_path, inode, inode->i_count.counter, dentry, dentry->d_time, jiffies); if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) @@ -1883,7 +1888,7 @@ 
cifs_set_file_size(struct inode *inode, struct iattr *attrs, else rc = -ENOSYS; cifsFileInfo_put(open_file); - cFYI(1, "SetFSize for attrs rc = %d", rc); + cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { unsigned int bytes_written; @@ -1894,7 +1899,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, io_parms.length = attrs->ia_size; rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, NULL, 1); - cFYI(1, "Wrt seteof rc %d", rc); + cifs_dbg(FYI, "Wrt seteof rc %d\n", rc); } } else rc = -EINVAL; @@ -1920,7 +1925,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, attrs->ia_size, cifs_sb, false); else rc = -ENOSYS; - cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); + cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc); if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { __u16 netfid; int oplock = 0; @@ -1940,7 +1945,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, io_parms.length = attrs->ia_size; rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL, NULL, 1); - cFYI(1, "wrt seteof rc %d", rc); + cifs_dbg(FYI, "wrt seteof rc %d\n", rc); CIFSSMBClose(xid, tcon, netfid); } } @@ -1971,7 +1976,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct cifs_unix_set_info_args *args = NULL; struct cifsFileInfo *open_file; - cFYI(1, "setattr_unix on file %s attrs->ia_valid=0x%x", + cifs_dbg(FYI, "setattr_unix on file %s attrs->ia_valid=0x%x\n", direntry->d_name.name, attrs->ia_valid); xid = get_xid(); @@ -2114,7 +2119,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - cFYI(1, "setattr on file %s attrs->iavalid 0x%x", + cifs_dbg(FYI, "setattr on file %s attrs->iavalid 0x%x\n", direntry->d_name.name, attrs->ia_valid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) @@ -2166,8 +2171,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, uid, gid); if (rc) { - cFYI(1, "%s: Setting id failed with error: %d", - __func__, rc); + cifs_dbg(FYI, "%s: Setting id failed with error: %d\n", + __func__, rc); goto cifs_setattr_exit; } } @@ -2188,8 +2193,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = id_mode_to_cifs_acl(inode, full_path, mode, INVALID_UID, INVALID_GID); if (rc) { - cFYI(1, "%s: Setting ACL failed with error: %d", - __func__, rc); + cifs_dbg(FYI, "%s: Setting ACL failed with error: %d\n", + __func__, rc); goto cifs_setattr_exit; } } else @@ -2277,7 +2282,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) #if 0 void cifs_delete_inode(struct inode *inode) { - cFYI(1, "In cifs_delete_inode, inode = 0x%p", inode); + cifs_dbg(FYI, "In cifs_delete_inode, inode = 0x%p\n", inode); /* may have to add back in if and when safe distributed caching of directories added e.g. 
via FindNotify */ } diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 6c9f1214cf0..3e084558585 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -44,7 +44,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) xid = get_xid(); - cFYI(1, "ioctl file %p cmd %u arg %lu", filep, command, arg); + cifs_dbg(FYI, "ioctl file %p cmd %u arg %lu\n", filep, command, arg); cifs_sb = CIFS_SB(inode->i_sb); @@ -83,11 +83,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * &ExtAttrMask); */ } - cFYI(1, "set flags not implemented yet"); + cifs_dbg(FYI, "set flags not implemented yet\n"); break; #endif /* CONFIG_CIFS_POSIX */ default: - cFYI(1, "unsupported ioctl"); + cifs_dbg(FYI, "unsupported ioctl\n"); break; } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 9f6c4c45d21..b83c3f5646b 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -56,14 +56,14 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(md5)) { rc = PTR_ERR(md5); - cERROR(1, "%s: Crypto md5 allocation error %d", __func__, rc); + cifs_dbg(VFS, "%s: Crypto md5 allocation error %d\n", + __func__, rc); return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); sdescmd5 = kmalloc(size, GFP_KERNEL); if (!sdescmd5) { rc = -ENOMEM; - cERROR(1, "%s: Memory allocation failure", __func__); goto symlink_hash_err; } sdescmd5->shash.tfm = md5; @@ -71,17 +71,17 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) rc = crypto_shash_init(&sdescmd5->shash); if (rc) { - cERROR(1, "%s: Could not init md5 shash", __func__); + cifs_dbg(VFS, "%s: Could not init md5 shash\n", __func__); goto symlink_hash_err; } rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); if (rc) { - cERROR(1, "%s: Could not update with link_str", __func__); + cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__); goto symlink_hash_err; } rc = crypto_shash_final(&sdescmd5->shash, md5_hash); if (rc) - cERROR(1, "%s: Could not generate md5 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); symlink_hash_err: crypto_free_shash(md5); @@ -115,7 +115,7 @@ CIFSParseMFSymlink(const u8 *buf, rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { - cFYI(1, "%s: MD5 hash failure: %d", __func__, rc); + cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); return rc; } @@ -154,7 +154,7 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { - cFYI(1, "%s: MD5 hash failure: %d", __func__, rc); + cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); return rc; } @@ -521,7 +521,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) if (!full_path) goto out; - cFYI(1, "Full path: %s inode = 0x%p", full_path, inode); + cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode); rc = -EACCES; /* @@ -578,8 +578,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) goto symlink_exit; } - cFYI(1, "Full path: %s", full_path); - cFYI(1, "symname is %s", symname); + cifs_dbg(FYI, "Full path: %s\n", full_path); + cifs_dbg(FYI, "symname is %s\n", symname); /* BB what if DFS and this volume is on different share? 
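(an MF symlink is just a small regular file holding the target path, so the answer decides which share that file lands on)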
BB */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) @@ -601,8 +601,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) inode->i_sb, xid, NULL); if (rc != 0) { - cFYI(1, "Create symlink ok, getinodeinfo fail rc = %d", - rc); + cifs_dbg(FYI, "Create symlink ok, getinodeinfo fail rc = %d\n", + rc); } else { d_instantiate(direntry, newinode); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1b15bf839f3..1bec014779f 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -54,7 +54,7 @@ _get_xid(void) if (GlobalTotalActiveXid > GlobalMaxActiveXid) GlobalMaxActiveXid = GlobalTotalActiveXid; if (GlobalTotalActiveXid > 65000) - cFYI(1, "warning: more than 65000 requests active"); + cifs_dbg(FYI, "warning: more than 65000 requests active\n"); xid = GlobalCurrentXid++; spin_unlock(&GlobalMid_Lock); return xid; @@ -91,7 +91,7 @@ void sesInfoFree(struct cifs_ses *buf_to_free) { if (buf_to_free == NULL) { - cFYI(1, "Null buffer passed to sesInfoFree"); + cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n"); return; } @@ -130,7 +130,7 @@ void tconInfoFree(struct cifs_tcon *buf_to_free) { if (buf_to_free == NULL) { - cFYI(1, "Null buffer passed to tconInfoFree"); + cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n"); return; } atomic_dec(&tconInfoAllocCount); @@ -180,7 +180,7 @@ void cifs_buf_release(void *buf_to_free) { if (buf_to_free == NULL) { - /* cFYI(1, "Null buffer passed to cifs_buf_release");*/ + /* cifs_dbg(FYI, "Null buffer passed to cifs_buf_release\n");*/ return; } mempool_free(buf_to_free, cifs_req_poolp); @@ -216,7 +216,7 @@ cifs_small_buf_release(void *buf_to_free) { if (buf_to_free == NULL) { - cFYI(1, "Null buffer passed to cifs_small_buf_release"); + cifs_dbg(FYI, "Null buffer passed to cifs_small_buf_release\n"); return; } mempool_free(buf_to_free, cifs_sm_req_poolp); @@ -282,15 +282,15 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid) { /* does it have the right SMB "signature" ? */ if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) { - cERROR(1, "Bad protocol string signature header 0x%x", - *(unsigned int *)smb->Protocol); + cifs_dbg(VFS, "Bad protocol string signature header 0x%x\n", + *(unsigned int *)smb->Protocol); return 1; } /* Make sure that message ids match */ if (mid != smb->Mid) { - cERROR(1, "Mids do not match. received=%u expected=%u", - smb->Mid, mid); + cifs_dbg(VFS, "Mids do not match. received=%u expected=%u\n", + smb->Mid, mid); return 1; } @@ -302,7 +302,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid) if (smb->Command == SMB_COM_LOCKING_ANDX) return 0; - cERROR(1, "Server sent request, not response. mid=%u", smb->Mid); + cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", smb->Mid); return 1; } @@ -313,8 +313,8 @@ checkSMB(char *buf, unsigned int total_read) __u16 mid = smb->Mid; __u32 rfclen = be32_to_cpu(smb->smb_buf_length); __u32 clc_len; /* calculated length */ - cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", - total_read, rfclen); + cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n", + total_read, rfclen); /* is this frame too small to even get to a BCC? 
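(i.e. too short to hold the fixed header, its word-count byte, and the two-byte byte count field)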
*/ if (total_read < 2 + sizeof(struct smb_hdr)) { @@ -340,9 +340,9 @@ checkSMB(char *buf, unsigned int total_read) tmp[sizeof(struct smb_hdr)+1] = 0; return 0; } - cERROR(1, "rcvd invalid byte count (bcc)"); + cifs_dbg(VFS, "rcvd invalid byte count (bcc)\n"); } else { - cERROR(1, "Length less than smb header size"); + cifs_dbg(VFS, "Length less than smb header size\n"); } return -EIO; } @@ -353,8 +353,8 @@ checkSMB(char *buf, unsigned int total_read) clc_len = smbCalcSize(smb); if (4 + rfclen != total_read) { - cERROR(1, "Length read does not match RFC1001 length %d", - rfclen); + cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n", + rfclen); return -EIO; } @@ -365,12 +365,12 @@ checkSMB(char *buf, unsigned int total_read) if (((4 + rfclen) & 0xFFFF) == (clc_len & 0xFFFF)) return 0; /* bcc wrapped */ } - cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", - clc_len, 4 + rfclen, smb->Mid); + cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n", + clc_len, 4 + rfclen, smb->Mid); if (4 + rfclen < clc_len) { - cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", - rfclen, smb->Mid); + cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n", + rfclen, smb->Mid); return -EIO; } else if (rfclen > clc_len + 512) { /* @@ -382,8 +382,8 @@ checkSMB(char *buf, unsigned int total_read) * trailing data, we choose limit the amount of extra * data to 512 bytes. */ - cERROR(1, "RFC1001 size %u more than 512 bytes larger " - "than SMB for mid=%u", rfclen, smb->Mid); + cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n", + rfclen, smb->Mid); return -EIO; } } @@ -401,7 +401,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) struct cifsInodeInfo *pCifsInode; struct cifsFileInfo *netfile; - cFYI(1, "Checking for oplock break or dnotify response"); + cifs_dbg(FYI, "Checking for oplock break or dnotify response\n"); if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && (pSMB->hdr.Flags & SMBFLG_RESPONSE)) { struct smb_com_transaction_change_notify_rsp *pSMBr = @@ -413,15 +413,15 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) pnotify = (struct file_notify_information *) ((char *)&pSMBr->hdr.Protocol + data_offset); - cFYI(1, "dnotify on %s Action: 0x%x", + cifs_dbg(FYI, "dnotify on %s Action: 0x%x\n", pnotify->FileName, pnotify->Action); /* cifs_dump_mem("Rcvd notify Data: ",buf, sizeof(struct smb_hdr)+60); */ return true; } if (pSMBr->hdr.Status.CifsError) { - cFYI(1, "notify err 0x%d", - pSMBr->hdr.Status.CifsError); + cifs_dbg(FYI, "notify err 0x%d\n", + pSMBr->hdr.Status.CifsError); return true; } return false; @@ -435,7 +435,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) large dirty files cached on the client */ if ((NT_STATUS_INVALID_HANDLE) == le32_to_cpu(pSMB->hdr.Status.CifsError)) { - cFYI(1, "invalid handle on oplock break"); + cifs_dbg(FYI, "invalid handle on oplock break\n"); return true; } else if (ERRbadfid == le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { @@ -447,7 +447,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (pSMB->hdr.WordCount != 8) return false; - cFYI(1, "oplock type 0x%d level 0x%d", + cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n", pSMB->LockType, pSMB->OplockLevel); if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; @@ -469,7 +469,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (pSMB->Fid != netfile->fid.netfid) continue; - cFYI(1, "file id match, oplock break"); + cifs_dbg(FYI, 
"file id match, oplock break\n"); pCifsInode = CIFS_I(netfile->dentry->d_inode); cifs_set_oplock_level(pCifsInode, @@ -484,12 +484,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) } spin_unlock(&cifs_file_list_lock); spin_unlock(&cifs_tcp_ses_lock); - cFYI(1, "No matching file for oplock break"); + cifs_dbg(FYI, "No matching file for oplock break\n"); return true; } } spin_unlock(&cifs_tcp_ses_lock); - cFYI(1, "Can not process oplock break for non-existent connection"); + cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); return true; } @@ -536,12 +536,8 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; - cERROR(1, "Autodisabling the use of server inode numbers on " - "%s. This server doesn't seem to support them " - "properly. Hardlinks will not be recognized on this " - "mount. Consider mounting with the \"noserverino\" " - "option to silence this message.", - cifs_sb_master_tcon(cifs_sb)->treeName); + cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s. This server doesn't seem to support them properly. Hardlinks will not be recognized on this mount. Consider mounting with the \"noserverino\" option to silence this message.\n", + cifs_sb_master_tcon(cifs_sb)->treeName); } } @@ -552,13 +548,13 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) if (oplock == OPLOCK_EXCLUSIVE) { cinode->clientCanCacheAll = true; cinode->clientCanCacheRead = true; - cFYI(1, "Exclusive Oplock granted on inode %p", - &cinode->vfs_inode); + cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", + &cinode->vfs_inode); } else if (oplock == OPLOCK_READ) { cinode->clientCanCacheAll = false; cinode->clientCanCacheRead = true; - cFYI(1, "Level II Oplock granted on inode %p", - &cinode->vfs_inode); + cifs_dbg(FYI, "Level II Oplock granted on inode %p\n", + &cinode->vfs_inode); } else { cinode->clientCanCacheAll = false; cinode->clientCanCacheRead = false; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index c0b25b28be6..af847e1cf1c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -150,8 +150,8 @@ cifs_inet_pton(const int address_family, const char *cp, int len, void *dst) else if (address_family == AF_INET6) ret = in6_pton(cp, len, dst , '\\', NULL); - cFYI(DBG2, "address conversion returned %d for %*.*s", - ret, len, len, cp); + cifs_dbg(NOISY, "address conversion returned %d for %*.*s\n", + ret, len, len, cp); if (ret > 0) ret = 1; return ret; @@ -887,7 +887,7 @@ map_smb_to_linux_error(char *buf, bool logErr) } /* else ERRHRD class errors or junk - return EIO */ - cFYI(1, "Mapping smb error code 0x%x to POSIX err %d", + cifs_dbg(FYI, "Mapping smb error code 0x%x to POSIX err %d\n", le32_to_cpu(smb->Status.CifsError), rc); /* generic corrective action e.g. 
reconnect SMB session on @@ -951,20 +951,20 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) SMB_TIME *st = (SMB_TIME *)&time; SMB_DATE *sd = (SMB_DATE *)&date; - cFYI(1, "date %d time %d", date, time); + cifs_dbg(FYI, "date %d time %d\n", date, time); sec = 2 * st->TwoSeconds; min = st->Minutes; if ((sec > 59) || (min > 59)) - cERROR(1, "illegal time min %d sec %d", min, sec); + cifs_dbg(VFS, "illegal time min %d sec %d\n", min, sec); sec += (min * 60); sec += 60 * 60 * st->Hours; if (st->Hours > 24) - cERROR(1, "illegal hours %d", st->Hours); + cifs_dbg(VFS, "illegal hours %d\n", st->Hours); days = sd->Day; month = sd->Month; if ((days > 31) || (month > 12)) { - cERROR(1, "illegal date, month %d day: %d", month, days); + cifs_dbg(VFS, "illegal date, month %d day: %d\n", month, days); if (month > 12) month = 12; } @@ -990,7 +990,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) ts.tv_sec = sec + offset; - /* cFYI(1, "sec after cnvrt dos to unix time %d",sec); */ + /* cifs_dbg(FYI, "sec after cnvrt dos to unix time %d\n",sec); */ ts.tv_nsec = 0; return ts; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index df40cc5fd13..770d5a9781c 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -48,15 +48,15 @@ static void dump_cifs_file_struct(struct file *file, char *label) if (file) { cf = file->private_data; if (cf == NULL) { - cFYI(1, "empty cifs private file data"); + cifs_dbg(FYI, "empty cifs private file data\n"); return; } if (cf->invalidHandle) - cFYI(1, "invalid handle"); + cifs_dbg(FYI, "invalid handle\n"); if (cf->srch_inf.endOfSearch) - cFYI(1, "end of search"); + cifs_dbg(FYI, "end of search\n"); if (cf->srch_inf.emptyDir) - cFYI(1, "empty dir"); + cifs_dbg(FYI, "empty dir\n"); } } #else @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct super_block *sb = parent->d_inode->i_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - cFYI(1, "%s: for %s", __func__, name->name); + cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); dentry = d_hash_and_lookup(parent, name); if (unlikely(IS_ERR(dentry))) @@ -233,7 +233,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, fid, cifs_sb->local_nls); if (CIFSSMBClose(xid, ptcon, fid)) { - cFYI(1, "Error closing temporary reparsepoint open"); + cifs_dbg(FYI, "Error closing temporary reparsepoint open\n"); } } } @@ -285,7 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) goto error_exit; } - cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos); + cifs_dbg(FYI, "Full path: %s start at: %lld\n", full_path, file->f_pos); ffirst_retry: /* test for Unix extensions */ @@ -336,7 +336,7 @@ static int cifs_unicode_bytelen(const char *str) if (ustr[len] == 0) return len << 1; } - cFYI(1, "Unicode string longer than PATH_MAX found"); + cifs_dbg(FYI, "Unicode string longer than PATH_MAX found\n"); return len << 1; } @@ -353,18 +353,18 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) pfData->FileNameLength; } else new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); - cFYI(1, "new entry %p old entry %p", new_entry, old_entry); + cifs_dbg(FYI, "new entry %p old entry %p\n", new_entry, old_entry); /* validate that new_entry is not past end of SMB */ if (new_entry >= end_of_smb) { - cERROR(1, "search entry %p began after end of SMB %p old entry %p", - new_entry, end_of_smb, old_entry); + cifs_dbg(VFS, "search entry %p began after end of SMB %p old entry %p\n", + 
new_entry, end_of_smb, old_entry); return NULL; } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) || ((level != SMB_FIND_FILE_INFO_STANDARD) && (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { - cERROR(1, "search entry %p extends after end of SMB %p", - new_entry, end_of_smb); + cifs_dbg(VFS, "search entry %p extends after end of SMB %p\n", + new_entry, end_of_smb); return NULL; } else return new_entry; @@ -457,7 +457,7 @@ static int cifs_fill_dirent(struct cifs_dirent *de, const void *info, cifs_fill_dirent_std(de, info); break; default: - cFYI(1, "Unknown findfirst level %d", level); + cifs_dbg(FYI, "Unknown findfirst level %d\n", level); return -EINVAL; } @@ -572,7 +572,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, if (((index_to_find < cfile->srch_inf.index_of_last_entry) && is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) { /* close and restart search */ - cFYI(1, "search backing up - close and restart search"); + cifs_dbg(FYI, "search backing up - close and restart search\n"); spin_lock(&cifs_file_list_lock); if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { cfile->invalidHandle = true; @@ -582,7 +582,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, } else spin_unlock(&cifs_file_list_lock); if (cfile->srch_inf.ntwrk_buf_start) { - cFYI(1, "freeing SMB ff cache buf on search rewind"); + cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n"); if (cfile->srch_inf.smallBuf) cifs_small_buf_release(cfile->srch_inf. ntwrk_buf_start); @@ -593,7 +593,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, } rc = initiate_cifs_search(xid, file); if (rc) { - cFYI(1, "error %d reinitiating a search on rewind", + cifs_dbg(FYI, "error %d reinitiating a search on rewind\n", rc); return rc; } @@ -608,7 +608,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, while ((index_to_find >= cfile->srch_inf.index_of_last_entry) && (rc == 0) && !cfile->srch_inf.endOfSearch) { - cFYI(1, "calling findnext2"); + cifs_dbg(FYI, "calling findnext2\n"); rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, search_flags, &cfile->srch_inf); @@ -631,7 +631,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - cfile->srch_inf.entries_in_buffer; pos_in_buf = index_to_find - first_entry_in_buffer; - cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); + cifs_dbg(FYI, "found entry - pos_in_buf %d\n", pos_in_buf); for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ @@ -640,19 +640,18 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, } if ((cur_ent == NULL) && (i < pos_in_buf)) { /* BB fixme - check if we should flag this error */ - cERROR(1, "reached end of buf searching for pos in buf" - " %d index to find %lld rc %d", pos_in_buf, - index_to_find, rc); + cifs_dbg(VFS, "reached end of buf searching for pos in buf %d index to find %lld rc %d\n", + pos_in_buf, index_to_find, rc); } rc = 0; *current_entry = cur_ent; } else { - cFYI(1, "index not in buffer - could not findnext into it"); + cifs_dbg(FYI, "index not in buffer - could not findnext into it\n"); return 0; } if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) { - cFYI(1, "can not return entries pos_in_buf beyond last"); + cifs_dbg(FYI, "can not return entries pos_in_buf beyond last\n"); *num_to_ret = 0; } else *num_to_ret = 
cfile->srch_inf.entries_in_buffer - pos_in_buf; @@ -678,8 +677,8 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, return rc; if (de.namelen > max_len) { - cERROR(1, "bad search response length %zd past smb end", - de.namelen); + cifs_dbg(VFS, "bad search response length %zd past smb end\n", + de.namelen); return -EINVAL; } @@ -768,7 +767,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) */ if (file->private_data == NULL) { rc = initiate_cifs_search(xid, file); - cFYI(1, "initiate cifs search rc %d", rc); + cifs_dbg(FYI, "initiate cifs search rc %d\n", rc); if (rc) goto rddir2_exit; } @@ -777,7 +776,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) case 0: if (filldir(direntry, ".", 1, file->f_pos, file_inode(file)->i_ino, DT_DIR) < 0) { - cERROR(1, "Filldir for current dir failed"); + cifs_dbg(VFS, "Filldir for current dir failed\n"); rc = -ENOMEM; break; } @@ -785,7 +784,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) case 1: if (filldir(direntry, "..", 2, file->f_pos, parent_ino(file->f_path.dentry), DT_DIR) < 0) { - cERROR(1, "Filldir for parent dir failed"); + cifs_dbg(VFS, "Filldir for parent dir failed\n"); rc = -ENOMEM; break; } @@ -804,7 +803,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) cifsFile = file->private_data; if (cifsFile->srch_inf.endOfSearch) { if (cifsFile->srch_inf.emptyDir) { - cFYI(1, "End of search, empty dir"); + cifs_dbg(FYI, "End of search, empty dir\n"); rc = 0; break; } @@ -817,16 +816,16 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) rc = find_cifs_entry(xid, tcon, file, ¤t_entry, &num_to_fill); if (rc) { - cFYI(1, "fce error %d", rc); + cifs_dbg(FYI, "fce error %d\n", rc); goto rddir2_exit; } else if (current_entry != NULL) { - cFYI(1, "entry %lld found", file->f_pos); + cifs_dbg(FYI, "entry %lld found\n", file->f_pos); } else { - cFYI(1, "could not find entry"); + cifs_dbg(FYI, "could not find entry\n"); goto rddir2_exit; } - cFYI(1, "loop through %d times filling dir for net buf %p", - num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); + cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", + num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); max_len = tcon->ses->server->ops->calc_smb_size( cifsFile->srch_inf.ntwrk_buf_start); end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; @@ -840,8 +839,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) for (i = 0; (i < num_to_fill) && (rc == 0); i++) { if (current_entry == NULL) { /* evaluate whether this case is an error */ - cERROR(1, "past SMB end, num to fill %d i %d", - num_to_fill, i); + cifs_dbg(VFS, "past SMB end, num to fill %d i %d\n", + num_to_fill, i); break; } /* @@ -858,8 +857,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) file->f_pos++; if (file->f_pos == cifsFile->srch_inf.index_of_last_entry) { - cFYI(1, "last entry in buf at pos %lld %s", - file->f_pos, tmp_buf); + cifs_dbg(FYI, "last entry in buf at pos %lld %s\n", + file->f_pos, tmp_buf); cifs_save_resume_key(current_entry, cifsFile); break; } else diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 76809f4d342..f230571a7ab 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -283,11 +283,11 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, int len; char *data = *pbcc_area; - cFYI(1, "bleft %d", bleft); + cifs_dbg(FYI, "bleft %d\n", bleft); kfree(ses->serverOS); ses->serverOS = 
cifs_strndup_from_utf16(data, bleft, true, nls_cp); - cFYI(1, "serverOS=%s", ses->serverOS); + cifs_dbg(FYI, "serverOS=%s\n", ses->serverOS); len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; data += len; bleft -= len; @@ -296,7 +296,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, kfree(ses->serverNOS); ses->serverNOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp); - cFYI(1, "serverNOS=%s", ses->serverNOS); + cifs_dbg(FYI, "serverNOS=%s\n", ses->serverNOS); len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; data += len; bleft -= len; @@ -305,7 +305,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, kfree(ses->serverDomain); ses->serverDomain = cifs_strndup_from_utf16(data, bleft, true, nls_cp); - cFYI(1, "serverDomain=%s", ses->serverDomain); + cifs_dbg(FYI, "serverDomain=%s\n", ses->serverDomain); return; } @@ -318,7 +318,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, int len; char *bcc_ptr = *pbcc_area; - cFYI(1, "decode sessetup ascii. bleft %d", bleft); + cifs_dbg(FYI, "decode sessetup ascii. bleft %d\n", bleft); len = strnlen(bcc_ptr, bleft); if (len >= bleft) @@ -330,7 +330,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, if (ses->serverOS) strncpy(ses->serverOS, bcc_ptr, len); if (strncmp(ses->serverOS, "OS/2", 4) == 0) { - cFYI(1, "OS/2 server"); + cifs_dbg(FYI, "OS/2 server\n"); ses->flags |= CIFS_SES_OS2; } @@ -359,7 +359,7 @@ static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, /* BB For newer servers which do not support Unicode, but thus do return domain here we could add parsing for it later, but it is not very important */ - cFYI(1, "ascii: bytes left %d", bleft); + cifs_dbg(FYI, "ascii: bytes left %d\n", bleft); return rc; } @@ -373,16 +373,18 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; if (blob_len < sizeof(CHALLENGE_MESSAGE)) { - cERROR(1, "challenge blob len %d too small", blob_len); + cifs_dbg(VFS, "challenge blob len %d too small\n", blob_len); return -EINVAL; } if (memcmp(pblob->Signature, "NTLMSSP", 8)) { - cERROR(1, "blob signature incorrect %s", pblob->Signature); + cifs_dbg(VFS, "blob signature incorrect %s\n", + pblob->Signature); return -EINVAL; } if (pblob->MessageType != NtLmChallenge) { - cERROR(1, "Incorrect message type %d", pblob->MessageType); + cifs_dbg(VFS, "Incorrect message type %d\n", + pblob->MessageType); return -EINVAL; } @@ -395,16 +397,17 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); if (tioffset > blob_len || tioffset + tilen > blob_len) { - cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen); + cifs_dbg(VFS, "tioffset + tilen too high %u + %u\n", + tioffset, tilen); return -EINVAL; } if (tilen) { - ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); + ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen, + GFP_KERNEL); if (!ses->auth_key.response) { - cERROR(1, "Challenge target info allocation failure"); + cifs_dbg(VFS, "Challenge target info alloc failure\n"); return -ENOMEM; } - memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen); ses->auth_key.len = tilen; } @@ -486,7 +489,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); rc = setup_ntlmv2_rsp(ses, nls_cp); if (rc) { - cERROR(1, "Error %d during NTLMSSP authentication",
rc); + cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc); goto setup_ntlmv2_ret; } memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE, @@ -580,7 +583,7 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, return -EINVAL; type = ses->server->secType; - cFYI(1, "sess setup type %d", type); + cifs_dbg(FYI, "sess setup type %d\n", type); if (type == RawNTLMSSP) { /* if memory allocation is successful, caller of this function * frees it. @@ -674,7 +677,7 @@ ssetup_ntlmssp_authenticate: changed to do higher than lanman dialect and we reconnected would we ever calc signing_key? */ - cFYI(1, "Negotiating LANMAN setting up strings"); + cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); /* Unicode not allowed for LANMAN dialects */ ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); #endif @@ -688,7 +691,8 @@ ssetup_ntlmssp_authenticate: /* calculate ntlm response and session key */ rc = setup_ntlm_response(ses, nls_cp); if (rc) { - cERROR(1, "Error %d during NTLM authentication", rc); + cifs_dbg(VFS, "Error %d during NTLM authentication\n", + rc); goto ssetup_exit; } @@ -718,7 +722,8 @@ ssetup_ntlmssp_authenticate: /* calculate nlmv2 response and session key */ rc = setup_ntlmv2_rsp(ses, nls_cp); if (rc) { - cERROR(1, "Error %d during NTLMv2 authentication", rc); + cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", + rc); goto ssetup_exit; } memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, @@ -754,21 +759,21 @@ ssetup_ntlmssp_authenticate: /* check version field to make sure that cifs.upcall is sending us a response in an expected form */ if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { - cERROR(1, "incorrect version of cifs.upcall (expected" - " %d but got %d)", + cifs_dbg(VFS, "incorrect version of cifs.upcall " + "expected %d but got %d)", CIFS_SPNEGO_UPCALL_VERSION, msg->version); rc = -EKEYREJECTED; goto ssetup_exit; } - ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL); + ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, + GFP_KERNEL); if (!ses->auth_key.response) { - cERROR(1, "Kerberos can't allocate (%u bytes) memory", + cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory", msg->sesskey_len); rc = -ENOMEM; goto ssetup_exit; } - memcpy(ses->auth_key.response, msg->data, msg->sesskey_len); ses->auth_key.len = msg->sesskey_len; pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; @@ -790,18 +795,18 @@ ssetup_ntlmssp_authenticate: /* BB: is this right? */ ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); #else /* ! 
CONFIG_CIFS_UPCALL */ - cERROR(1, "Kerberos negotiated but upcall support disabled!"); + cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); rc = -ENOSYS; goto ssetup_exit; #endif /* CONFIG_CIFS_UPCALL */ } else if (type == RawNTLMSSP) { if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { - cERROR(1, "NTLMSSP requires Unicode support"); + cifs_dbg(VFS, "NTLMSSP requires Unicode support\n"); rc = -ENOSYS; goto ssetup_exit; } - cFYI(1, "ntlmssp session setup phase %d", phase); + cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase); pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; capabilities |= CAP_EXTENDED_SECURITY; pSMB->req.Capabilities |= cpu_to_le32(capabilities); @@ -824,7 +829,6 @@ ssetup_ntlmssp_authenticate: 5*sizeof(struct _AUTHENTICATE_MESSAGE), GFP_KERNEL); if (!ntlmsspblob) { - cERROR(1, "Can't allocate NTLMSSP blob"); rc = -ENOMEM; goto ssetup_exit; } @@ -844,7 +848,7 @@ ssetup_ntlmssp_authenticate: smb_buf->Uid = ses->Suid; break; default: - cERROR(1, "invalid phase %d", phase); + cifs_dbg(VFS, "invalid phase %d\n", phase); rc = -ENOSYS; goto ssetup_exit; } @@ -855,7 +859,7 @@ ssetup_ntlmssp_authenticate: } unicode_oslm_strings(&bcc_ptr, nls_cp); } else { - cERROR(1, "secType %d not supported!", type); + cifs_dbg(VFS, "secType %d not supported!\n", type); rc = -ENOSYS; goto ssetup_exit; } @@ -880,7 +884,7 @@ ssetup_ntlmssp_authenticate: (smb_buf->Status.CifsError == cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { if (phase != NtLmNegotiate) { - cERROR(1, "Unexpected more processing error"); + cifs_dbg(VFS, "Unexpected more processing error\n"); goto ssetup_exit; } /* NTLMSSP Negotiate sent now processing challenge (response) */ @@ -892,14 +896,14 @@ ssetup_ntlmssp_authenticate: if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { rc = -EIO; - cERROR(1, "bad word count %d", smb_buf->WordCount); + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); goto ssetup_exit; } action = le16_to_cpu(pSMB->resp.Action); if (action & GUEST_LOGIN) - cFYI(1, "Guest login"); /* BB mark SesInfo struct? */ + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? 
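(GUEST_LOGIN in the Action field means the server granted the session guest access rather than honoring the supplied credentials)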
*/ ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ - cFYI(1, "UID = %llu ", ses->Suid); + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); /* response can have either 3 or 4 word count - Samba sends 3 */ /* and lanman response is 3 */ bytes_remaining = get_bcc(smb_buf); @@ -908,7 +912,8 @@ ssetup_ntlmssp_authenticate: if (smb_buf->WordCount == 4) { blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); if (blob_len > bytes_remaining) { - cERROR(1, "bad security blob length %d", blob_len); + cifs_dbg(VFS, "bad security blob length %d\n", + blob_len); rc = -EINVAL; goto ssetup_exit; } @@ -946,7 +951,7 @@ ssetup_exit: kfree(ntlmsspblob); ntlmsspblob = NULL; if (resp_buf_type == CIFS_SMALL_BUFFER) { - cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base); + cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base); cifs_small_buf_release(iov[0].iov_base); } else if (resp_buf_type == CIFS_LARGE_BUFFER) cifs_buf_release(iov[0].iov_base); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 47bc5a87f94..3efdb9d5c0b 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -61,10 +61,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, */ --server->sequence_number; rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); + if (rc < 0) + server->sequence_number--; + mutex_unlock(&server->srv_mutex); - cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", - in_buf->Mid, rc); + cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n", + in_buf->Mid, rc); return rc; } @@ -249,7 +252,7 @@ check2ndT2(char *buf) /* check for plausible wct, bcc and t2 data and parm sizes */ /* check for parm and data offset going beyond end of smb */ if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ - cFYI(1, "invalid transact2 word count"); + cifs_dbg(FYI, "invalid transact2 word count\n"); return -EINVAL; } @@ -261,18 +264,18 @@ check2ndT2(char *buf) if (total_data_size == data_in_this_rsp) return 0; else if (total_data_size < data_in_this_rsp) { - cFYI(1, "total data %d smaller than data in frame %d", - total_data_size, data_in_this_rsp); + cifs_dbg(FYI, "total data %d smaller than data in frame %d\n", + total_data_size, data_in_this_rsp); return -EINVAL; } remaining = total_data_size - data_in_this_rsp; - cFYI(1, "missing %d bytes from transact2, check next response", - remaining); + cifs_dbg(FYI, "missing %d bytes from transact2, check next response\n", + remaining); if (total_data_size > CIFSMaxBufSize) { - cERROR(1, "TotalDataSize %d is over maximum buffer %d", - total_data_size, CIFSMaxBufSize); + cifs_dbg(VFS, "TotalDataSize %d is over maximum buffer %d\n", + total_data_size, CIFSMaxBufSize); return -EINVAL; } return remaining; @@ -293,28 +296,28 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); if (tgt_total_cnt != src_total_cnt) - cFYI(1, "total data count of primary and secondary t2 differ " - "source=%hu target=%hu", src_total_cnt, tgt_total_cnt); + cifs_dbg(FYI, "total data count of primary and secondary t2 differ source=%hu target=%hu\n", + src_total_cnt, tgt_total_cnt); total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); remaining = tgt_total_cnt - total_in_tgt; if (remaining < 0) { - cFYI(1, "Server sent too much data. tgt_total_cnt=%hu " - "total_in_tgt=%hu", tgt_total_cnt, total_in_tgt); + cifs_dbg(FYI, "Server sent too much data. 
tgt_total_cnt=%hu total_in_tgt=%hu\n", + tgt_total_cnt, total_in_tgt); return -EPROTO; } if (remaining == 0) { /* nothing to do, ignore */ - cFYI(1, "no more data remains"); + cifs_dbg(FYI, "no more data remains\n"); return 0; } total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount); if (remaining < total_in_src) - cFYI(1, "transact2 2nd response contains too much data"); + cifs_dbg(FYI, "transact2 2nd response contains too much data\n"); /* find end of first SMB data area */ data_area_of_tgt = (char *)&pSMBt->hdr.Protocol + @@ -329,7 +332,8 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) total_in_tgt += total_in_src; /* is the result too big for the field? */ if (total_in_tgt > USHRT_MAX) { - cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt); + cifs_dbg(FYI, "coalesced DataCount too large (%u)\n", + total_in_tgt); return -EPROTO; } put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount); @@ -339,7 +343,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) byte_count += total_in_src; /* is the result too big for the field? */ if (byte_count > USHRT_MAX) { - cFYI(1, "coalesced BCC too large (%u)", byte_count); + cifs_dbg(FYI, "coalesced BCC too large (%u)\n", byte_count); return -EPROTO; } put_bcc(byte_count, target_hdr); @@ -348,7 +352,8 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) byte_count += total_in_src; /* don't allow buffer to overflow */ if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count); + cifs_dbg(FYI, "coalesced BCC exceeds buffer size (%u)\n", + byte_count); return -ENOBUFS; } target_hdr->smb_buf_length = cpu_to_be32(byte_count); @@ -358,12 +363,12 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) if (remaining != total_in_src) { /* more responses to go */ - cFYI(1, "waiting for more secondary responses"); + cifs_dbg(FYI, "waiting for more secondary responses\n"); return 1; } /* we are done */ - cFYI(1, "found the last secondary response"); + cifs_dbg(FYI, "found the last secondary response\n"); return 0; } @@ -388,7 +393,7 @@ cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, } if (!server->large_buf) { /*FIXME: switch to already allocated largebuf?*/ - cERROR(1, "1st trans2 resp needs bigbuf"); + cifs_dbg(VFS, "1st trans2 resp needs bigbuf\n"); } else { /* Have first buffer */ mid->resp_buf = buf; @@ -776,8 +781,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, goto out; } - cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported " - "by this server"); + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 71e6aed4b38..5da1b55a225 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -43,13 +43,13 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { cinode->clientCanCacheAll = true; cinode->clientCanCacheRead = true; - cFYI(1, "Exclusive Oplock granted on inode %p", - &cinode->vfs_inode); + cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", + &cinode->vfs_inode); } else if (oplock == SMB2_OPLOCK_LEVEL_II) { cinode->clientCanCacheAll = false; cinode->clientCanCacheRead = true; - cFYI(1, "Level II Oplock granted on inode %p", - &cinode->vfs_inode); + cifs_dbg(FYI, "Level 
II Oplock granted on inode %p\n", + &cinode->vfs_inode); } else { cinode->clientCanCacheAll = false; cinode->clientCanCacheRead = false; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 706482452df..fff6dfba620 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -92,7 +92,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, (FILE_BASIC_INFO *)data); break; default: - cERROR(1, "Invalid command"); + cifs_dbg(VFS, "Invalid command\n"); break; } diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 494c912c76f..7c2f45c06fc 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -2472,7 +2472,7 @@ map_smb2_to_linux_error(char *buf, bool log_err) /* on error mapping not found - return EIO */ - cFYI(1, "Mapping SMB2 status code %d to POSIX err %d", + cifs_dbg(FYI, "Mapping SMB2 status code %d to POSIX err %d\n", smb2err, rc); return rc; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 7b1c5e3287f..10383d8c015 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -45,17 +45,17 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid) if (hdr->Command == SMB2_OPLOCK_BREAK) return 0; else - cERROR(1, "Received Request not response"); + cifs_dbg(VFS, "Received Request not response\n"); } } else { /* bad signature or mid */ if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER) - cERROR(1, "Bad protocol string signature header %x", - *(unsigned int *) hdr->ProtocolId); + cifs_dbg(VFS, "Bad protocol string signature header %x\n", + *(unsigned int *) hdr->ProtocolId); if (mid != hdr->MessageId) - cERROR(1, "Mids do not match: %llu and %llu", mid, - hdr->MessageId); + cifs_dbg(VFS, "Mids do not match: %llu and %llu\n", + mid, hdr->MessageId); } - cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); + cifs_dbg(VFS, "Bad SMB detected. 
The Mid=%llu\n", hdr->MessageId); return 1; } @@ -101,7 +101,8 @@ smb2_check_message(char *buf, unsigned int length) int command; /* BB disable following printk later */ - cFYI(1, "%s length: 0x%x, smb_buf_length: 0x%x", __func__, length, len); + cifs_dbg(FYI, "%s length: 0x%x, smb_buf_length: 0x%x\n", + __func__, length, len); /* * Add function to do table lookup of StructureSize by command @@ -117,12 +118,13 @@ smb2_check_message(char *buf, unsigned int length) */ return 0; } else { - cERROR(1, "Length less than SMB header size"); + cifs_dbg(VFS, "Length less than SMB header size\n"); } return 1; } if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { - cERROR(1, "SMB length greater than maximum, mid=%llu", mid); + cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n", + mid); return 1; } @@ -130,14 +132,14 @@ smb2_check_message(char *buf, unsigned int length) return 1; if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) { - cERROR(1, "Illegal structure size %u", - le16_to_cpu(hdr->StructureSize)); + cifs_dbg(VFS, "Illegal structure size %u\n", + le16_to_cpu(hdr->StructureSize)); return 1; } command = le16_to_cpu(hdr->Command); if (command >= NUMBER_OF_SMB2_COMMANDS) { - cERROR(1, "Illegal SMB2 command %d", command); + cifs_dbg(VFS, "Illegal SMB2 command %d\n", command); return 1; } @@ -145,30 +147,30 @@ smb2_check_message(char *buf, unsigned int length) if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) { /* error packets have 9 byte structure size */ - cERROR(1, "Illegal response size %u for command %d", - le16_to_cpu(pdu->StructureSize2), command); + cifs_dbg(VFS, "Illegal response size %u for command %d\n", + le16_to_cpu(pdu->StructureSize2), command); return 1; } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0) && (le16_to_cpu(pdu->StructureSize2) != 44) && (le16_to_cpu(pdu->StructureSize2) != 36)) { /* special case for SMB2.1 lease break message */ - cERROR(1, "Illegal response size %d for oplock break", - le16_to_cpu(pdu->StructureSize2)); + cifs_dbg(VFS, "Illegal response size %d for oplock break\n", + le16_to_cpu(pdu->StructureSize2)); return 1; } } if (4 + len != length) { - cERROR(1, "Total length %u RFC1002 length %u mismatch mid %llu", - length, 4 + len, mid); + cifs_dbg(VFS, "Total length %u RFC1002 length %u mismatch mid %llu\n", + length, 4 + len, mid); return 1; } clc_len = smb2_calc_size(hdr); if (4 + len != clc_len) { - cFYI(1, "Calculated size %u length %u mismatch mid %llu", - clc_len, 4 + len, mid); + cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n", + clc_len, 4 + len, mid); /* Windows 7 server returns 24 bytes more */ if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) return 0; @@ -267,7 +269,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) case SMB2_CHANGE_NOTIFY: default: /* BB FIXME for unimplemented cases above */ - cERROR(1, "no length check for command"); + cifs_dbg(VFS, "no length check for command\n"); break; } @@ -276,20 +278,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) * we have little choice but to ignore the data area in this case. 
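* The checks below therefore clamp a bad offset or length to zero and keep
* going instead of failing the whole response.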
*/ if (*off > 4096) { - cERROR(1, "offset %d too large, data area ignored", *off); + cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off); *len = 0; *off = 0; } else if (*off < 0) { - cERROR(1, "negative offset %d to data invalid ignore data area", - *off); + cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n", + *off); *off = 0; *len = 0; } else if (*len < 0) { - cERROR(1, "negative data length %d invalid, data area ignored", - *len); + cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n", + *len); *len = 0; } else if (*len > 128 * 1024) { - cERROR(1, "data area larger than 128K: %d", *len); + cifs_dbg(VFS, "data area larger than 128K: %d\n", *len); *len = 0; } @@ -324,7 +326,7 @@ smb2_calc_size(void *buf) goto calc_size_exit; smb2_get_data_area_len(&offset, &data_length, hdr); - cFYI(1, "SMB2 data length %d offset %d", data_length, offset); + cifs_dbg(FYI, "SMB2 data length %d offset %d\n", data_length, offset); if (data_length > 0) { /* @@ -335,15 +337,15 @@ smb2_calc_size(void *buf) * the size of the RFC1001 hdr. */ if (offset + 4 + 1 < len) { - cERROR(1, "data area offset %d overlaps SMB2 header %d", - offset + 4 + 1, len); + cifs_dbg(VFS, "data area offset %d overlaps SMB2 header %d\n", + offset + 4 + 1, len); data_length = 0; } else { len = 4 + offset + data_length; } } calc_size_exit: - cFYI(1, "SMB2 len %d", len); + cifs_dbg(FYI, "SMB2 len %d\n", len); return len; } @@ -405,7 +407,7 @@ cifs_ses_oplock_break(struct work_struct *work) rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key, lw->lease_state); - cFYI(1, "Lease release rc %d", rc); + cifs_dbg(FYI, "Lease release rc %d\n", rc); cifs_put_tlink(lw->tlink); kfree(lw); } @@ -426,15 +428,13 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); - if (!lw) { - cERROR(1, "Memory allocation failed during lease break check"); + if (!lw) return false; - } INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); lw->lease_state = rsp->NewLeaseState; - cFYI(1, "Checking for lease break"); + cifs_dbg(FYI, "Checking for lease break\n"); /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); @@ -455,9 +455,9 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) SMB2_LEASE_KEY_SIZE)) continue; - cFYI(1, "found in the open list"); - cFYI(1, "lease key match, lease break 0x%d", - le32_to_cpu(rsp->NewLeaseState)); + cifs_dbg(FYI, "found in the open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); smb2_set_oplock_level(cinode, smb2_map_lease_to_oplock(rsp->NewLeaseState)); @@ -489,9 +489,9 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) &lw->lease_break); } - cFYI(1, "found in the pending open list"); - cFYI(1, "lease key match, lease break 0x%d", - le32_to_cpu(rsp->NewLeaseState)); + cifs_dbg(FYI, "found in the pending open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); open->oplock = smb2_map_lease_to_oplock(rsp->NewLeaseState); @@ -506,7 +506,7 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) } spin_unlock(&cifs_tcp_ses_lock); kfree(lw); - cFYI(1, "Can not process lease break - no lease matched"); + cifs_dbg(FYI, "Can not process lease break - no lease matched\n"); return false; } @@ -520,7 +520,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) 
struct cifsInodeInfo *cinode; struct cifsFileInfo *cfile; - cFYI(1, "Checking for oplock break"); + cifs_dbg(FYI, "Checking for oplock break\n"); if (rsp->hdr.Command != SMB2_OPLOCK_BREAK) return false; @@ -533,7 +533,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) return false; } - cFYI(1, "oplock level 0x%d", rsp->OplockLevel); + cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel); /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); @@ -553,7 +553,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cfile->fid.volatile_fid) continue; - cFYI(1, "file id match, oplock break"); + cifs_dbg(FYI, "file id match, oplock break\n"); cinode = CIFS_I(cfile->dentry->d_inode); if (!cinode->clientCanCacheAll && @@ -573,11 +573,11 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) } spin_unlock(&cifs_file_list_lock); spin_unlock(&cifs_tcp_ses_lock); - cFYI(1, "No matching file for oplock break"); + cifs_dbg(FYI, "No matching file for oplock break\n"); return true; } } spin_unlock(&cifs_tcp_ses_lock); - cFYI(1, "Can not process oplock break for non-existent connection"); + cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); return false; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bceffe7b8f8..f2e76f3b0c6 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -38,13 +38,13 @@ change_conf(struct TCP_Server_Info *server) case 1: server->echoes = false; server->oplocks = false; - cERROR(1, "disabling echoes and oplocks"); + cifs_dbg(VFS, "disabling echoes and oplocks\n"); break; case 2: server->echoes = true; server->oplocks = false; server->echo_credits = 1; - cFYI(1, "disabling oplocks"); + cifs_dbg(FYI, "disabling oplocks\n"); break; default: server->echoes = true; @@ -147,10 +147,10 @@ smb2_dump_detail(void *buf) #ifdef CONFIG_CIFS_DEBUG2 struct smb2_hdr *smb = (struct smb2_hdr *)buf; - cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d", - smb->Command, smb->Status, smb->Flags, smb->MessageId, - smb->ProcessId); - cERROR(1, "smb buf %p len %u", smb, smb2_calc_size(smb)); + cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n", + smb->Command, smb->Status, smb->Flags, smb->MessageId, + smb->ProcessId); + cifs_dbg(VFS, "smb buf %p len %u\n", smb, smb2_calc_size(smb)); #endif } @@ -436,7 +436,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, &oplock, NULL); kfree(utf16_path); if (rc) { - cERROR(1, "open dir failed"); + cifs_dbg(VFS, "open dir failed\n"); return rc; } @@ -448,7 +448,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, srch_inf); if (rc) { - cERROR(1, "query directory failed"); + cifs_dbg(VFS, "query directory failed\n"); SMB2_close(xid, tcon, persistent_fid, volatile_fid); } return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 41d9d0725f0..2b95ce2b54e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -155,8 +155,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if ((smb2_command != SMB2_WRITE) && (smb2_command != SMB2_CREATE) && (smb2_command != SMB2_TREE_DISCONNECT)) { - cFYI(1, "can not send cmd %d while umounting", - smb2_command); + cifs_dbg(FYI, "can not send cmd %d while umounting\n", + smb2_command); return -ENODEV; } } @@ -200,7 +200,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) * back on-line */ if (!tcon->retry) { - cFYI(1, "gave up waiting on 
reconnect in smb_init"); + cifs_dbg(FYI, "gave up waiting on reconnect in smb_init\n"); return -EHOSTDOWN; } } @@ -227,7 +227,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) cifs_mark_open_files_invalid(tcon); rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); mutex_unlock(&tcon->ses->session_mutex); - cFYI(1, "reconnect tcon rc = %d", rc); + cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) goto out; atomic_inc(&tconInfoReconnectCount); @@ -335,7 +335,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) char *security_blob; int flags = CIFS_NEG_OP; - cFYI(1, "Negotiate protocol"); + cifs_dbg(FYI, "Negotiate protocol\n"); if (ses->server) server = ses->server; @@ -354,7 +354,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) else /* if override flags set only sign/seal OR them with global auth */ sec_flags = global_secflags | ses->overrideSecFlg; - cFYI(1, "sec_flags 0x%x", sec_flags); + cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); req->hdr.SessionId = 0; @@ -389,19 +389,19 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc != 0) goto neg_exit; - cFYI(1, "mode 0x%x", rsp->SecurityMode); + cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); /* BB we may eventually want to match the negotiated vs. requested dialect, even though we are only requesting one at a time */ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) - cFYI(1, "negotiated smb2.0 dialect"); + cifs_dbg(FYI, "negotiated smb2.0 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) - cFYI(1, "negotiated smb2.1 dialect"); + cifs_dbg(FYI, "negotiated smb2.1 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID)) - cFYI(1, "negotiated smb3.0 dialect"); + cifs_dbg(FYI, "negotiated smb3.0 dialect\n"); else { - cERROR(1, "Illegal dialect returned by server %d", - le16_to_cpu(rsp->DialectRevision)); + cifs_dbg(VFS, "Illegal dialect returned by server %d\n", + le16_to_cpu(rsp->DialectRevision)); rc = -EIO; goto neg_exit; } @@ -419,35 +419,34 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, &rsp->hdr); if (blob_length == 0) { - cERROR(1, "missing security blob on negprot"); + cifs_dbg(VFS, "missing security blob on negprot\n"); rc = -EIO; goto neg_exit; } - cFYI(1, "sec_flags 0x%x", sec_flags); + cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { - cFYI(1, "Signing required"); + cifs_dbg(FYI, "Signing required\n"); if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | SMB2_NEGOTIATE_SIGNING_ENABLED))) { - cERROR(1, "signing required but server lacks support"); + cifs_dbg(VFS, "signing required but server lacks support\n"); rc = -EOPNOTSUPP; goto neg_exit; } server->sec_mode |= SECMODE_SIGN_REQUIRED; } else if (sec_flags & CIFSSEC_MAY_SIGN) { - cFYI(1, "Signing optional"); + cifs_dbg(FYI, "Signing optional\n"); if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { - cFYI(1, "Server requires signing"); + cifs_dbg(FYI, "Server requires signing\n"); server->sec_mode |= SECMODE_SIGN_REQUIRED; } else { server->sec_mode &= ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); } } else { - cFYI(1, "Signing disabled"); + cifs_dbg(FYI, "Signing disabled\n"); if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) { - cERROR(1, "Server requires packet signing to be enabled" - " in /proc/fs/cifs/SecurityFlags."); + cifs_dbg(VFS, "Server requires packet signing to be enabled in 
/proc/fs/cifs/SecurityFlags\n"); rc = -EOPNOTSUPP; goto neg_exit; } @@ -489,7 +488,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, char *ntlmssp_blob = NULL; bool use_spnego = false; /* else use raw ntlmssp */ - cFYI(1, "Session Setup"); + cifs_dbg(FYI, "Session Setup\n"); if (ses->server) server = ses->server; @@ -522,7 +521,7 @@ ssetup_ntlmssp_authenticate: else /* if override flags set only sign/seal OR them with global auth */ sec_flags = global_secflags | ses->overrideSecFlg; - cFYI(1, "sec_flags 0x%x", sec_flags); + cifs_dbg(FYI, "sec_flags 0x%x\n", sec_flags); req->hdr.SessionId = 0; /* First session, not a reauthenticate */ req->VcNumber = 0; /* MBZ */ @@ -558,7 +557,7 @@ ssetup_ntlmssp_authenticate: sizeof(struct _NEGOTIATE_MESSAGE), ntlmssp_blob); */ /* BB eventually need to add this */ - cERROR(1, "spnego not supported for SMB2 yet"); + cifs_dbg(VFS, "spnego not supported for SMB2 yet\n"); rc = -EOPNOTSUPP; kfree(ntlmssp_blob); goto ssetup_exit; @@ -572,14 +571,14 @@ ssetup_ntlmssp_authenticate: ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, GFP_KERNEL); if (ntlmssp_blob == NULL) { - cERROR(1, "failed to malloc ntlmssp blob"); rc = -ENOMEM; goto ssetup_exit; } rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses, nls_cp); if (rc) { - cFYI(1, "build_ntlmssp_auth_blob failed %d", rc); + cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", + rc); goto ssetup_exit; /* BB double check error handling */ } if (use_spnego) { @@ -587,7 +586,7 @@ ssetup_ntlmssp_authenticate: &security_blob, blob_length, ntlmssp_blob); */ - cERROR(1, "spnego not supported for SMB2 yet"); + cifs_dbg(VFS, "spnego not supported for SMB2 yet\n"); rc = -EOPNOTSUPP; kfree(ntlmssp_blob); goto ssetup_exit; @@ -595,7 +594,7 @@ ssetup_ntlmssp_authenticate: security_blob = ntlmssp_blob; } } else { - cERROR(1, "illegal ntlmssp phase"); + cifs_dbg(VFS, "illegal ntlmssp phase\n"); rc = -EIO; goto ssetup_exit; } @@ -620,13 +619,13 @@ ssetup_ntlmssp_authenticate: if (resp_buftype != CIFS_NO_BUFFER && rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { if (phase != NtLmNegotiate) { - cERROR(1, "Unexpected more processing error"); + cifs_dbg(VFS, "Unexpected more processing error\n"); goto ssetup_exit; } if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != le16_to_cpu(rsp->SecurityBufferOffset)) { - cERROR(1, "Invalid security buffer offset %d", - le16_to_cpu(rsp->SecurityBufferOffset)); + cifs_dbg(VFS, "Invalid security buffer offset %d\n", + le16_to_cpu(rsp->SecurityBufferOffset)); rc = -EIO; goto ssetup_exit; } @@ -667,7 +666,7 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) int rc = 0; struct TCP_Server_Info *server; - cFYI(1, "disconnect session %p", ses); + cifs_dbg(FYI, "disconnect session %p\n", ses); if (ses && (ses->server)) server = ses->server; @@ -711,7 +710,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct TCP_Server_Info *server; __le16 *unc_path = NULL; - cFYI(1, "TCON"); + cifs_dbg(FYI, "TCON\n"); if ((ses->server) && tree) server = ses->server; @@ -775,15 +774,15 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, } if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) - cFYI(1, "connection to disk share"); + cifs_dbg(FYI, "connection to disk share\n"); else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { tcon->ipc = true; - cFYI(1, "connection to pipe share"); + cifs_dbg(FYI, "connection to pipe share\n"); } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { tcon->print = true; - cFYI(1, 
"connection to printer"); + cifs_dbg(FYI, "connection to printer\n"); } else { - cERROR(1, "unknown share type %d", rsp->ShareType); + cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; } @@ -797,7 +796,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) - cERROR(1, "DFS capability contradicts DFS flag"); + cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); tcon_exit: free_rsp_buf(resp_buftype, rsp); @@ -806,7 +805,7 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { - cERROR(1, "BAD_NETWORK_NAME: %s", tree); + cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); tcon->bad_network_name = true; } goto tcon_exit; @@ -820,7 +819,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; - cFYI(1, "Tree Disconnect"); + cifs_dbg(FYI, "Tree Disconnect\n"); if (ses && (ses->server)) server = ses->server; @@ -846,12 +845,10 @@ create_lease_buf(u8 *lease_key, u8 oplock) { struct create_lease *buf; - buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL); + buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL); if (!buf) return NULL; - memset(buf, 0, sizeof(struct create_lease)); - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) @@ -925,7 +922,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, int rc = 0; int num_iovecs = 2; - cFYI(1, "create/open"); + cifs_dbg(FYI, "create/open\n"); if (ses && (ses->server)) server = ses->server; @@ -1051,7 +1048,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype; int rc = 0; - cFYI(1, "Close"); + cifs_dbg(FYI, "Close\n"); if (ses && (ses->server)) server = ses->server; @@ -1097,20 +1094,20 @@ validate_buf(unsigned int offset, unsigned int buffer_length, if (buffer_length < min_buf_size) { - cERROR(1, "buffer length %d smaller than minimum size %d", - buffer_length, min_buf_size); + cifs_dbg(VFS, "buffer length %d smaller than minimum size %d\n", + buffer_length, min_buf_size); return -EINVAL; } /* check if beyond RFC1001 maximum length */ if ((smb_len > 0x7FFFFF) || (buffer_length > 0x7FFFFF)) { - cERROR(1, "buffer length %d or smb length %d too large", - buffer_length, smb_len); + cifs_dbg(VFS, "buffer length %d or smb length %d too large\n", + buffer_length, smb_len); return -EINVAL; } if ((begin_of_buf > end_of_smb) || (end_of_buf > end_of_smb)) { - cERROR(1, "illegal server response, bad offset to data"); + cifs_dbg(VFS, "illegal server response, bad offset to data\n"); return -EINVAL; } @@ -1155,7 +1152,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; - cFYI(1, "Query Info"); + cifs_dbg(FYI, "Query Info\n"); if (ses && (ses->server)) server = ses->server; @@ -1247,7 +1244,7 @@ SMB2_echo(struct TCP_Server_Info *server) struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 }; - cFYI(1, "In echo request"); + cifs_dbg(FYI, "In echo request\n"); rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); if (rc) @@ -1262,7 +1259,7 @@ SMB2_echo(struct TCP_Server_Info *server) rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server, CIFS_ECHO_OP); if (rc) - cFYI(1, "Echo request failed: %d", rc); + cifs_dbg(FYI, "Echo request failed: 
%d\n", rc); cifs_small_buf_release(req); return rc; @@ -1279,7 +1276,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int resp_buftype; int rc = 0; - cFYI(1, "Flush"); + cifs_dbg(FYI, "Flush\n"); if (ses && (ses->server)) server = ses->server; @@ -1379,8 +1376,9 @@ smb2_readv_callback(struct mid_q_entry *mid) .rq_pagesz = rdata->pagesz, .rq_tailsz = rdata->tailsz }; - cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, - mid->mid, mid->mid_state, rdata->result, rdata->bytes); + cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", + __func__, mid->mid, mid->mid_state, rdata->result, + rdata->bytes); switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: @@ -1392,8 +1390,8 @@ smb2_readv_callback(struct mid_q_entry *mid) rc = smb2_verify_signature(&rqst, server); if (rc) - cERROR(1, "SMB signature verification returned " - "error = %d", rc); + cifs_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); } /* FIXME: should this be counted toward the initiating task? */ task_io_account_read(rdata->bytes); @@ -1426,8 +1424,8 @@ smb2_async_readv(struct cifs_readdata *rdata) struct smb_rqst rqst = { .rq_iov = &rdata->iov, .rq_nvec = 1 }; - cFYI(1, "%s: offset=%llu bytes=%u", __func__, - rdata->offset, rdata->bytes); + cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", + __func__, rdata->offset, rdata->bytes); io_parms.tcon = tlink_tcon(rdata->cfile->tlink); io_parms.offset = rdata->offset; @@ -1481,13 +1479,13 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, if (rc) { cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); - cERROR(1, "Send error in read = %d", rc); + cifs_dbg(VFS, "Send error in read = %d\n", rc); } else { *nbytes = le32_to_cpu(rsp->DataLength); if ((*nbytes > CIFS_MAX_MSGSIZE) || (*nbytes > io_parms->length)) { - cFYI(1, "bad length %d for count %d", *nbytes, - io_parms->length); + cifs_dbg(FYI, "bad length %d for count %d\n", + *nbytes, io_parms->length); rc = -EIO; *nbytes = 0; } @@ -1597,7 +1595,8 @@ smb2_async_writev(struct cifs_writedata *wdata) rqst.rq_pagesz = wdata->pagesz; rqst.rq_tailsz = wdata->tailsz; - cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); + cifs_dbg(FYI, "async write at %llu %u bytes\n", + wdata->offset, wdata->bytes); req->Length = cpu_to_le32(wdata->bytes); @@ -1670,7 +1669,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, if (rc) { cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE); - cERROR(1, "Send error in write = %d", rc); + cifs_dbg(VFS, "Send error in write = %d\n", rc); } else *nbytes = le32_to_cpu(rsp->DataLength); @@ -1696,14 +1695,14 @@ num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size) ((char *)entryptr + next_offset); if ((char *)entryptr + size > end_of_buf) { - cERROR(1, "malformed search entry would overflow"); + cifs_dbg(VFS, "malformed search entry would overflow\n"); break; } len = le32_to_cpu(entryptr->FileNameLength); if ((char *)entryptr + len + size > end_of_buf) { - cERROR(1, "directory entry name would overflow frame " - "end of buf %p", end_of_buf); + cifs_dbg(VFS, "directory entry name would overflow frame end of buf %p\n", + end_of_buf); break; } @@ -1759,8 +1758,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1; break; default: - cERROR(1, "info level %u isn't supported", - srch_inf->info_level); + cifs_dbg(VFS, "info level %u isn't supported\n", + srch_inf->info_level); rc = -EINVAL; goto qdir_exit; } @@ 
-1824,15 +1823,15 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, num_entries(srch_inf->srch_entries_start, end_of_smb, &srch_inf->last_entry, info_buf_size); srch_inf->index_of_last_entry += srch_inf->entries_in_buffer; - cFYI(1, "num entries %d last_index %lld srch start %p srch end %p", - srch_inf->entries_in_buffer, srch_inf->index_of_last_entry, - srch_inf->srch_entries_start, srch_inf->last_entry); + cifs_dbg(FYI, "num entries %d last_index %lld srch start %p srch end %p\n", + srch_inf->entries_in_buffer, srch_inf->index_of_last_entry, + srch_inf->srch_entries_start, srch_inf->last_entry); if (resp_buftype == CIFS_LARGE_BUFFER) srch_inf->smallBuf = false; else if (resp_buftype == CIFS_SMALL_BUFFER) srch_inf->smallBuf = true; else - cERROR(1, "illegal search buffer type"); + cifs_dbg(VFS, "illegal search buffer type\n"); if (rsp->hdr.Status == STATUS_NO_MORE_FILES) srch_inf->endOfSearch = 1; @@ -2017,7 +2016,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_oplock_break *req = NULL; - cFYI(1, "SMB2_oplock_break"); + cifs_dbg(FYI, "SMB2_oplock_break\n"); rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); if (rc) @@ -2033,7 +2032,7 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); - cFYI(1, "Send error in Oplock Break = %d", rc); + cifs_dbg(FYI, "Send error in Oplock Break = %d\n", rc); } return rc; @@ -2058,7 +2057,7 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, int rc; struct smb2_query_info_req *req; - cFYI(1, "Query FSInfo level %d", level); + cifs_dbg(FYI, "Query FSInfo level %d\n", level); if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) return -EIO; @@ -2131,7 +2130,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, int resp_buf_type; unsigned int count; - cFYI(1, "smb2_lockv num lock %d", num_lock); + cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock); rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req); if (rc) @@ -2155,7 +2154,7 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP); if (rc) { - cFYI(1, "Send error in smb2_lockv = %d", rc); + cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc); cifs_stats_fail_inc(tcon, SMB2_LOCK_HE); } @@ -2186,7 +2185,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_lease_ack *req = NULL; - cFYI(1, "SMB2_lease_break"); + cifs_dbg(FYI, "SMB2_lease_break\n"); rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); if (rc) @@ -2204,7 +2203,7 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, if (rc) { cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); - cFYI(1, "Send error in Lease Break = %d", rc); + cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc); } return rc; diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8dd73e61d76..01f0ac80078 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -55,13 +55,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_setkey(server->secmech.hmacsha256, server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { - cERROR(1, "%s: Could not update with response\n", __func__); + cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); if (rc) { 
- cERROR(1, "%s: Could not init md5\n", __func__); + cifs_dbg(VFS, "%s: Could not init md5\n", __func__); return rc; } @@ -69,7 +69,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { - cERROR(1, "null iovec entry"); + cifs_dbg(VFS, "null iovec entry\n"); return -EIO; } /* @@ -90,8 +90,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) iov[i].iov_base, iov[i].iov_len); } if (rc) { - cERROR(1, "%s: Could not update with payload\n", - __func__); + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); return rc; } } @@ -109,7 +109,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash, sigptr); if (rc) - cERROR(1, "%s: Could not generate sha256 hash\n", __func__); + cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE); @@ -119,7 +119,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - cFYI(1, "smb3 signatures not supported yet"); + cifs_dbg(FYI, "smb3 signatures not supported yet\n"); return -EOPNOTSUPP; } @@ -163,8 +163,8 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) /* Do not need to verify session setups with signature "BSRSPYL " */ if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0) - cFYI(1, "dummy signature received for smb command 0x%x", - smb2_pdu->Command); + cifs_dbg(FYI, "dummy signature received for smb command 0x%x\n", + smb2_pdu->Command); /* * Save off the origiginal signature so we can modify the smb and check @@ -205,7 +205,7 @@ smb2_mid_entry_alloc(const struct smb2_hdr *smb_buffer, struct mid_q_entry *temp; if (server == NULL) { - cERROR(1, "Null TCP session in smb2_mid_entry_alloc"); + cifs_dbg(VFS, "Null TCP session in smb2_mid_entry_alloc\n"); return NULL; } @@ -241,7 +241,7 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf, return -ENOENT; if (ses->server->tcpStatus == CifsNeedReconnect) { - cFYI(1, "tcp session dead - return to caller to retry"); + cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); return -EAGAIN; } @@ -281,8 +281,8 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, rc = smb2_verify_signature(&rqst, server); if (rc) - cERROR(1, "SMB signature verification returned error = " - "%d", rc); + cifs_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); } return map_smb2_to_linux_error(mid->resp_buf, log_error); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index a0a58fbe2c1..43eb1367b10 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -78,7 +78,7 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_des)) { rc = PTR_ERR(tfm_des); - cERROR(1, "could not allocate des crypto API"); + cifs_dbg(VFS, "could not allocate des crypto API\n"); goto smbhash_err; } @@ -91,7 +91,7 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key) rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8); if (rc) - cERROR(1, "could not encrypt crypt key rc: %d", rc); + cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc); crypto_free_blkcipher(tfm_des); smbhash_err: @@ -139,14 +139,14 @@ mdfour(unsigned char *md4_hash, unsigned char 
*link_str, int link_len) md4 = crypto_alloc_shash("md4", 0, 0); if (IS_ERR(md4)) { rc = PTR_ERR(md4); - cERROR(1, "%s: Crypto md4 allocation error %d", __func__, rc); + cifs_dbg(VFS, "%s: Crypto md4 allocation error %d\n", + __func__, rc); return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md4); sdescmd4 = kmalloc(size, GFP_KERNEL); if (!sdescmd4) { rc = -ENOMEM; - cERROR(1, "%s: Memory allocation failure", __func__); goto mdfour_err; } sdescmd4->shash.tfm = md4; @@ -154,17 +154,17 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) rc = crypto_shash_init(&sdescmd4->shash); if (rc) { - cERROR(1, "%s: Could not init md4 shash", __func__); + cifs_dbg(VFS, "%s: Could not init md4 shash\n", __func__); goto mdfour_err; } rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len); if (rc) { - cERROR(1, "%s: Could not update with link_str", __func__); + cifs_dbg(VFS, "%s: Could not update with link_str\n", __func__); goto mdfour_err; } rc = crypto_shash_final(&sdescmd4->shash, md4_hash); if (rc) - cERROR(1, "%s: Could not genereate md4 hash", __func__); + cifs_dbg(VFS, "%s: Could not generate md4 hash\n", __func__); mdfour_err: crypto_free_shash(md4); @@ -238,7 +238,8 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24, rc = E_md4hash(passwd, p16, codepage); if (rc) { - cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); + cifs_dbg(FYI, "%s Can't generate NT hash, error: %d\n", + __func__, rc); return rc; } memcpy(p21, p16, 16); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1a528680ec5..bfbf4700d16 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -49,7 +49,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) struct mid_q_entry *temp; if (server == NULL) { - cERROR(1, "Null TCP session in AllocMidQEntry"); + cifs_dbg(VFS, "Null TCP session in AllocMidQEntry\n"); return NULL; } @@ -61,7 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp->mid = smb_buffer->Mid; /* always LE */ temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); - cFYI(1, "For smb_command %d", smb_buffer->Command); + cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; @@ -179,17 +179,11 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, */ rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], n_vec - first_vec, remaining); - if (rc == -ENOSPC || rc == -EAGAIN) { - /* - * Catch if a low level driver returns -ENOSPC. This - * WARN_ON will be removed by 3.10 if no one reports - * seeing this. 
- */ - WARN_ON_ONCE(rc == -ENOSPC); + if (rc == -EAGAIN) { i++; if (i >= 14 || (!server->noblocksnd && (i > 2))) { - cERROR(1, "sends on sock %p stuck for 15 " - "seconds", ssocket); + cifs_dbg(VFS, "sends on sock %p stuck for 15 seconds\n", + ssocket); rc = -EAGAIN; break; } @@ -209,14 +203,14 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec, } if (rc > remaining) { - cERROR(1, "sent %d requested %d", rc, remaining); + cifs_dbg(VFS, "sent %d requested %d\n", rc, remaining); break; } if (rc == 0) { /* should never happen, letting socket clear before retrying is our only obvious option here */ - cERROR(1, "tcp sent no data"); + cifs_dbg(VFS, "tcp sent no data\n"); msleep(500); continue; } @@ -291,7 +285,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) if (ssocket == NULL) return -ENOTSOCK; - cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); /* cork the socket */ @@ -324,8 +318,8 @@ uncork: (char *)&val, sizeof(val)); if ((total_len > 0) && (total_len != smb_buf_length + 4)) { - cFYI(1, "partial send (wanted=%u sent=%zu): terminating " - "session", smb_buf_length + 4, total_len); + cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", + smb_buf_length + 4, total_len); /* * If we have only sent part of an SMB then the next SMB could * be taken as the remainder of this one. We need to kill the @@ -335,7 +329,8 @@ uncork: } if (rc < 0 && rc != -EINTR) - cERROR(1, "Error %d sending data on socket to server", rc); + cifs_dbg(VFS, "Error %d sending data on socket to server\n", + rc); else rc = 0; @@ -427,7 +422,7 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, } if (ses->server->tcpStatus == CifsNeedReconnect) { - cFYI(1, "tcp session dead - return to caller to retry"); + cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); return -EAGAIN; } @@ -527,6 +522,9 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, rc = smb_send_rqst(server, rqst); cifs_in_send_dec(server); cifs_save_when_sent(mid); + + if (rc < 0) + server->sequence_number -= 2; mutex_unlock(&server->srv_mutex); if (rc == 0) @@ -559,7 +557,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, iov[0].iov_len = get_rfc1002_length(in_buf) + 4; flags |= CIFS_NO_RESP; rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); - cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); + cifs_dbg(NOISY, "SendRcvNoRsp flags %d rc %d\n", flags, rc); return rc; } @@ -569,8 +567,8 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) { int rc = 0; - cFYI(1, "%s: cmd=%d mid=%llu state=%d", __func__, - le16_to_cpu(mid->command), mid->mid, mid->mid_state); + cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n", + __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state); spin_lock(&GlobalMid_Lock); switch (mid->mid_state) { @@ -588,8 +586,8 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) break; default: list_del_init(&mid->qhead); - cERROR(1, "%s: invalid mid state mid=%llu state=%d", __func__, - mid->mid, mid->mid_state); + cifs_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", + __func__, mid->mid, mid->mid_state); rc = -EIO; } spin_unlock(&GlobalMid_Lock); @@ -624,10 +622,10 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server, iov.iov_len = len; /* FIXME: add code to kill session */ rc = 
cifs_verify_signature(&rqst, server, - mid->sequence_number + 1); + mid->sequence_number); if (rc) - cERROR(1, "SMB signature verification returned error = " - "%d", rc); + cifs_dbg(VFS, "SMB signature verification returned error = %d\n", + rc); } /* BB special case reconnect tid and uid here? */ @@ -672,7 +670,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, if ((ses == NULL) || (ses->server == NULL)) { cifs_small_buf_release(buf); - cERROR(1, "Null session"); + cifs_dbg(VFS, "Null session\n"); return -EIO; } @@ -716,6 +714,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, cifs_in_send_dec(ses->server); cifs_save_when_sent(midQ); + if (rc < 0) + ses->server->sequence_number -= 2; mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { @@ -752,7 +752,7 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, if (!midQ->resp_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) { rc = -EIO; - cFYI(1, "Bad MID state?"); + cifs_dbg(FYI, "Bad MID state?\n"); goto out; } @@ -788,11 +788,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, struct mid_q_entry *midQ; if (ses == NULL) { - cERROR(1, "Null smb session"); + cifs_dbg(VFS, "Null smb session\n"); return -EIO; } if (ses->server == NULL) { - cERROR(1, "Null tcp session"); + cifs_dbg(VFS, "Null tcp session\n"); return -EIO; } @@ -805,8 +805,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cERROR(1, "Illegal length, greater than maximum frame, %d", - be32_to_cpu(in_buf->smb_buf_length)); + cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n", + be32_to_cpu(in_buf->smb_buf_length)); return -EIO; } @@ -840,6 +840,10 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); cifs_in_send_dec(ses->server); cifs_save_when_sent(midQ); + + if (rc < 0) + ses->server->sequence_number -= 2; + mutex_unlock(&ses->server->srv_mutex); if (rc < 0) @@ -871,7 +875,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, if (!midQ->resp_buf || !out_buf || midQ->mid_state != MID_RESPONSE_RECEIVED) { rc = -EIO; - cERROR(1, "Bad MID state?"); + cifs_dbg(VFS, "Bad MID state?\n"); goto out; } @@ -921,13 +925,13 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_ses *ses; if (tcon == NULL || tcon->ses == NULL) { - cERROR(1, "Null smb session"); + cifs_dbg(VFS, "Null smb session\n"); return -EIO; } ses = tcon->ses; if (ses->server == NULL) { - cERROR(1, "Null tcp session"); + cifs_dbg(VFS, "Null tcp session\n"); return -EIO; } @@ -940,8 +944,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - cERROR(1, "Illegal length, greater than maximum frame, %d", - be32_to_cpu(in_buf->smb_buf_length)); + cifs_dbg(VFS, "Illegal length, greater than maximum frame, %d\n", + be32_to_cpu(in_buf->smb_buf_length)); return -EIO; } @@ -973,6 +977,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); cifs_in_send_dec(ses->server); cifs_save_when_sent(midQ); + + if (rc < 0) + ses->server->sequence_number -= 2; + mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { @@ -1038,7 +1046,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, /* rcvd frame is ok */ if (out_buf == NULL || midQ->mid_state != 
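One behavioural change rides along with the macro sweep in transport.c: every send path (cifs_call_async, SendReceive2, SendReceive, SendReceiveBlockingLock) now rewinds the signing sequence number by two when smb_send() fails. The likely reasoning -- stated here as an assumption, not taken from the patch itself -- is that a signed SMB exchange consumes one sequence number for the request and one for the response, so a request that never reached the wire must hand both back or every later signature would be computed against a counter the server never saw. The shared shape of the fix, as it appears at each call site:

	rc = smb_send(ses->server, in_buf,
		      be32_to_cpu(in_buf->smb_buf_length));
	cifs_in_send_dec(ses->server);
	cifs_save_when_sent(midQ);

	/* a failed send consumed neither of the two reserved slots */
	if (rc < 0)
		ses->server->sequence_number -= 2;

	mutex_unlock(&ses->server->srv_mutex);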
MID_RESPONSE_RECEIVED) { rc = -EIO; - cERROR(1, "Bad MID state?"); + cifs_dbg(VFS, "Bad MID state?\n"); goto out; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 5142f2c6027..09afda4cc58 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -68,12 +68,12 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) goto remove_ea_exit; } if (ea_name == NULL) { - cFYI(1, "Null xattr names not supported"); + cifs_dbg(FYI, "Null xattr names not supported\n"); } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))) { - cFYI(1, - "illegal xattr request %s (only user namespace supported)", - ea_name); + cifs_dbg(FYI, + "illegal xattr request %s (only user namespace supported)\n", + ea_name); /* BB what if no namespace prefix? */ /* Should we just pass them to server, except for system and perhaps security prefixes? */ @@ -134,19 +134,19 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, search server for EAs or streams to returns as xattrs */ if (value_size > MAX_EA_VALUE_SIZE) { - cFYI(1, "size of EA value too large"); + cifs_dbg(FYI, "size of EA value too large\n"); rc = -EOPNOTSUPP; goto set_ea_exit; } if (ea_name == NULL) { - cFYI(1, "Null xattr names not supported"); + cifs_dbg(FYI, "Null xattr names not supported\n"); } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto set_ea_exit; if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) - cFYI(1, "attempt to set cifs inode metadata"); + cifs_dbg(FYI, "attempt to set cifs inode metadata\n"); ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, @@ -167,8 +167,6 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, struct cifs_ntsd *pacl; pacl = kmalloc(value_size, GFP_KERNEL); if (!pacl) { - cFYI(1, "%s: Can't allocate memory for ACL", - __func__); rc = -ENOMEM; } else { memcpy(pacl, ea_value, value_size); @@ -179,7 +177,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, kfree(pacl); } #else - cFYI(1, "Set CIFS ACL not supported yet"); + cifs_dbg(FYI, "Set CIFS ACL not supported yet\n"); #endif /* CONFIG_CIFS_ACL */ } else { int temp; @@ -193,9 +191,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, ACL_TYPE_ACCESS, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, "set POSIX ACL rc %d", rc); + cifs_dbg(FYI, "set POSIX ACL rc %d\n", rc); #else - cFYI(1, "set POSIX ACL not supported"); + cifs_dbg(FYI, "set POSIX ACL not supported\n"); #endif } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { @@ -206,13 +204,13 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, ACL_TYPE_DEFAULT, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, "set POSIX default ACL rc %d", rc); + cifs_dbg(FYI, "set POSIX default ACL rc %d\n", rc); #else - cFYI(1, "set default POSIX ACL not supported"); + cifs_dbg(FYI, "set default POSIX ACL not supported\n"); #endif } else { - cFYI(1, "illegal xattr request %s (only user namespace" - " supported)", ea_name); + cifs_dbg(FYI, "illegal xattr request %s (only user namespace supported)\n", + ea_name); /* BB what if no namespace prefix? */ /* Should we just pass them to server, except for system and perhaps security prefixes? 
*/ @@ -263,14 +261,14 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ if (ea_name == NULL) { - cFYI(1, "Null xattr names not supported"); + cifs_dbg(FYI, "Null xattr names not supported\n"); } else if (strncmp(ea_name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { - cFYI(1, "attempt to query cifs inode metadata"); + cifs_dbg(FYI, "attempt to query cifs inode metadata\n"); /* revalidate/getattr then populate from inode */ } /* BB add else when above is implemented */ ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ @@ -295,7 +293,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); #else - cFYI(1, "Query POSIX ACL not supported yet"); + cifs_dbg(FYI, "Query POSIX ACL not supported yet\n"); #endif /* CONFIG_CIFS_POSIX */ } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { @@ -307,7 +305,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); #else - cFYI(1, "Query POSIX default ACL not supported yet"); + cifs_dbg(FYI, "Query POSIX default ACL not supported yet\n"); #endif /* CONFIG_CIFS_POSIX */ } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, strlen(CIFS_XATTR_CIFS_ACL)) == 0) { @@ -319,8 +317,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, full_path, &acllen); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); - cERROR(1, "%s: error %zd getting sec desc", - __func__, rc); + cifs_dbg(VFS, "%s: error %zd getting sec desc\n", + __func__, rc); } else { if (ea_value) { if (acllen > buf_size) @@ -332,18 +330,18 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, kfree(pacl); } #else - cFYI(1, "Query CIFS ACL not supported yet"); + cifs_dbg(FYI, "Query CIFS ACL not supported yet\n"); #endif /* CONFIG_CIFS_ACL */ } else if (strncmp(ea_name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { - cFYI(1, "Trusted xattr namespace not supported yet"); + cifs_dbg(FYI, "Trusted xattr namespace not supported yet\n"); } else if (strncmp(ea_name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { - cFYI(1, "Security xattr namespace not supported yet"); + cifs_dbg(FYI, "Security xattr namespace not supported yet\n"); } else - cFYI(1, - "illegal xattr request %s (only user namespace supported)", - ea_name); + cifs_dbg(FYI, + "illegal xattr request %s (only user namespace supported)\n", + ea_name); /* We could add an additional check for streams ie if proc/fs/cifs/streamstoxattr is set then diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index a8ece9a33ae..2c9e62c2bfd 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -16,21 +16,27 @@ #ifndef _ASM_GENERIC_CPUTIME_NSECS_H #define _ASM_GENERIC_CPUTIME_NSECS_H +#include <linux/math64.h> + typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; #define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_div(__ct, divisor) div_u64((__force u64)__ct, divisor) +#define cputime_div_rem(__ct, divisor, remainder) \ + div_u64_rem((__force u64)__ct, divisor, remainder); + /* * Convert cputime <-> jiffies (HZ) */ #define cputime_to_jiffies(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) + 
cputime_div(__ct, NSEC_PER_SEC / HZ) #define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__jif) \ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) #define cputime64_to_jiffies64(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) + cputime_div(__ct, NSEC_PER_SEC / HZ) #define jiffies64_to_cputime64(__jif) \ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) @@ -45,7 +51,7 @@ typedef u64 __nocast cputime64_t; * Convert cputime <-> microseconds */ #define cputime_to_usecs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_USEC) + cputime_div(__ct, NSEC_PER_USEC) #define usecs_to_cputime(__usecs) \ (__force cputime_t)((__usecs) * NSEC_PER_USEC) #define usecs_to_cputime64(__usecs) \ @@ -55,7 +61,7 @@ typedef u64 __nocast cputime64_t; * Convert cputime <-> seconds */ #define cputime_to_secs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_SEC) + cputime_div(__ct, NSEC_PER_SEC) #define secs_to_cputime(__secs) \ (__force cputime_t)((__secs) * NSEC_PER_SEC) @@ -69,8 +75,10 @@ static inline cputime_t timespec_to_cputime(const struct timespec *val) } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) { - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; + u32 rem; + + val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem); + val->tv_nsec = rem; } /* @@ -83,15 +91,17 @@ static inline cputime_t timeval_to_cputime(const struct timeval *val) } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) { - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; + u32 rem; + + val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem); + val->tv_usec = rem / NSEC_PER_USEC; } /* * Convert cputime <-> clock (USER_HZ) */ #define cputime_to_clock_t(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) + cputime_div(__ct, (NSEC_PER_SEC / USER_HZ)) #define clock_t_to_cputime(__x) \ (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h index b76ebd08ff8..4c120a1e0ca 100644 --- a/include/linux/acpi_gpio.h +++ b/include/linux/acpi_gpio.h @@ -1,13 +1,25 @@ #ifndef _LINUX_ACPI_GPIO_H_ #define _LINUX_ACPI_GPIO_H_ +#include <linux/device.h> #include <linux/errno.h> #include <linux/gpio.h> +/** + * struct acpi_gpio_info - ACPI GPIO specific information + * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo + */ +struct acpi_gpio_info { + bool gpioint; +}; + #ifdef CONFIG_GPIO_ACPI int acpi_get_gpio(char *path, int pin); +int acpi_get_gpio_by_index(struct device *dev, int index, + struct acpi_gpio_info *info); void acpi_gpiochip_request_interrupts(struct gpio_chip *chip); +void acpi_gpiochip_free_interrupts(struct gpio_chip *chip); #else /* CONFIG_GPIO_ACPI */ @@ -16,7 +28,14 @@ static inline int acpi_get_gpio(char *path, int pin) return -ENODEV; } +static inline int acpi_get_gpio_by_index(struct device *dev, int index, + struct acpi_gpio_info *info) +{ + return -ENODEV; +} + static inline void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { } +static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { } #endif /* CONFIG_GPIO_ACPI */ diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 1c504ca5bdb..d8a97ec0e2b 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -72,5 +72,6 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev, #define BGPIOF_BIG_ENDIAN BIT(0) #define 
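The cputime_nsecs.h hunks all make one substitution: open-coded 64-bit divisions such as (__force u64)(__ct) / NSEC_PER_SEC become cputime_div()/cputime_div_rem(), i.e. div_u64()/div_u64_rem() from <linux/math64.h>. The motivation, offered as our reading rather than the changelog's: a bare '/' on a u64 makes 32-bit targets emit a call to the libgcc routine __udivdi3, which the kernel does not link, whereas div_u64() always compiles. A small sketch mirroring the new cputime_to_timespec():

	#include <linux/math64.h>
	#include <linux/time.h>

	/* Sketch: split a nanosecond count into a timespec without a
	 * bare u64 division (which a 32-bit kernel cannot link). */
	static inline void demo_ns_to_timespec(u64 ns, struct timespec *ts)
	{
		u32 rem;

		/* div_u64_rem() divides a u64 by a u32, returning the
		 * quotient and storing the 32-bit remainder via &rem */
		ts->tv_sec = div_u64_rem(ns, NSEC_PER_SEC, &rem);
		ts->tv_nsec = rem;
	}

The remainder variant matters for the timespec/timeval conversions because computing quotient and remainder in one call avoids paying for a second expensive division.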
BGPIOF_UNREADABLE_REG_SET BIT(1) /* reg_set is unreadable */ #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ +#define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) #endif /* __BASIC_MMIO_GPIO_H */ diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index d4080f309b5..5f338684413 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -52,6 +52,9 @@ struct ceph_auth_client_ops { */ int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); + /* ensure that an existing authorizer is up to date */ + int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len); void (*destroy_authorizer)(struct ceph_auth_client *ac, @@ -75,6 +78,8 @@ struct ceph_auth_client { u64 global_id; /* our unique id in system */ const struct ceph_crypto_key *key; /* our secret key */ unsigned want_keys; /* which services we want */ + + struct mutex mutex; }; extern struct ceph_auth_client *ceph_auth_init(const char *name, @@ -94,5 +99,18 @@ extern int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len); extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); +extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *auth); +extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, + struct ceph_authorizer *a); +extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *a); +extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, + size_t len); +extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, + int peer_type); #endif diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 76554cecaab..4c42080347a 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -41,6 +41,7 @@ */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ @@ -51,6 +52,7 @@ #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 360d9d08ca9..379f7150899 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -8,6 +8,23 @@ #include <linux/ceph/types.h> +/* This seemed to be the easiest place to define these */ + +#define U8_MAX ((u8)(~0U)) +#define U16_MAX ((u16)(~0U)) +#define U32_MAX ((u32)(~0U)) +#define U64_MAX ((u64)(~0ULL)) + +#define S8_MAX ((s8)(U8_MAX >> 1)) +#define S16_MAX ((s16)(U16_MAX >> 1)) +#define S32_MAX ((s32)(U32_MAX >> 1)) +#define S64_MAX ((s64)(U64_MAX >> 1LL)) + +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define S64_MIN ((s64)(-S64_MAX - 1LL)) + /* * in all cases, * void **p pointer to position pointer @@ -137,14 +154,19 @@ bad: static inline void ceph_decode_timespec(struct timespec *ts, const struct ceph_timespec *tv) { - ts->tv_sec = le32_to_cpu(tv->tv_sec); - ts->tv_nsec = le32_to_cpu(tv->tv_nsec); + ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec); + ts->tv_nsec = 
(long)le32_to_cpu(tv->tv_nsec); } static inline void ceph_encode_timespec(struct ceph_timespec *tv, const struct timespec *ts) { - tv->tv_sec = cpu_to_le32(ts->tv_sec); - tv->tv_nsec = cpu_to_le32(ts->tv_nsec); + BUG_ON(ts->tv_sec < 0); + BUG_ON(ts->tv_sec > (__kernel_time_t)U32_MAX); + BUG_ON(ts->tv_nsec < 0); + BUG_ON(ts->tv_nsec > (long)U32_MAX); + + tv->tv_sec = cpu_to_le32((u32)ts->tv_sec); + tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec); } /* diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 29818fc3fa4..2e3024881a5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -66,6 +66,7 @@ struct ceph_options { #define CEPH_OSD_IDLE_TTL_DEFAULT 60 #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) +#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" @@ -156,31 +157,11 @@ struct ceph_snap_context { u64 snaps[]; }; -static inline struct ceph_snap_context * -ceph_get_snap_context(struct ceph_snap_context *sc) -{ - /* - printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), - atomic_read(&sc->nref)+1); - */ - if (sc) - atomic_inc(&sc->nref); - return sc; -} - -static inline void ceph_put_snap_context(struct ceph_snap_context *sc) -{ - if (!sc) - return; - /* - printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), - atomic_read(&sc->nref)-1); - */ - if (atomic_dec_and_test(&sc->nref)) { - /*printk(" deleting snap_context %p\n", sc);*/ - kfree(sc); - } -} +extern struct ceph_snap_context *ceph_create_snap_context(u32 snap_count, + gfp_t gfp_flags); +extern struct ceph_snap_context *ceph_get_snap_context( + struct ceph_snap_context *sc); +extern void ceph_put_snap_context(struct ceph_snap_context *sc); /* * calculate the number of pages a given length and offset map onto, diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 60903e0f665..7c1420bb1dc 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -64,6 +64,77 @@ struct ceph_messenger { u32 required_features; }; +enum ceph_msg_data_type { + CEPH_MSG_DATA_NONE, /* message contains no data payload */ + CEPH_MSG_DATA_PAGES, /* data source/destination is a page array */ + CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */ +#ifdef CONFIG_BLOCK + CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ +#endif /* CONFIG_BLOCK */ +}; + +static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) +{ + switch (type) { + case CEPH_MSG_DATA_NONE: + case CEPH_MSG_DATA_PAGES: + case CEPH_MSG_DATA_PAGELIST: +#ifdef CONFIG_BLOCK + case CEPH_MSG_DATA_BIO: +#endif /* CONFIG_BLOCK */ + return true; + default: + return false; + } +} + +struct ceph_msg_data { + struct list_head links; /* ceph_msg->data */ + enum ceph_msg_data_type type; + union { +#ifdef CONFIG_BLOCK + struct { + struct bio *bio; + size_t bio_length; + }; +#endif /* CONFIG_BLOCK */ + struct { + struct page **pages; /* NOT OWNER. 
*/ + size_t length; /* total # bytes */ + unsigned int alignment; /* first page */ + }; + struct ceph_pagelist *pagelist; + }; +}; + +struct ceph_msg_data_cursor { + size_t total_resid; /* across all data items */ + struct list_head *data_head; /* = &ceph_msg->data */ + + struct ceph_msg_data *data; /* current data item */ + size_t resid; /* bytes not yet consumed */ + bool last_piece; /* current is last piece */ + bool need_crc; /* crc update needed */ + union { +#ifdef CONFIG_BLOCK + struct { /* bio */ + struct bio *bio; /* bio from list */ + unsigned int vector_index; /* vector from bio */ + unsigned int vector_offset; /* bytes from vector */ + }; +#endif /* CONFIG_BLOCK */ + struct { /* pages */ + unsigned int page_offset; /* offset in page */ + unsigned short page_index; /* index in array */ + unsigned short page_count; /* pages in array */ + }; + struct { /* pagelist */ + struct page *page; /* page from list */ + size_t offset; /* bytes from list */ + }; + }; +}; + /* * a single message. it contains a header (src, dest, message type, etc.), * footer (crc values, mainly), a "front" message body, and possibly a @@ -74,21 +145,15 @@ struct ceph_msg { struct ceph_msg_footer footer; /* footer */ struct kvec front; /* unaligned blobs of message */ struct ceph_buffer *middle; - struct page **pages; /* data payload. NOT OWNER. */ - unsigned nr_pages; /* size of page array */ - unsigned page_alignment; /* io offset in first page */ - struct ceph_pagelist *pagelist; /* instead of pages */ + + size_t data_length; + struct list_head data; + struct ceph_msg_data_cursor cursor; struct ceph_connection *con; - struct list_head list_head; + struct list_head list_head; /* links for connection lists */ struct kref kref; -#ifdef CONFIG_BLOCK - struct bio *bio; /* instead of pages/pagelist */ - struct bio *bio_iter; /* bio iterator */ - int bio_seg; /* current bio segment */ -#endif /* CONFIG_BLOCK */ - struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; bool needs_out_seq; @@ -98,12 +163,6 @@ struct ceph_msg { struct ceph_msgpool *pool; }; -struct ceph_msg_pos { - int page, page_pos; /* which page; offset in page */ - int data_pos; /* offset in data payload */ - bool did_page_crc; /* true if we've calculated crc for current page */ -}; - /* ceph connection fault delay defaults, for exponential backoff */ #define BASE_DELAY_INTERVAL (HZ/2) #define MAX_DELAY_INTERVAL (5 * 60 * HZ) @@ -161,7 +220,6 @@ struct ceph_connection { struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ bool out_msg_done; - struct ceph_msg_pos out_msg_pos; struct kvec out_kvec[8], /* sending header/footer data */ *out_kvec_cur; @@ -175,7 +233,6 @@ struct ceph_connection { /* message in temps */ struct ceph_msg_header in_hdr; struct ceph_msg *in_msg; - struct ceph_msg_pos in_msg_pos; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ char in_tag; /* protocol control byte */ @@ -218,6 +275,15 @@ extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); extern void ceph_con_keepalive(struct ceph_connection *con); +extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, + size_t length, size_t alignment); +extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, + struct ceph_pagelist *pagelist); +#ifdef CONFIG_BLOCK +extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, + size_t length); +#endif /* CONFIG_BLOCK */ + extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool 
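The messenger rework above replaces the fixed pages/pagelist/bio fields (and the old ceph_msg_pos bookkeeping) in struct ceph_msg with a list of typed ceph_msg_data items plus a cursor that tracks how much of each item has been consumed. Going by the prototypes declared in this hunk alone, attaching payloads to an outgoing message becomes additive; a rough sketch, where front_len, pages, length, alignment and pagelist are all assumed caller-side values:

	struct ceph_msg *msg;

	msg = ceph_msg_new(CEPH_MSG_OSD_OP, front_len, GFP_NOFS, false);
	if (!msg)
		return -ENOMEM;

	/* each call appends one typed item to msg->data; the embedded
	 * cursor later walks the items in order as bytes hit the wire */
	ceph_msg_data_add_pages(msg, pages, length, alignment);
	ceph_msg_data_add_pagelist(msg, pagelist);

Because the items live on a list, a message can now carry more than one data source, where the old single-pointer fields forced exactly one of pages, pagelist or bio per message.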
can_fail); extern void ceph_msg_kfree(struct ceph_msg *m); diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index 680d3d648ca..3d94a73b5f3 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -87,6 +87,7 @@ struct ceph_entity_inst { #define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */ #define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */ #define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */ +#define CEPH_MSGR_TAG_SEQ 13 /* 64-bit int follows with seen seq number */ /* diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 1dd5d466b6f..186db0bf495 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -29,6 +29,7 @@ struct ceph_authorizer; */ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, struct ceph_msg *); +typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); /* a given osd we're communicating with */ struct ceph_osd { @@ -48,7 +49,67 @@ struct ceph_osd { }; -#define CEPH_OSD_MAX_OP 10 +#define CEPH_OSD_MAX_OP 2 + +enum ceph_osd_data_type { + CEPH_OSD_DATA_TYPE_NONE = 0, + CEPH_OSD_DATA_TYPE_PAGES, + CEPH_OSD_DATA_TYPE_PAGELIST, +#ifdef CONFIG_BLOCK + CEPH_OSD_DATA_TYPE_BIO, +#endif /* CONFIG_BLOCK */ +}; + +struct ceph_osd_data { + enum ceph_osd_data_type type; + union { + struct { + struct page **pages; + u64 length; + u32 alignment; + bool pages_from_pool; + bool own_pages; + }; + struct ceph_pagelist *pagelist; +#ifdef CONFIG_BLOCK + struct { + struct bio *bio; /* list of bios */ + size_t bio_length; /* total in list */ + }; +#endif /* CONFIG_BLOCK */ + }; +}; + +struct ceph_osd_req_op { + u16 op; /* CEPH_OSD_OP_* */ + u32 payload_len; + union { + struct ceph_osd_data raw_data_in; + struct { + u64 offset, length; + u64 truncate_size; + u32 truncate_seq; + struct ceph_osd_data osd_data; + } extent; + struct { + const char *class_name; + const char *method_name; + struct ceph_osd_data request_info; + struct ceph_osd_data request_data; + struct ceph_osd_data response_data; + __u8 class_len; + __u8 method_len; + __u8 argc; + } cls; + struct { + u64 cookie; + u64 ver; + u32 prot_ver; + u32 timeout; + __u8 flag; + } watch; + }; +}; /* an in-flight request */ struct ceph_osd_request { @@ -63,15 +124,14 @@ struct ceph_osd_request { int r_pg_osds[CEPH_PG_MAX_SIZE]; int r_num_pg_osds; - struct ceph_connection *r_con_filling_msg; - struct ceph_msg *r_request, *r_reply; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ - int r_num_ops; - /* encoded message content */ - struct ceph_osd_op *r_request_ops; + /* request osd ops array */ + unsigned int r_num_ops; + struct ceph_osd_req_op r_ops[CEPH_OSD_MAX_OP]; + /* these are updated on each send */ __le32 *r_request_osdmap_epoch; __le32 *r_request_flags; @@ -85,12 +145,14 @@ struct ceph_osd_request { s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; + int r_completed; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; struct completion r_completion, r_safe_completion; - ceph_osdc_callback_t r_callback, r_safe_callback; + ceph_osdc_callback_t r_callback; + ceph_osdc_unsafe_callback_t r_unsafe_callback; struct ceph_eversion r_reassert_version; struct list_head r_unsafe_item; @@ -104,16 +166,6 @@ struct ceph_osd_request { struct ceph_file_layout r_file_layout; struct ceph_snap_context *r_snapc; /* snap context for writes */ - unsigned r_num_pages; /* size of page array (follows) */ - unsigned 
r_page_alignment; /* io offset in first page */ - struct page **r_pages; /* pages for data payload */ - int r_pages_from_pool; - int r_own_pages; /* if true, i own page list */ -#ifdef CONFIG_BLOCK - struct bio *r_bio; /* instead of pages */ -#endif - - struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { @@ -172,48 +224,8 @@ struct ceph_osd_client { struct workqueue_struct *notify_wq; }; -struct ceph_osd_req_op { - u16 op; /* CEPH_OSD_OP_* */ - u32 payload_len; - union { - struct { - u64 offset, length; - u64 truncate_size; - u32 truncate_seq; - } extent; - struct { - const char *name; - const char *val; - u32 name_len; - u32 value_len; - __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ - __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ - } xattr; - struct { - const char *class_name; - const char *method_name; - const char *indata; - u32 indata_len; - __u8 class_len; - __u8 method_len; - __u8 argc; - } cls; - struct { - u64 cookie; - u64 count; - } pgls; - struct { - u64 snapid; - } snap; - struct { - u64 cookie; - u64 ver; - u32 prot_ver; - u32 timeout; - __u8 flag; - } watch; - }; -}; +extern int ceph_osdc_setup(void); +extern void ceph_osdc_cleanup(void); extern int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client); @@ -224,16 +236,71 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); +extern void osd_req_op_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode); + +extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, + unsigned int which, + struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, + bool own_pages); + +extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 offset, u64 length, + u64 truncate_size, u32 truncate_seq); +extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, + unsigned int which, u64 length); + +extern struct ceph_osd_data *osd_req_op_extent_osd_data( + struct ceph_osd_request *osd_req, + unsigned int which); +extern struct ceph_osd_data *osd_req_op_cls_response_data( + struct ceph_osd_request *osd_req, + unsigned int which); + +extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, + unsigned int which, + struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, + bool own_pages); +extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, + unsigned int which, + struct ceph_pagelist *pagelist); +#ifdef CONFIG_BLOCK +extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *, + unsigned int which, + struct bio *bio, size_t bio_length); +#endif /* CONFIG_BLOCK */ + +extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, + unsigned int which, + struct ceph_pagelist *pagelist); +extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, + unsigned int which, + struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, + bool own_pages); +extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, + unsigned int which, + struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, + bool own_pages); + +extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + const char *class, const char *method); +extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 cookie, u64 
version, int flag); + extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - unsigned int num_op, + unsigned int num_ops, bool use_mempool, gfp_t gfp_flags); -extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 len, - unsigned int num_op, - struct ceph_osd_req_op *src_ops, +extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime); @@ -241,12 +308,11 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, struct ceph_vino vino, - u64 offset, u64 *len, int op, int flags, + u64 offset, u64 *len, + int num_ops, int opcode, int flags, struct ceph_snap_context *snapc, - int do_sync, u32 truncate_seq, - u64 truncate_size, - struct timespec *mtime, - bool use_mempool, int page_align); + u32 truncate_seq, u64 truncate_size, + bool use_mempool); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c819190d164..d05cc4451af 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -3,6 +3,7 @@ #include <linux/rbtree.h> #include <linux/ceph/types.h> +#include <linux/ceph/decode.h> #include <linux/ceph/ceph_fs.h> #include <linux/crush/crush.h> @@ -119,6 +120,29 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, return &map->osd_addr[osd]; } +static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) +{ + __u8 version; + + if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { + pr_warning("incomplete pg encoding"); + + return -EINVAL; + } + version = ceph_decode_8(p); + if (version > 1) { + pr_warning("do not understand pg encoding %d > 1", + (int)version); + return -EINVAL; + } + + pgid->pool = ceph_decode_64(p); + pgid->seed = ceph_decode_32(p); + *p += 4; /* skip deprecated preferred value */ + + return 0; +} + extern struct ceph_osdmap *osdmap_decode(void **p, void *end); extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map, @@ -131,10 +155,8 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_pg *pg, - const char *oid, - struct ceph_file_layout *fl, - struct ceph_osdmap *osdmap); +extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, + struct ceph_osdmap *osdmap, uint64_t pool); extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, int *acting); diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 530e11ba073..01f59510704 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -39,6 +39,7 @@ struct twl4030_madc_conversion_method { * @do_avgP: sample the input channel for 4 consecutive cycles * @method: RT, SW1, SW2 * @type: Polling or interrupt based method + * @raw: Return raw value, do not convert it */ struct twl4030_madc_request { @@ -48,6 +49,7 @@ struct twl4030_madc_request { u16 type; bool active; bool result_pending; + bool raw; int rbuf[TWL4030_MADC_MAX_CHANNELS]; void (*func_cb)(int len, int channels, int *buf); }; diff --git a/include/linux/input/matrix_keypad.h 
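osd_client.h now builds requests from per-op helpers (osd_req_op_*_init plus per-op data-buffer setters) instead of having callers fill a ceph_osd_req_op union by hand, and CEPH_OSD_MAX_OP drops from 10 to 2 to match what is actually used. Composing a simple read from the prototypes above might look roughly like the following sketch; object/layout targeting is omitted, and osdc, off, len and pages are placeholders:

	struct ceph_osd_request *req;

	/* one op, no snap context, regular (non-mempool) allocation */
	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOFS);
	if (!req)
		return -ENOMEM;

	/* op 0 is an extent read of [off, off + len) ... */
	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, off, len, 0, 0);
	/* ... whose payload lands in a caller-provided page array */
	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);

	/* encode the wire message; reads pass no snap context or mtime */
	ceph_osdc_build_request(req, off, NULL, CEPH_NOSNAP, NULL);

The 'which' argument threaded through every helper is what lets the same setters address either op of a compound (up to two-op) request.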
diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 5f3aa6b11bf..27e06acc509 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -81,4 +81,23 @@ int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, unsigned short *keymap, struct input_dev *input_dev); +#ifdef CONFIG_OF +/** + * matrix_keypad_parse_of_params() - Read parameters from matrix-keypad node + * + * @dev: Device containing of_node + * @rows: Returns number of matrix rows + * @cols: Returns number of matrix columns + * @return 0 if OK, <0 on error + */ +int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols); +#else +static inline int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols) +{ + return -ENOSYS; +} +#endif /* CONFIG_OF */ + #endif /* _MATRIX_KEYPAD_H */
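As a sketch of how a keypad driver might consume this helper in its probe path (the driver name is invented; the property names read by the helper are assumed to be the usual keypad,num-rows / keypad,num-columns):

	static int demo_keypad_probe(struct platform_device *pdev)
	{
		unsigned int rows, cols;
		int err;

		err = matrix_keypad_parse_of_params(&pdev->dev, &rows, &cols);
		if (err)
			return err;	/* -ENOSYS when CONFIG_OF is disabled */

		/* rows/cols now reflect pdev->dev.of_node; size the keymap
		 * accordingly before calling matrix_keypad_build_keymap() */
		return 0;
	}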
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ba3b8a98a04..3aeb7305e2f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -20,6 +20,7 @@ #define __LINUX_IOMMU_H #include <linux/errno.h> +#include <linux/err.h> #include <linux/types.h> #define IOMMU_READ (1) @@ -91,8 +92,7 @@ struct iommu_ops { phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, - unsigned long iova); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*domain_has_cap)(struct iommu_domain *domain, unsigned long cap); int (*add_device)(struct device *dev); @@ -105,7 +105,7 @@ struct iommu_ops { /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t paddr, u64 size); + phys_addr_t paddr, u64 size, int prot); void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr); /* Set the number of windows per domain */ int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count); @@ -125,6 +125,7 @@ struct iommu_ops { extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); extern bool iommu_present(struct bus_type *bus); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); +extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -134,8 +135,7 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); -extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, unsigned long cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, @@ -171,7 +171,8 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, - phys_addr_t offset, u64 size); + phys_addr_t offset, u64 size, + int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework @@ -257,7 +258,7 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, - u64 size) + u64 size, int prot) { return -ENODEV; } @@ -267,8 +268,7 @@ static inline void iommu_domain_window_disable(struct iommu_domain *domain, { } -static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { return 0; }
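Two API changes are visible above: iommu_iova_to_phys() now takes a dma_addr_t rather than an unsigned long, and window enable (both the op and the wrapper) gains an int prot like iommu_map(). A rough sketch, with "dev", "page" and the iova values chosen purely for illustration:

	const dma_addr_t iova = 0x100000;
	struct iommu_domain *domain;
	phys_addr_t phys;

	domain = iommu_domain_alloc(dev->bus);
	if (!domain)
		return -ENOMEM;
	if (iommu_attach_device(domain, dev))
		return -ENODEV;
	if (iommu_map(domain, iova, page_to_phys(page), PAGE_SIZE,
		      IOMMU_READ | IOMMU_WRITE))
		return -ENOMEM;

	/* translate back; the iova argument is now a dma_addr_t */
	phys = iommu_iova_to_phys(domain, iova);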
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c1395825192..f0eea07d2c2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -117,14 +117,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APF_HALT 12 #define KVM_REQ_STEAL_UPDATE 13 #define KVM_REQ_NMI 14 -#define KVM_REQ_IMMEDIATE_EXIT 15 -#define KVM_REQ_PMU 16 -#define KVM_REQ_PMI 17 -#define KVM_REQ_WATCHDOG 18 -#define KVM_REQ_MASTERCLOCK_UPDATE 19 -#define KVM_REQ_MCLOCK_INPROGRESS 20 -#define KVM_REQ_EPR_EXIT 21 -#define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_PMU 15 +#define KVM_REQ_PMI 16 +#define KVM_REQ_WATCHDOG 17 +#define KVM_REQ_MASTERCLOCK_UPDATE 18 +#define KVM_REQ_MCLOCK_INPROGRESS 19 +#define KVM_REQ_EPR_EXIT 20 +#define KVM_REQ_SCAN_IOAPIC 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -133,6 +132,9 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; +extern raw_spinlock_t kvm_lock; +extern struct list_head vm_list; + struct kvm_io_range { gpa_t addr; int len; @@ -149,6 +151,7 @@ struct kvm_io_bus { enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, + KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_NR_BUSES }; @@ -252,6 +255,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool preempted; struct kvm_vcpu_arch arch; }; @@ -285,7 +289,8 @@ struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level); + struct kvm *kvm, int irq_source_id, int level, + bool line_status); union { struct { unsigned irqchip; @@ -296,10 +301,10 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* @@ -385,6 +390,7 @@ struct kvm { long mmu_notifier_count; #endif long tlbs_dirty; + struct list_head devices; }; #define kvm_err(fmt, ...) \ @@ -424,6 +430,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +int kvm_irqfd_init(void); +void kvm_irqfd_exit(void); +#else +static inline int kvm_irqfd_init(void) +{ + return 0; +} + +static inline void kvm_irqfd_exit(void) +{ +} +#endif int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); @@ -452,24 +471,39 @@ id_to_memslot(struct kvm_memslots *slots, int id) return slot; } +/* + * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: + * - create a new memory slot + * - delete an existing memory slot + * - modify an existing memory slot + * -- move it in the guest physical memory space + * -- just change its flags + * + * Since flags can be changed by some of these operations, the following + * differentiation is the best we can do for __kvm_set_memory_region(): + */ +enum kvm_mr_change { + KVM_MR_CREATE, + KVM_MR_DELETE, + KVM_MR_MOVE, + KVM_MR_FLAGS_ONLY, +}; + int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc); + enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc); + const struct kvm_memory_slot *old, + enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ @@ -539,7 +573,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); -void kvm_make_update_eoibitmap_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request(struct kvm *kvm); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -555,10 +589,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc); -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); + struct kvm_userspace_memory_region *mem); +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -632,7 +665,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -void kvm_free_all_assigned_devices(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); @@ -684,15 +716,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -#ifdef __KVM_HAVE_IOAPIC 
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, - int irq_source_id, int level); + int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -705,7 +733,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); /* For vcpu->arch.iommu_flags */ #define KVM_IOMMU_CACHE_COHERENCY 0x1 -#ifdef CONFIG_IOMMU_API +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_iommu_map_guest(struct kvm *kvm); @@ -714,7 +742,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); -#else /* CONFIG_IOMMU_API */ +#else static inline int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -726,28 +754,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, { } -static inline int kvm_iommu_map_guest(struct kvm *kvm) -{ - return -ENODEV; -} - static inline int kvm_iommu_unmap_guest(struct kvm *kvm) { return 0; } - -static inline int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - return 0; -} - -static inline int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - return 0; -} -#endif /* CONFIG_IOMMU_API */ +#endif static inline void __guest_enter(void) { @@ -921,7 +932,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) } #endif -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING #define KVM_MAX_IRQ_ROUTES 1024 @@ -930,6 +941,9 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); @@ -998,11 +1012,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } #endif -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, unsigned long arg); +void kvm_free_all_assigned_devices(struct kvm *kvm); + #else static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, @@ -1011,6 +1027,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, return -ENOTTY; } +static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} + #endif static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) @@ -1028,6 +1046,46 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +extern bool kvm_rebooting; + +struct kvm_device_ops; + +struct kvm_device { + struct kvm_device_ops *ops; + struct kvm *kvm; + 
void *private; + struct list_head vm_node; +}; + +/* create, destroy, and name are mandatory */ +struct kvm_device_ops { + const char *name; + int (*create)(struct kvm_device *dev, u32 type); + + /* + * Destroy is responsible for freeing dev. + * + * Destroy may be called before or after destructors are called + * on emulated I/O regions, depending on whether a reference is + * held by a vcpu or other kvm component that gets destroyed + * after the emulated I/O. + */ + void (*destroy)(struct kvm_device *dev); + + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, + unsigned long arg); +}; + +void kvm_device_get(struct kvm_device *dev); +void kvm_device_put(struct kvm_device *dev); +struct kvm_device *kvm_device_from_filp(struct file *filp); + +extern struct kvm_device_ops kvm_mpic_ops; +extern struct kvm_device_ops kvm_xics_ops; + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
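A minimal sketch of a backend for the new device interface, following the rule above that create, destroy, and name are mandatory; everything other than the kvm_device/kvm_device_ops fields is hypothetical and not part of this patch:

	static int demo_create(struct kvm_device *dev, u32 type)
	{
		dev->private = NULL;	/* per-instance state would go here */
		return 0;
	}

	static void demo_destroy(struct kvm_device *dev)
	{
		kfree(dev->private);
		kfree(dev);		/* destroy must free dev itself */
	}

	static struct kvm_device_ops demo_device_ops = {
		.name = "demo",
		.create = demo_create,
		.destroy = demo_destroy,
	};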
diff --git a/include/linux/leds.h b/include/linux/leds.h index 0d9b5eed714..0287ab29668 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -142,6 +142,10 @@ extern void led_set_brightness(struct led_classdev *led_cdev, /* * LED Triggers */ +/* Registration functions for simple triggers */ +#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; +#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; + #ifdef CONFIG_LEDS_TRIGGERS #define TRIG_NAME_MAX 50 @@ -164,9 +168,6 @@ struct led_trigger { extern int led_trigger_register(struct led_trigger *trigger); extern void led_trigger_unregister(struct led_trigger *trigger); -/* Registration functions for simple triggers */ -#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; -#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; extern void led_trigger_register_simple(const char *name, struct led_trigger **trigger); extern void led_trigger_unregister_simple(struct led_trigger *trigger); @@ -199,20 +200,30 @@ extern void led_trigger_rename_static(const char *name, #else -/* Triggers aren't active - null macros */ -#define DEFINE_LED_TRIGGER(x) -#define DEFINE_LED_TRIGGER_GLOBAL(x) -#define led_trigger_register_simple(x, y) do {} while(0) -#define led_trigger_unregister_simple(x) do {} while(0) -#define led_trigger_event(x, y) do {} while(0) +/* Trigger has no members */ +struct led_trigger {}; -#endif +/* Trigger inline empty functions */ +static inline void led_trigger_register_simple(const char *name, + struct led_trigger **trigger) {} +static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} +static inline void led_trigger_event(struct led_trigger *trigger, + enum led_brightness event) {} +#endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific functions */ #ifdef CONFIG_LEDS_TRIGGER_IDE_DISK extern void ledtrig_ide_activity(void); #else -#define ledtrig_ide_activity() do {} while(0) +static inline void ledtrig_ide_activity(void) {} +#endif + +#if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) +extern void ledtrig_flash_ctrl(bool on); +extern void ledtrig_torch_ctrl(bool on); +#else +static inline void ledtrig_flash_ctrl(bool on) {} +static inline void ledtrig_torch_ctrl(bool on) {} #endif /*
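With DEFINE_LED_TRIGGER() now visible in both configurations and the null stubs turned into typed inline functions, the simple-trigger pattern type-checks even when CONFIG_LEDS_TRIGGERS is off. An illustrative (not in-tree) user:

	DEFINE_LED_TRIGGER(demo_trig);

	static int __init demo_init(void)
	{
		led_trigger_register_simple("demo-activity", &demo_trig);
		return 0;
	}

	static void demo_activity(void)
	{
		/* compiles to nothing when triggers are disabled */
		led_trigger_event(demo_trig, LED_FULL);
	}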
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index a0f940987a3..80dead1f710 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -78,6 +78,7 @@ struct arizona_micbias { unsigned int ext_cap:1; /** External capacitor fitted */ unsigned int discharge:1; /** Actively discharge */ unsigned int fast_start:1; /** Enable aggressive startup ramp rate */ + unsigned int bypass:1; /** Use bypass mode */ }; struct arizona_micd_config { @@ -104,7 +105,8 @@ struct arizona_pdata { /** If a direct 32kHz clock is provided on an MCLK specify it here */ int clk32k_src; - bool irq_active_high; /** IRQ polarity */ + /** Mode for primary IRQ (defaults to active low) */ + unsigned int irq_flags; /* Base GPIO */ int gpio_base; @@ -183,6 +185,9 @@ struct arizona_pdata { /** Haptic actuator type */ unsigned int hap_act; + + /** GPIO for primary IRQ (used for edge triggered emulation) */ + int irq_gpio; }; #endif
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h new file mode 100644 index 00000000000..032af7fc5b2 --- /dev/null +++ b/include/linux/mfd/cros_ec.h @@ -0,0 +1,170 @@ +/* + * ChromeOS EC multi-function device + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_MFD_CROS_EC_H +#define __LINUX_MFD_CROS_EC_H + +#include <linux/mfd/cros_ec_commands.h> + +/* + * Command interface between EC and AP, for LPC, I2C and SPI interfaces. + */ +enum { + EC_MSG_TX_HEADER_BYTES = 3, + EC_MSG_TX_TRAILER_BYTES = 1, + EC_MSG_TX_PROTO_BYTES = EC_MSG_TX_HEADER_BYTES + + EC_MSG_TX_TRAILER_BYTES, + EC_MSG_RX_PROTO_BYTES = 3, + + /* Max length of messages */ + EC_MSG_BYTES = EC_HOST_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES, + +}; + +/** + * struct cros_ec_msg - A message sent to the EC, and its reply + * + * @version: Command version number (often 0) + * @cmd: Command to send (EC_CMD_...) + * @out_buf: Outgoing payload (to EC) + * @out_len: Outgoing length + * @in_buf: Incoming payload (from EC) + * @in_len: Incoming length + */ +struct cros_ec_msg { + u8 version; + u8 cmd; + uint8_t *out_buf; + int out_len; + uint8_t *in_buf; + int in_len; +}; + +/** + * struct cros_ec_device - Information about a ChromeOS EC device + * + * @name: Name of this EC interface + * @priv: Private data + * @irq: Interrupt to use + * @din: input buffer (from EC) + * @dout: output buffer (to EC) + * \note + * These two buffers will always be dword-aligned and include enough + * space for up to 7 word-alignment bytes also, so we can ensure that + * the body of the message is always dword-aligned (64-bit). + * + * We use this alignment to keep ARM and x86 happy. Probably word + * alignment would be OK; there might be a small performance advantage + * to using dword. + * @din_size: size of din buffer + * @dout_size: size of dout buffer + * @command_send: send a command + * @command_recv: receive a command + * @ec_name: name of EC device (e.g. 'chromeos-ec') + * @phys_name: name of physical comms layer (e.g. 'i2c-4') + * @parent: pointer to parent device (e.g. i2c or spi device) + * @dev: Device pointer + * @dev_lock: Lock to prevent concurrent access + * @wake_enabled: true if this device can wake the system from sleep + * @was_wake_device: true if this device was set to wake the system from + * sleep at the last suspend + * @event_notifier: interrupt event notifier for transport devices + */ +struct cros_ec_device { + const char *name; + void *priv; + int irq; + uint8_t *din; + uint8_t *dout; + int din_size; + int dout_size; + int (*command_send)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len); + int (*command_recv)(struct cros_ec_device *ec, + uint16_t cmd, void *in_buf, int in_len); + int (*command_sendrecv)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len); + int (*command_xfer)(struct cros_ec_device *ec, + struct cros_ec_msg *msg); + + const char *ec_name; + const char *phys_name; + struct device *parent; + + /* These are --private-- fields - do not assign */ + struct device *dev; + struct mutex dev_lock; + bool wake_enabled; + bool was_wake_device; + struct blocking_notifier_head event_notifier; +}; + +/** + * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device + * + * This can be called by drivers to handle a suspend event. + * + * @ec_dev: Device to suspend + * @return 0 if ok, -ve on error + */ +int cros_ec_suspend(struct cros_ec_device *ec_dev); + +/** + * cros_ec_resume - Handle a resume operation for the ChromeOS EC device + * + * This can be called by drivers to handle a resume event. + * + * @ec_dev: Device to resume + * @return 0 if ok, -ve on error + */ +int cros_ec_resume(struct cros_ec_device *ec_dev); + +/** + * cros_ec_prepare_tx - Prepare an outgoing message in the output buffer + * + * This is intended to be used by all ChromeOS EC drivers, but at present + * only SPI uses it. Once LPC uses the same protocol it can start using it. + * I2C could use it now, with a refactor of the existing code. + * + * @ec_dev: Device to prepare the message for + * @msg: Message to write + */ +int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg); + +/** + * cros_ec_remove - Remove a ChromeOS EC + * + * Call this to deregister a ChromeOS EC. After this you should call + * cros_ec_free(). + * + * @ec_dev: Device to unregister + * @return 0 if ok, -ve on error + */ +int cros_ec_remove(struct cros_ec_device *ec_dev); + +/** + * cros_ec_register - Register a new ChromeOS EC, using the provided info + * + * Before calling this, allocate a pointer to a new device and then fill + * in all the fields up to the --private-- marker. + * + * @ec_dev: Device to register + * @return 0 if ok, -ve on error + */ +int cros_ec_register(struct cros_ec_device *ec_dev); + +#endif /* __LINUX_MFD_CROS_EC_H */
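A sketch of driving the message interface above from a sub-driver, using the EC_CMD_HELLO command defined in cros_ec_commands.h below; "ec" is assumed to be a registered cros_ec_device, and a negative return is assumed to indicate failure:

	struct ec_params_hello p = { .in_data = 0xa0b0c0d0 };
	struct ec_response_hello r;
	struct cros_ec_msg msg = {
		.version = 0,
		.cmd = EC_CMD_HELLO,
		.out_buf = (uint8_t *)&p,
		.out_len = sizeof(p),
		.in_buf = (uint8_t *)&r,
		.in_len = sizeof(r),
	};

	if (ec->command_xfer(ec, &msg) >= 0 &&
	    r.out_data == p.in_data + 0x01020304)
		pr_info("EC hello OK\n");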
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h new file mode 100644 index 00000000000..86fd06953bc --- /dev/null +++ b/include/linux/mfd/cros_ec_commands.h @@ -0,0 +1,1369 @@ +/* + * Host communication command constants for ChromeOS EC + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * The ChromeOS EC multi-function device is used to mux all the requests + * to the EC device for its multiple features: keyboard controller, + * battery charging and regulator control, firmware update. + * + * NOTE: This file is copied verbatim from the ChromeOS EC Open Source + * project in an attempt to make future updates easy to make. + */ + +#ifndef __CROS_EC_COMMANDS_H +#define __CROS_EC_COMMANDS_H + +/* + * Protocol overview + * + * request: CMD [ P0 P1 P2 ... Pn S ] + * response: ERR [ P0 P1 P2 ... Pn S ] + * + * where the bytes are defined as follows: + * - CMD is the command code. (defined by EC_CMD_ constants) + * - ERR is the error code. (defined by EC_RES_ constants) + * - Px is the optional payload. + * it is not sent if the error code is not success. + * (defined by ec_params_ and ec_response_ structures) + * - S is the checksum which is the sum of all payload bytes. + * + * On LPC, CMD and ERR are sent/received at EC_LPC_ADDR_KERNEL|USER_CMD + * and the payloads are sent/received at EC_LPC_ADDR_KERNEL|USER_PARAM. + * On I2C, all bytes are sent serially in the same message. + */ + +/* Current version of this protocol */ +#define EC_PROTO_VERSION 0x00000002 + +/* Command version mask */ +#define EC_VER_MASK(version) (1UL << (version)) + +/* I/O addresses for ACPI commands */ +#define EC_LPC_ADDR_ACPI_DATA 0x62 +#define EC_LPC_ADDR_ACPI_CMD 0x66 + +/* I/O addresses for host command */ +#define EC_LPC_ADDR_HOST_DATA 0x200 +#define EC_LPC_ADDR_HOST_CMD 0x204 + +/* I/O addresses for host command args and params */ +#define EC_LPC_ADDR_HOST_ARGS 0x800 +#define EC_LPC_ADDR_HOST_PARAM 0x804 +#define EC_HOST_PARAM_SIZE 0x0fc /* Size of param area in bytes */ + +/* I/O addresses for host command params, old interface */ +#define EC_LPC_ADDR_OLD_PARAM 0x880 +#define EC_OLD_PARAM_SIZE 0x080 /* Size of param area in bytes */ + +/* EC command register bit functions */ +#define EC_LPC_CMDR_DATA (1 << 0) /* Data ready for host to read */ +#define EC_LPC_CMDR_PENDING (1 << 1) /* Write pending to EC */ +#define EC_LPC_CMDR_BUSY (1 << 2) /* EC is busy processing a command */ +#define EC_LPC_CMDR_CMD (1 << 3) /* Last host write was a command */ +#define EC_LPC_CMDR_ACPI_BRST (1 << 4) /* Burst mode (not used) */ +#define EC_LPC_CMDR_SCI (1 << 5) /* SCI event is pending */ +#define EC_LPC_CMDR_SMI (1 << 6) /* SMI event is pending */ + +#define EC_LPC_ADDR_MEMMAP 0x900 +#define EC_MEMMAP_SIZE 255 /* ACPI IO buffer max is 255 bytes */ +#define EC_MEMMAP_TEXT_MAX 8 /* Size of a string in the memory map */ + +/* The offset address of each type of data in mapped memory. 
*/ +#define EC_MEMMAP_TEMP_SENSOR 0x00 /* Temp sensors */ +#define EC_MEMMAP_FAN 0x10 /* Fan speeds */ +#define EC_MEMMAP_TEMP_SENSOR_B 0x18 /* Temp sensors (second set) */ +#define EC_MEMMAP_ID 0x20 /* 'E' 'C' */ +#define EC_MEMMAP_ID_VERSION 0x22 /* Version of data in 0x20 - 0x2f */ +#define EC_MEMMAP_THERMAL_VERSION 0x23 /* Version of data in 0x00 - 0x1f */ +#define EC_MEMMAP_BATTERY_VERSION 0x24 /* Version of data in 0x40 - 0x7f */ +#define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */ +#define EC_MEMMAP_EVENTS_VERSION 0x26 /* Version of data in 0x34 - 0x3f */ +#define EC_MEMMAP_HOST_CMD_FLAGS 0x27 /* Host command interface flags */ +#define EC_MEMMAP_SWITCHES 0x30 +#define EC_MEMMAP_HOST_EVENTS 0x34 +#define EC_MEMMAP_BATT_VOLT 0x40 /* Battery Present Voltage */ +#define EC_MEMMAP_BATT_RATE 0x44 /* Battery Present Rate */ +#define EC_MEMMAP_BATT_CAP 0x48 /* Battery Remaining Capacity */ +#define EC_MEMMAP_BATT_FLAG 0x4c /* Battery State, defined below */ +#define EC_MEMMAP_BATT_DCAP 0x50 /* Battery Design Capacity */ +#define EC_MEMMAP_BATT_DVLT 0x54 /* Battery Design Voltage */ +#define EC_MEMMAP_BATT_LFCC 0x58 /* Battery Last Full Charge Capacity */ +#define EC_MEMMAP_BATT_CCNT 0x5c /* Battery Cycle Count */ +#define EC_MEMMAP_BATT_MFGR 0x60 /* Battery Manufacturer String */ +#define EC_MEMMAP_BATT_MODEL 0x68 /* Battery Model Number String */ +#define EC_MEMMAP_BATT_SERIAL 0x70 /* Battery Serial Number String */ +#define EC_MEMMAP_BATT_TYPE 0x78 /* Battery Type String */ + +/* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */ +#define EC_TEMP_SENSOR_ENTRIES 16 +/* + * Number of temp sensors at EC_MEMMAP_TEMP_SENSOR_B. + * + * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2. + */ +#define EC_TEMP_SENSOR_B_ENTRIES 8 +#define EC_TEMP_SENSOR_NOT_PRESENT 0xff +#define EC_TEMP_SENSOR_ERROR 0xfe +#define EC_TEMP_SENSOR_NOT_POWERED 0xfd +#define EC_TEMP_SENSOR_NOT_CALIBRATED 0xfc +/* + * The offset of temperature value stored in mapped memory. This allows + * reporting a temperature range of 200K to 454K = -73C to 181C. + */ +#define EC_TEMP_SENSOR_OFFSET 200 + +#define EC_FAN_SPEED_ENTRIES 4 /* Number of fans at EC_MEMMAP_FAN */ +#define EC_FAN_SPEED_NOT_PRESENT 0xffff /* Entry not present */ +#define EC_FAN_SPEED_STALLED 0xfffe /* Fan stalled */ + +/* Battery bit flags at EC_MEMMAP_BATT_FLAG. */ +#define EC_BATT_FLAG_AC_PRESENT 0x01 +#define EC_BATT_FLAG_BATT_PRESENT 0x02 +#define EC_BATT_FLAG_DISCHARGING 0x04 +#define EC_BATT_FLAG_CHARGING 0x08 +#define EC_BATT_FLAG_LEVEL_CRITICAL 0x10 + +/* Switch flags at EC_MEMMAP_SWITCHES */ +#define EC_SWITCH_LID_OPEN 0x01 +#define EC_SWITCH_POWER_BUTTON_PRESSED 0x02 +#define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04 +/* Recovery requested via keyboard */ +#define EC_SWITCH_KEYBOARD_RECOVERY 0x08 +/* Recovery requested via dedicated signal (from servo board) */ +#define EC_SWITCH_DEDICATED_RECOVERY 0x10 +/* Was fake developer mode switch; now unused. Remove in next refactor. */ +#define EC_SWITCH_IGNORE0 0x20 + +/* Host command interface flags */ +/* Host command interface supports LPC args (LPC interface only) */ +#define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED 0x01 + +/* Wireless switch flags */ +#define EC_WIRELESS_SWITCH_WLAN 0x01 +#define EC_WIRELESS_SWITCH_BLUETOOTH 0x02 + +/* + * This header file is used in coreboot both in C and ACPI code. The ACPI code + * is pre-processed to handle constants but the ASL compiler is unable to + * handle actual C code so keep it separate. 
+ */ +#ifndef __ACPI__ + +/* LPC command status byte masks */ +/* EC has written a byte in the data register and host hasn't read it yet */ +#define EC_LPC_STATUS_TO_HOST 0x01 +/* Host has written a command/data byte and the EC hasn't read it yet */ +#define EC_LPC_STATUS_FROM_HOST 0x02 +/* EC is processing a command */ +#define EC_LPC_STATUS_PROCESSING 0x04 +/* Last write to EC was a command, not data */ +#define EC_LPC_STATUS_LAST_CMD 0x08 +/* EC is in burst mode. Unsupported by Chrome EC, so this bit is never set */ +#define EC_LPC_STATUS_BURST_MODE 0x10 +/* SCI event is pending (requesting SCI query) */ +#define EC_LPC_STATUS_SCI_PENDING 0x20 +/* SMI event is pending (requesting SMI query) */ +#define EC_LPC_STATUS_SMI_PENDING 0x40 +/* (reserved) */ +#define EC_LPC_STATUS_RESERVED 0x80 + +/* + * EC is busy. This covers both the EC processing a command, and the host has + * written a new command but the EC hasn't picked it up yet. + */ +#define EC_LPC_STATUS_BUSY_MASK \ + (EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING) + +/* Host command response codes */ +enum ec_status { + EC_RES_SUCCESS = 0, + EC_RES_INVALID_COMMAND = 1, + EC_RES_ERROR = 2, + EC_RES_INVALID_PARAM = 3, + EC_RES_ACCESS_DENIED = 4, + EC_RES_INVALID_RESPONSE = 5, + EC_RES_INVALID_VERSION = 6, + EC_RES_INVALID_CHECKSUM = 7, + EC_RES_IN_PROGRESS = 8, /* Accepted, command in progress */ + EC_RES_UNAVAILABLE = 9, /* No response available */ + EC_RES_TIMEOUT = 10, /* We got a timeout */ + EC_RES_OVERFLOW = 11, /* Table / data overflow */ +}; + +/* + * Host event codes. Note these are 1-based, not 0-based, because the ACPI + * query EC command uses code 0 to mean "no event pending". We explicitly + * specify each value in the enum listing so they won't change if we + * delete/insert an item or rearrange the list (it needs to be stable across + * platforms, not just within a single compiled instance). + */ +enum host_event_code { + EC_HOST_EVENT_LID_CLOSED = 1, + EC_HOST_EVENT_LID_OPEN = 2, + EC_HOST_EVENT_POWER_BUTTON = 3, + EC_HOST_EVENT_AC_CONNECTED = 4, + EC_HOST_EVENT_AC_DISCONNECTED = 5, + EC_HOST_EVENT_BATTERY_LOW = 6, + EC_HOST_EVENT_BATTERY_CRITICAL = 7, + EC_HOST_EVENT_BATTERY = 8, + EC_HOST_EVENT_THERMAL_THRESHOLD = 9, + EC_HOST_EVENT_THERMAL_OVERLOAD = 10, + EC_HOST_EVENT_THERMAL = 11, + EC_HOST_EVENT_USB_CHARGER = 12, + EC_HOST_EVENT_KEY_PRESSED = 13, + /* + * EC has finished initializing the host interface. The host can check + * for this event after sending an EC_CMD_REBOOT_EC command to + * determine when the EC is ready to accept subsequent commands. + */ + EC_HOST_EVENT_INTERFACE_READY = 14, + /* Keyboard recovery combo has been pressed */ + EC_HOST_EVENT_KEYBOARD_RECOVERY = 15, + + /* Shutdown due to thermal overload */ + EC_HOST_EVENT_THERMAL_SHUTDOWN = 16, + /* Shutdown due to battery level too low */ + EC_HOST_EVENT_BATTERY_SHUTDOWN = 17, + + /* + * The high bit of the event mask is not used as a host event code. If + * it reads back as set, then the entire event mask should be + * considered invalid by the host. This can happen when reading the + * raw event status via EC_MEMMAP_HOST_EVENTS but the LPC interface is + * not initialized on the EC, or improperly configured on the host. 
+ */ + EC_HOST_EVENT_INVALID = 32 }; /* Host event mask */ +#define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1)) + +/* Arguments at EC_LPC_ADDR_HOST_ARGS */ +struct ec_lpc_host_args { + uint8_t flags; + uint8_t command_version; + uint8_t data_size; + /* + * Checksum; sum of command + flags + command_version + data_size + + * all params/response data bytes. + */ + uint8_t checksum; +} __packed; + +/* Flags for ec_lpc_host_args.flags */ +/* + * Args are from host. Data area at EC_LPC_ADDR_HOST_PARAM contains command + * params. + * + * If EC gets a command and this flag is not set, this is an old-style command. + * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with + * unknown length. EC must respond with an old-style response (that is, + * without setting EC_HOST_ARGS_FLAG_TO_HOST). + */ +#define EC_HOST_ARGS_FLAG_FROM_HOST 0x01 +/* + * Args are from EC. Data area at EC_LPC_ADDR_HOST_PARAM contains response. + * + * If EC responds to a command and this flag is not set, this is an old-style + * response. Command version is 0 and response data from EC is at + * EC_LPC_ADDR_OLD_PARAM with unknown length. + */ +#define EC_HOST_ARGS_FLAG_TO_HOST 0x02 + +/* + * Notes on commands: + * + * Each command is an 8-bit command value. Commands which take params or + * return response data specify structs for that data. If no struct is + * specified, the command does not input or output data, respectively. + * Parameter/response length is implicit in the structs. Some underlying + * communication protocols (I2C, SPI) may add length or checksum headers, but + * those are implementation-dependent and not defined here. + */ + +/*****************************************************************************/ +/* General / test commands */ + +/* + * Get protocol version, used to deal with non-backward compatible protocol + * changes. + */ +#define EC_CMD_PROTO_VERSION 0x00 + +struct ec_response_proto_version { + uint32_t version; +} __packed; + +/* + * Hello. This is a simple command to test the EC is responsive to + * commands. + */ +#define EC_CMD_HELLO 0x01 + +struct ec_params_hello { + uint32_t in_data; /* Pass anything here */ +} __packed; + +struct ec_response_hello { + uint32_t out_data; /* Output will be in_data + 0x01020304 */ +} __packed; + +/* Get version number */ +#define EC_CMD_GET_VERSION 0x02 + +enum ec_current_image { + EC_IMAGE_UNKNOWN = 0, + EC_IMAGE_RO, + EC_IMAGE_RW +}; + +struct ec_response_get_version { + /* Null-terminated version strings for RO, RW */ + char version_string_ro[32]; + char version_string_rw[32]; + char reserved[32]; /* Was previously RW-B string */ + uint32_t current_image; /* One of ec_current_image */ +} __packed; + +/* Read test */ +#define EC_CMD_READ_TEST 0x03 + +struct ec_params_read_test { + uint32_t offset; /* Starting value for read buffer */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +struct ec_response_read_test { + uint32_t data[32]; +} __packed; + +/* + * Get build information + * + * Response is null-terminated string. + */ +#define EC_CMD_GET_BUILD_INFO 0x04 + +/* Get chip info */ +#define EC_CMD_GET_CHIP_INFO 0x05 + +struct ec_response_get_chip_info { + /* Null-terminated strings */ + char vendor[32]; + char name[32]; + char revision[32]; /* Mask version */ +} __packed; + +/* Get board HW version */ +#define EC_CMD_GET_BOARD_VERSION 0x06 + +struct ec_response_board_version { + uint16_t board_version; /* A monotonically increasing number. */ +} __packed; + +/* + * Read memory-mapped data. + * + * This is an alternate interface to memory-mapped data for bus protocols + * which don't support direct-mapped memory - I2C, SPI, etc. + * + * Response is params.size bytes of data. + */ +#define EC_CMD_READ_MEMMAP 0x07 + +struct ec_params_read_memmap { + uint8_t offset; /* Offset in memmap (EC_MEMMAP_*) */ + uint8_t size; /* Size to read in bytes */ +} __packed; + +/* Read versions supported for a command */ +#define EC_CMD_GET_CMD_VERSIONS 0x08 + +struct ec_params_get_cmd_versions { + uint8_t cmd; /* Command to check */ +} __packed; + +struct ec_response_get_cmd_versions { + /* + * Mask of supported versions; use EC_VER_MASK() to compare with a + * desired version. + */ + uint32_t version_mask; +} __packed; + +/* + * Check EC communications status (busy). This is needed on i2c/spi but not + * on lpc since it has its own out-of-band busy indicator. + * + * lpc must read the status from the command register. Attempting this on + * lpc will overwrite the args/parameter space and corrupt its data. + */ +#define EC_CMD_GET_COMMS_STATUS 0x09 + +/* Avoid using ec_status which is for return values */ +enum ec_comms_status { + EC_COMMS_STATUS_PROCESSING = 1 << 0, /* Processing cmd */ +}; + +struct ec_response_get_comms_status { + uint32_t flags; /* Mask of enum ec_comms_status */ +} __packed; + + +/*****************************************************************************/ +/* Flash commands */ + +/* Get flash info */ +#define EC_CMD_FLASH_INFO 0x10 + +struct ec_response_flash_info { + /* Usable flash size, in bytes */ + uint32_t flash_size; + /* + * Write block size. Write offset and size must be a multiple + * of this. + */ + uint32_t write_block_size; + /* + * Erase block size. Erase offset and size must be a multiple + * of this. + */ + uint32_t erase_block_size; + /* + * Protection block size. Protection offset and size must be a + * multiple of this. + */ + uint32_t protect_block_size; +} __packed; + +/* + * Read flash + * + * Response is params.size bytes of data. + */ +#define EC_CMD_FLASH_READ 0x11 + +struct ec_params_flash_read { + uint32_t offset; /* Byte offset to read */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +/* Write flash */ +#define EC_CMD_FLASH_WRITE 0x12 + +struct ec_params_flash_write { + uint32_t offset; /* Byte offset to write */ + uint32_t size; /* Size to write in bytes */ + /* + * Data to write. Could really use EC_PARAM_SIZE - 8, but tidiest to + * use a power of 2 so writes stay aligned. + */ + uint8_t data[64]; +} __packed; + +/* Erase flash */ +#define EC_CMD_FLASH_ERASE 0x13 + +struct ec_params_flash_erase { + uint32_t offset; /* Byte offset to erase */ + uint32_t size; /* Size to erase in bytes */ +} __packed; + +/* + * Get/set flash protection. + * + * If mask!=0, sets/clears the requested bits of flags. Depending on the + * firmware write protect GPIO, not all flags will take effect immediately; + * some flags require a subsequent hard reset to take effect. Check the + * returned flags bits to see what actually happened. + * + * If mask=0, simply returns the current flags state. + */ +#define EC_CMD_FLASH_PROTECT 0x15 +#define EC_VER_FLASH_PROTECT 1 /* Command version 1 */ + +/* Flags for flash protection */ +/* RO flash code protected when the EC boots */ +#define EC_FLASH_PROTECT_RO_AT_BOOT (1 << 0) +/* + * RO flash code protected now. If this bit is set, at-boot status cannot + * be changed. + */ +#define EC_FLASH_PROTECT_RO_NOW (1 << 1) +/* Entire flash code protected now, until reboot. */ +#define EC_FLASH_PROTECT_ALL_NOW (1 << 2) +/* Flash write protect GPIO is asserted now */ +#define EC_FLASH_PROTECT_GPIO_ASSERTED (1 << 3) +/* Error - at least one bank of flash is stuck locked, and cannot be unlocked */ +#define EC_FLASH_PROTECT_ERROR_STUCK (1 << 4) +/* + * Error - flash protection is in an inconsistent state. At least one bank of + * flash which should be protected is not protected. Usually fixed by + * re-requesting the desired flags, or by a hard reset if that fails. + */ +#define EC_FLASH_PROTECT_ERROR_INCONSISTENT (1 << 5) +/* Entire flash code protected when the EC boots */ +#define EC_FLASH_PROTECT_ALL_AT_BOOT (1 << 6) + +struct ec_params_flash_protect { + uint32_t mask; /* Bits in flags to apply */ + uint32_t flags; /* New flags to apply */ +} __packed; + +struct ec_response_flash_protect { + /* Current value of flash protect flags */ + uint32_t flags; + /* + * Flags which are valid on this platform. This allows the caller + * to distinguish between flags which aren't set vs. flags which can't + * be set on this platform. + */ + uint32_t valid_flags; + /* Flags which can be changed given the current protection state */ + uint32_t writable_flags; +} __packed; + +/* + * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash + * write protect. These commands may be reused with version > 0. + */ + +/* Get the region offset/size */ +#define EC_CMD_FLASH_REGION_INFO 0x16 +#define EC_VER_FLASH_REGION_INFO 1 + +enum ec_flash_region { + /* Region which holds read-only EC image */ + EC_FLASH_REGION_RO, + /* Region which holds rewritable EC image */ + EC_FLASH_REGION_RW, + /* + * Region which should be write-protected in the factory (a superset of + * EC_FLASH_REGION_RO) + */ + EC_FLASH_REGION_WP_RO, +}; + +struct ec_params_flash_region_info { + uint32_t region; /* enum ec_flash_region */ +} __packed; + +struct ec_response_flash_region_info { + uint32_t offset; + uint32_t size; +} __packed; + +/* Read/write VbNvContext */ +#define EC_CMD_VBNV_CONTEXT 0x17 +#define EC_VER_VBNV_CONTEXT 1 +#define EC_VBNV_BLOCK_SIZE 16 + +enum ec_vbnvcontext_op { + EC_VBNV_CONTEXT_OP_READ, + EC_VBNV_CONTEXT_OP_WRITE, +}; + +struct ec_params_vbnvcontext { + uint32_t op; + uint8_t block[EC_VBNV_BLOCK_SIZE]; +} __packed; + +struct ec_response_vbnvcontext { + uint8_t block[EC_VBNV_BLOCK_SIZE]; +} __packed; + +/*****************************************************************************/ +/* PWM commands */ + +/* Get fan target RPM */ +#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x20 + +struct ec_response_pwm_get_fan_rpm { + uint32_t rpm; +} __packed; + +/* Set target fan RPM */ +#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x21 + +struct ec_params_pwm_set_fan_target_rpm { + uint32_t rpm; +} __packed; + +/* Get keyboard backlight */ +#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x22 + +struct ec_response_pwm_get_keyboard_backlight { + uint8_t percent; + uint8_t enabled; +} __packed; + +/* Set keyboard backlight */ +#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x23 + +struct ec_params_pwm_set_keyboard_backlight { + uint8_t percent; +} __packed; + +/* Set target fan PWM duty cycle */ +#define EC_CMD_PWM_SET_FAN_DUTY 0x24 + +struct ec_params_pwm_set_fan_duty { + uint32_t percent; +} __packed; + +/*****************************************************************************/ +/* + * Lightbar commands. This looks worse than it is. Since we only use one HOST + * command to say "talk to the lightbar", we put the "and tell it to do X" part + * into a subcommand. 
We'll make separate structs for subcommands with + * different input args, so that we know how much to expect. + */ +#define EC_CMD_LIGHTBAR_CMD 0x28 + +struct rgb_s { + uint8_t r, g, b; +}; + +#define LB_BATTERY_LEVELS 4 +/* List of tweakable parameters. NOTE: It's __packed so it can be sent in a + * host command, but the alignment is the same regardless. Keep it that way. + */ +struct lightbar_params { + /* Timing */ + int google_ramp_up; + int google_ramp_down; + int s3s0_ramp_up; + int s0_tick_delay[2]; /* AC=0/1 */ + int s0a_tick_delay[2]; /* AC=0/1 */ + int s0s3_ramp_down; + int s3_sleep_for; + int s3_ramp_up; + int s3_ramp_down; + + /* Oscillation */ + uint8_t new_s0; + uint8_t osc_min[2]; /* AC=0/1 */ + uint8_t osc_max[2]; /* AC=0/1 */ + uint8_t w_ofs[2]; /* AC=0/1 */ + + /* Brightness limits based on the backlight and AC. */ + uint8_t bright_bl_off_fixed[2]; /* AC=0/1 */ + uint8_t bright_bl_on_min[2]; /* AC=0/1 */ + uint8_t bright_bl_on_max[2]; /* AC=0/1 */ + + /* Battery level thresholds */ + uint8_t battery_threshold[LB_BATTERY_LEVELS - 1]; + + /* Map [AC][battery_level] to color index */ + uint8_t s0_idx[2][LB_BATTERY_LEVELS]; /* AP is running */ + uint8_t s3_idx[2][LB_BATTERY_LEVELS]; /* AP is sleeping */ + + /* Color palette */ + struct rgb_s color[8]; /* 0-3 are Google colors */ +} __packed; + +struct ec_params_lightbar { + uint8_t cmd; /* Command (see enum lightbar_command) */ + union { + struct { + /* no args */ + } dump, off, on, init, get_seq, get_params; + + struct num { + uint8_t num; + } brightness, seq, demo; + + struct reg { + uint8_t ctrl, reg, value; + } reg; + + struct rgb { + uint8_t led, red, green, blue; + } rgb; + + struct lightbar_params set_params; + }; +} __packed; + +struct ec_response_lightbar { + union { + struct dump { + struct { + uint8_t reg; + uint8_t ic0; + uint8_t ic1; + } vals[23]; + } dump; + + struct get_seq { + uint8_t num; + } get_seq; + + struct lightbar_params get_params; + + struct { + /* no return params */ + } off, on, init, brightness, seq, reg, rgb, demo, set_params; + }; +} __packed; + +/* Lightbar commands */ +enum lightbar_command { + LIGHTBAR_CMD_DUMP = 0, + LIGHTBAR_CMD_OFF = 1, + LIGHTBAR_CMD_ON = 2, + LIGHTBAR_CMD_INIT = 3, + LIGHTBAR_CMD_BRIGHTNESS = 4, + LIGHTBAR_CMD_SEQ = 5, + LIGHTBAR_CMD_REG = 6, + LIGHTBAR_CMD_RGB = 7, + LIGHTBAR_CMD_GET_SEQ = 8, + LIGHTBAR_CMD_DEMO = 9, + LIGHTBAR_CMD_GET_PARAMS = 10, + LIGHTBAR_CMD_SET_PARAMS = 11, + LIGHTBAR_NUM_CMDS +}; + +/*****************************************************************************/ +/* Verified boot commands */ + +/* + * Note: command code 0x29 version 0 was VBOOT_CMD in Link EVT; it may be + * reused for other purposes with version > 0. 
+ */ + +/* Verified boot hash command */ +#define EC_CMD_VBOOT_HASH 0x2A + +struct ec_params_vboot_hash { + uint8_t cmd; /* enum ec_vboot_hash_cmd */ + uint8_t hash_type; /* enum ec_vboot_hash_type */ + uint8_t nonce_size; /* Nonce size; may be 0 */ + uint8_t reserved0; /* Reserved; set 0 */ + uint32_t offset; /* Offset in flash to hash */ + uint32_t size; /* Number of bytes to hash */ + uint8_t nonce_data[64]; /* Nonce data; ignored if nonce_size=0 */ +} __packed; + +struct ec_response_vboot_hash { + uint8_t status; /* enum ec_vboot_hash_status */ + uint8_t hash_type; /* enum ec_vboot_hash_type */ + uint8_t digest_size; /* Size of hash digest in bytes */ + uint8_t reserved0; /* Ignore; will be 0 */ + uint32_t offset; /* Offset in flash which was hashed */ + uint32_t size; /* Number of bytes hashed */ + uint8_t hash_digest[64]; /* Hash digest data */ +} __packed; + +enum ec_vboot_hash_cmd { + EC_VBOOT_HASH_GET = 0, /* Get current hash status */ + EC_VBOOT_HASH_ABORT = 1, /* Abort calculating current hash */ + EC_VBOOT_HASH_START = 2, /* Start computing a new hash */ + EC_VBOOT_HASH_RECALC = 3, /* Synchronously compute a new hash */ +}; + +enum ec_vboot_hash_type { + EC_VBOOT_HASH_TYPE_SHA256 = 0, /* SHA-256 */ +}; + +enum ec_vboot_hash_status { + EC_VBOOT_HASH_STATUS_NONE = 0, /* No hash (not started, or aborted) */ + EC_VBOOT_HASH_STATUS_DONE = 1, /* Finished computing a hash */ + EC_VBOOT_HASH_STATUS_BUSY = 2, /* Busy computing a hash */ +}; + +/* + * Special values for offset for EC_VBOOT_HASH_START and EC_VBOOT_HASH_RECALC. + * If one of these is specified, the EC will automatically update offset and + * size to the correct values for the specified image (RO or RW). + */ +#define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe +#define EC_VBOOT_HASH_OFFSET_RW 0xfffffffd + +/*****************************************************************************/ +/* USB charging control commands */ + +/* Set USB port charging mode */ +#define EC_CMD_USB_CHARGE_SET_MODE 0x30 + +struct ec_params_usb_charge_set_mode { + uint8_t usb_port_id; + uint8_t mode; +} __packed; + +/*****************************************************************************/ +/* Persistent storage for host */ + +/* Maximum bytes that can be read/written in a single command */ +#define EC_PSTORE_SIZE_MAX 64 + +/* Get persistent storage info */ +#define EC_CMD_PSTORE_INFO 0x40 + +struct ec_response_pstore_info { + /* Persistent storage size, in bytes */ + uint32_t pstore_size; + /* Access size; read/write offset and size must be a multiple of this */ + uint32_t access_size; +} __packed; + +/* + * Read persistent storage + * + * Response is params.size bytes of data. 
+ */ +#define EC_CMD_PSTORE_READ 0x41 + +struct ec_params_pstore_read { + uint32_t offset; /* Byte offset to read */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +/* Write persistent storage */ +#define EC_CMD_PSTORE_WRITE 0x42 + +struct ec_params_pstore_write { + uint32_t offset; /* Byte offset to write */ + uint32_t size; /* Size to write in bytes */ + uint8_t data[EC_PSTORE_SIZE_MAX]; +} __packed; + +/*****************************************************************************/ +/* Real-time clock */ + +/* RTC params and response structures */ +struct ec_params_rtc { + uint32_t time; +} __packed; + +struct ec_response_rtc { + uint32_t time; +} __packed; + +/* These use ec_response_rtc */ +#define EC_CMD_RTC_GET_VALUE 0x44 +#define EC_CMD_RTC_GET_ALARM 0x45 + +/* These all use ec_params_rtc */ +#define EC_CMD_RTC_SET_VALUE 0x46 +#define EC_CMD_RTC_SET_ALARM 0x47 + +/*****************************************************************************/ +/* Port80 log access */ + +/* Get last port80 code from previous boot */ +#define EC_CMD_PORT80_LAST_BOOT 0x48 + +struct ec_response_port80_last_boot { + uint16_t code; +} __packed; + +/*****************************************************************************/ +/* Thermal engine commands */ + +/* Set threshold value */ +#define EC_CMD_THERMAL_SET_THRESHOLD 0x50 + +struct ec_params_thermal_set_threshold { + uint8_t sensor_type; + uint8_t threshold_id; + uint16_t value; +} __packed; + +/* Get threshold value */ +#define EC_CMD_THERMAL_GET_THRESHOLD 0x51 + +struct ec_params_thermal_get_threshold { + uint8_t sensor_type; + uint8_t threshold_id; +} __packed; + +struct ec_response_thermal_get_threshold { + uint16_t value; +} __packed; + +/* Toggle automatic fan control */ +#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x52 + +/* Get TMP006 calibration data */ +#define EC_CMD_TMP006_GET_CALIBRATION 0x53 + +struct ec_params_tmp006_get_calibration { + uint8_t index; +} __packed; + +struct ec_response_tmp006_get_calibration { + float s0; + float b0; + float b1; + float b2; +} __packed; + +/* Set TMP006 calibration data */ +#define EC_CMD_TMP006_SET_CALIBRATION 0x54 + +struct ec_params_tmp006_set_calibration { + uint8_t index; + uint8_t reserved[3]; /* Reserved; set 0 */ + float s0; + float b0; + float b1; + float b2; +} __packed; + +/*****************************************************************************/ +/* MKBP - Matrix KeyBoard Protocol */ + +/* + * Read key state + * + * Returns raw data for keyboard cols; see ec_response_mkbp_info.cols for + * expected response size. + */ +#define EC_CMD_MKBP_STATE 0x60 + +/* Provide information about the matrix: number of rows and columns */ +#define EC_CMD_MKBP_INFO 0x61 + +struct ec_response_mkbp_info { + uint32_t rows; + uint32_t cols; + uint8_t switches; +} __packed; + +/* Simulate key press */ +#define EC_CMD_MKBP_SIMULATE_KEY 0x62 + +struct ec_params_mkbp_simulate_key { + uint8_t col; + uint8_t row; + uint8_t pressed; +} __packed; + +/* Configure keyboard scanning */ +#define EC_CMD_MKBP_SET_CONFIG 0x64 +#define EC_CMD_MKBP_GET_CONFIG 0x65 + +/* flags */ +enum mkbp_config_flags { + EC_MKBP_FLAGS_ENABLE = 1, /* Enable keyboard scanning */ +}; + +enum mkbp_config_valid { + EC_MKBP_VALID_SCAN_PERIOD = 1 << 0, + EC_MKBP_VALID_POLL_TIMEOUT = 1 << 1, + EC_MKBP_VALID_MIN_POST_SCAN_DELAY = 1 << 3, + EC_MKBP_VALID_OUTPUT_SETTLE = 1 << 4, + EC_MKBP_VALID_DEBOUNCE_DOWN = 1 << 5, + EC_MKBP_VALID_DEBOUNCE_UP = 1 << 6, + EC_MKBP_VALID_FIFO_MAX_DEPTH = 1 << 7, +}; + +/* Configuration for our key scanning algorithm */ +struct ec_mkbp_config { + uint32_t valid_mask; /* valid fields */ + uint8_t flags; /* some flags (enum mkbp_config_flags) */ + uint8_t valid_flags; /* which flags are valid */ + uint16_t scan_period_us; /* period between start of scans */ + /* revert to interrupt mode after no activity for this long */ + uint32_t poll_timeout_us; + /* + * minimum post-scan relax time. Once we finish a scan we check + * the time until we are due to start the next one. If this time is + * shorter than this field, we use this field instead. + */ + uint16_t min_post_scan_delay_us; + /* delay between setting up output and waiting for it to settle */ + uint16_t output_settle_us; + uint16_t debounce_down_us; /* time for debounce on key down */ + uint16_t debounce_up_us; /* time for debounce on key up */ + /* maximum depth to allow for fifo (0 = no keyscan output) */ + uint8_t fifo_max_depth; +} __packed; + +struct ec_params_mkbp_set_config { + struct ec_mkbp_config config; +} __packed; + +struct ec_response_mkbp_get_config { + struct ec_mkbp_config config; +} __packed; + +/* Run the key scan emulation */ +#define EC_CMD_KEYSCAN_SEQ_CTRL 0x66 + +enum ec_keyscan_seq_cmd { + EC_KEYSCAN_SEQ_STATUS = 0, /* Get status information */ + EC_KEYSCAN_SEQ_CLEAR = 1, /* Clear sequence */ + EC_KEYSCAN_SEQ_ADD = 2, /* Add item to sequence */ + EC_KEYSCAN_SEQ_START = 3, /* Start running sequence */ + EC_KEYSCAN_SEQ_COLLECT = 4, /* Collect sequence summary data */ +}; + +enum ec_collect_flags { + /* + * Indicates this scan was processed by the EC. Due to timing, some + * scans may be skipped. + */ + EC_KEYSCAN_SEQ_FLAG_DONE = 1 << 0, +}; + +struct ec_collect_item { + uint8_t flags; /* some flags (enum ec_collect_flags) */ +}; + +struct ec_params_keyscan_seq_ctrl { + uint8_t cmd; /* Command to send (enum ec_keyscan_seq_cmd) */ + union { + struct { + uint8_t active; /* still active */ + uint8_t num_items; /* number of items */ + /* Current item being presented */ + uint8_t cur_item; + } status; + struct { + /* + * Absolute time for this scan, measured from the + * start of the sequence. 
+ */ + uint32_t time_us; + uint8_t scan[0]; /* keyscan data */ + } add; + struct { + uint8_t start_item; /* First item to return */ + uint8_t num_items; /* Number of items to return */ + } collect; + }; +} __packed; + +struct ec_result_keyscan_seq_ctrl { + union { + struct { + uint8_t num_items; /* Number of items */ + /* Data for each item */ + struct ec_collect_item item[0]; + } collect; + }; +} __packed; + +/*****************************************************************************/ +/* Temperature sensor commands */ + +/* Read temperature sensor info */ +#define EC_CMD_TEMP_SENSOR_GET_INFO 0x70 + +struct ec_params_temp_sensor_get_info { + uint8_t id; +} __packed; + +struct ec_response_temp_sensor_get_info { + char sensor_name[32]; + uint8_t sensor_type; +} __packed; + +/*****************************************************************************/ + +/* + * Note: host commands 0x80 - 0x87 are reserved to avoid conflict with ACPI + * commands accidentally sent to the wrong interface. See the ACPI section + * below. + */ + +/*****************************************************************************/ +/* Host event commands */ + +/* + * Host event mask params and response structures, shared by all of the host + * event commands below. + */ +struct ec_params_host_event_mask { + uint32_t mask; +} __packed; + +struct ec_response_host_event_mask { + uint32_t mask; +} __packed; + +/* These all use ec_response_host_event_mask */ +#define EC_CMD_HOST_EVENT_GET_B 0x87 +#define EC_CMD_HOST_EVENT_GET_SMI_MASK 0x88 +#define EC_CMD_HOST_EVENT_GET_SCI_MASK 0x89 +#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x8d + +/* These all use ec_params_host_event_mask */ +#define EC_CMD_HOST_EVENT_SET_SMI_MASK 0x8a +#define EC_CMD_HOST_EVENT_SET_SCI_MASK 0x8b +#define EC_CMD_HOST_EVENT_CLEAR 0x8c +#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x8e +#define EC_CMD_HOST_EVENT_CLEAR_B 0x8f + +/*****************************************************************************/ +/* Switch commands */ + +/* Enable/disable LCD backlight */ +#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x90 + +struct ec_params_switch_enable_backlight { + uint8_t enabled; +} __packed; + +/* Enable/disable WLAN/Bluetooth */ +#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91 + +struct ec_params_switch_enable_wireless { + uint8_t enabled; +} __packed; + +/*****************************************************************************/ +/* GPIO commands. Only available on EC if write protect has been disabled. */ + +/* Set GPIO output value */ +#define EC_CMD_GPIO_SET 0x92 + +struct ec_params_gpio_set { + char name[32]; + uint8_t val; +} __packed; + +/* Get GPIO value */ +#define EC_CMD_GPIO_GET 0x93 + +struct ec_params_gpio_get { + char name[32]; +} __packed; +struct ec_response_gpio_get { + uint8_t val; +} __packed; + +/*****************************************************************************/ +/* I2C commands. Only available when flash write protect is unlocked. */ + +/* Read I2C bus */ +#define EC_CMD_I2C_READ 0x94 + +struct ec_params_i2c_read { + uint16_t addr; + uint8_t read_size; /* Either 8 or 16. */ + uint8_t port; + uint8_t offset; +} __packed; +struct ec_response_i2c_read { + uint16_t data; +} __packed; + +/* Write I2C bus */ +#define EC_CMD_I2C_WRITE 0x95 + +struct ec_params_i2c_write { + uint16_t data; + uint16_t addr; + uint8_t write_size; /* Either 8 or 16. */ + uint8_t port; + uint8_t offset; +} __packed; + +/*****************************************************************************/ +/* Charge state commands. 
+/* Charge state commands. Only available when flash write protect is
+ * unlocked. */
+
+/* Force charge state machine to stop in idle mode */
+#define EC_CMD_CHARGE_FORCE_IDLE 0x96
+
+struct ec_params_force_idle {
+	uint8_t enabled;
+} __packed;
+
+/*****************************************************************************/
+/* Console commands. Only available when flash write protect is unlocked. */
+
+/* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */
+#define EC_CMD_CONSOLE_SNAPSHOT 0x97
+
+/*
+ * Read next chunk of data from saved snapshot.
+ *
+ * Response is a null-terminated string, or an empty string if there is no
+ * more output remaining.
+ */
+#define EC_CMD_CONSOLE_READ 0x98
+
+/*****************************************************************************/
+
+/*
+ * Cut off battery power output if the battery supports it.
+ *
+ * For an unsupported battery, just don't implement this command and let the
+ * EC return EC_RES_INVALID_COMMAND.
+ */
+#define EC_CMD_BATTERY_CUT_OFF 0x99
+
+/*****************************************************************************/
+/* Temporary debug commands. TODO: remove this crosbug.com/p/13849 */
+
+/*
+ * Dump charge state machine context.
+ *
+ * Response is a binary dump of charge state machine context.
+ */
+#define EC_CMD_CHARGE_DUMP 0xa0
+
+/*
+ * Set maximum battery charging current.
+ */
+#define EC_CMD_CHARGE_CURRENT_LIMIT 0xa1
+
+struct ec_params_current_limit {
+	uint32_t limit;
+} __packed;
+
+/*****************************************************************************/
+/* System commands */
+
+/*
+ * TODO: this is a confusing name, since it doesn't necessarily reboot the EC.
+ * Rename to "set image" or something similar.
+ */
+#define EC_CMD_REBOOT_EC 0xd2
+
+/* Command */
+enum ec_reboot_cmd {
+	EC_REBOOT_CANCEL = 0,        /* Cancel a pending reboot */
+	EC_REBOOT_JUMP_RO = 1,       /* Jump to RO without rebooting */
+	EC_REBOOT_JUMP_RW = 2,       /* Jump to RW without rebooting */
+	/* (command 3 was jump to RW-B) */
+	EC_REBOOT_COLD = 4,          /* Cold-reboot */
+	EC_REBOOT_DISABLE_JUMP = 5,  /* Disable jump until next reboot */
+	EC_REBOOT_HIBERNATE = 6      /* Hibernate EC */
+};
+
+/* Flags for ec_params_reboot_ec.reboot_flags */
+#define EC_REBOOT_FLAG_RESERVED0      (1 << 0)  /* Was recovery request */
+#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN (1 << 1)  /* Reboot after AP shutdown */
+
+struct ec_params_reboot_ec {
+	uint8_t cmd;           /* enum ec_reboot_cmd */
+	uint8_t flags;         /* See EC_REBOOT_FLAG_* */
+} __packed;
+
+/*
+ * Get information on last EC panic.
+ *
+ * Returns variable-length platform-dependent panic information. See panic.h
+ * for details.
+ */
+#define EC_CMD_GET_PANIC_INFO 0xd3
+
+/*****************************************************************************/
+/*
+ * ACPI commands
+ *
+ * These are valid ONLY on the ACPI command/data port.
+ */
+
+/*
+ * ACPI Read Embedded Controller
+ *
+ * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_DATA bit to set
+ *    - Read value from EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_READ 0x80
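The five-step read sequence above maps naturally onto x86 port I/O. A minimal sketch (not part of the patch), assuming the status bits can be read back from EC_LPC_ADDR_ACPI_CMD (an assumption; the comment does not name the status port) and omitting the timeout a real caller would need:

	/* Sketch of the EC_CMD_ACPI_READ sequence over LPC port I/O. */
	static uint8_t example_acpi_read(uint8_t offset)
	{
		/* Step 1: issue the read command. */
		outb(EC_CMD_ACPI_READ, EC_LPC_ADDR_ACPI_CMD);
		/* Step 2: busy-wait until the EC has consumed it. */
		while (inb(EC_LPC_ADDR_ACPI_CMD) & EC_LPC_CMDR_PENDING)
			cpu_relax();
		/* Step 3: send the address to read. */
		outb(offset, EC_LPC_ADDR_ACPI_DATA);
		/* Step 4: busy-wait for the data to become available. */
		while (!(inb(EC_LPC_ADDR_ACPI_CMD) & EC_LPC_CMDR_DATA))
			cpu_relax();
		/* Step 5: fetch the value. */
		return inb(EC_LPC_ADDR_ACPI_DATA);
	}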
+
+/*
+ * ACPI Write Embedded Controller
+ *
+ * This writes to ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write value to EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_WRITE 0x81
+
+/*
+ * ACPI Query Embedded Controller
+ *
+ * This clears the lowest-order bit in the currently pending host events, and
+ * sets the result code to the 1-based index of the bit (event 0x00000001 = 1,
+ * event 0x80000000 = 32), or 0 if no event was pending.
+ */
+#define EC_CMD_ACPI_QUERY_EVENT 0x84
+
+/* Valid addresses in ACPI memory space, for read/write commands */
+/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
+#define EC_ACPI_MEM_VERSION 0x00
+/*
+ * Test location; writing value here updates test complement byte to (0xff -
+ * value).
+ */
+#define EC_ACPI_MEM_TEST 0x01
+/* Test complement; writes here are ignored. */
+#define EC_ACPI_MEM_TEST_COMPLIMENT 0x02
+/* Keyboard backlight brightness percent (0 - 100) */
+#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
+
+/* Current version of ACPI memory address space */
+#define EC_ACPI_MEM_VERSION_CURRENT 1
+
+
+/*****************************************************************************/
+/*
+ * Special commands
+ *
+ * These do not follow the normal rules for commands. See each command for
+ * details.
+ */
+
+/*
+ * Reboot NOW
+ *
+ * This command will work even when the EC LPC interface is busy, because the
+ * reboot command is processed at interrupt level. Note that when the EC
+ * reboots, the host will reboot too, so there is no response to this command.
+ *
+ * Use EC_CMD_REBOOT_EC to reboot the EC more politely.
+ */
+#define EC_CMD_REBOOT 0xd1  /* Think "die" */
+
+/*
+ * Resend last response (not supported on LPC).
+ *
+ * Returns EC_RES_UNAVAILABLE if there is no response available - for example,
+ * there was no previous command, or the previous command's response was too
+ * big to save.
+ */
+#define EC_CMD_RESEND_RESPONSE 0xdb
+
+/*
+ * This header byte on a command indicates version 0. Any header byte less
+ * than this means that we are talking to an old EC which doesn't support
+ * versioning. In that case, we assume version 0.
+ *
+ * Header bytes greater than this indicate a later version. For example,
+ * EC_CMD_VERSION0 + 1 means we are using version 1.
+ *
+ * The old EC interface must not use commands 0xdc or higher.
+ */
+#define EC_CMD_VERSION0 0xdc
+
+#endif  /* !__ACPI__ */
+
+#endif  /* __CROS_EC_COMMANDS_H */
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index ecddc5173c7..8f21daf62fb 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -1,9 +1,10 @@
 /*
  * TI Palmas
  *
- * Copyright 2011 Texas Instruments Inc.
+ * Copyright 2011-2013 Texas Instruments Inc.
* * Author: Graeme Gregory <gg@slimlogic.co.uk> + * Author: Ian Lartey <ian@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -22,6 +23,15 @@ #define PALMAS_NUM_CLIENTS 3 +/* The ID_REVISION NUMBERS */ +#define PALMAS_CHIP_OLD_ID 0x0000 +#define PALMAS_CHIP_ID 0xC035 +#define PALMAS_CHIP_CHARGER_ID 0xC036 + +#define is_palmas(a) (((a) == PALMAS_CHIP_OLD_ID) || \ + ((a) == PALMAS_CHIP_ID)) +#define is_palmas_charger(a) ((a) == PALMAS_CHIP_CHARGER_ID) + struct palmas_pmic; struct palmas_gpadc; struct palmas_resource; diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h index 1e2715d5b83..65471c4a392 100644 --- a/include/linux/mfd/retu.h +++ b/include/linux/mfd/retu.h @@ -1,5 +1,5 @@ /* - * Retu MFD driver interface + * Retu/Tahvo MFD driver interface * * This file is subject to the terms and conditions of the GNU General * Public License. See the file "COPYING" in the main directory of this @@ -19,4 +19,10 @@ int retu_write(struct retu_dev *, u8, u16); #define RETU_REG_CC1 0x0d /* Common control register 1 */ #define RETU_REG_STATUS 0x16 /* Status register */ +/* Interrupt sources */ +#define TAHVO_INT_VBUS 0 /* VBUS state */ + +/* Interrupt status */ +#define TAHVO_STAT_VBUS (1 << TAHVO_INT_VBUS) + #endif /* __LINUX_MFD_RETU_H */ diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 26ea7f1b7ca..86bc635f838 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -500,6 +500,8 @@ #define BPP_POWER_15_PERCENT_ON 0x08 #define BPP_POWER_ON 0x00 #define BPP_POWER_MASK 0x0F +#define SD_VCC_PARTIAL_POWER_ON 0x02 +#define SD_VCC_POWER_ON 0x00 /* PWR_GATE_CTRL */ #define PWR_GATE_EN 0x01 @@ -689,6 +691,40 @@ #define IMAGE_FLAG_ADDR0 0xCE80 #define IMAGE_FLAG_ADDR1 0xCE81 +/* Phy register */ +#define PHY_PCR 0x00 +#define PHY_RCR0 0x01 +#define PHY_RCR1 0x02 +#define PHY_RCR2 0x03 +#define PHY_RTCR 0x04 +#define PHY_RDR 0x05 +#define PHY_TCR0 0x06 +#define PHY_TCR1 0x07 +#define PHY_TUNE 0x08 +#define PHY_IMR 0x09 +#define PHY_BPCR 0x0A +#define PHY_BIST 0x0B +#define PHY_RAW_L 0x0C +#define PHY_RAW_H 0x0D +#define PHY_RAW_DATA 0x0E +#define PHY_HOST_CLK_CTRL 0x0F +#define PHY_DMR 0x10 +#define PHY_BACR 0x11 +#define PHY_IER 0x12 +#define PHY_BCSR 0x13 +#define PHY_BPR 0x14 +#define PHY_BPNR2 0x15 +#define PHY_BPNR 0x16 +#define PHY_BRNR2 0x17 +#define PHY_BENR 0x18 +#define PHY_REG_REV 0x19 +#define PHY_FLD0 0x1A +#define PHY_FLD1 0x1B +#define PHY_FLD2 0x1C +#define PHY_FLD3 0x1D +#define PHY_FLD4 0x1E +#define PHY_DUM_REG 0x1F + #define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) struct rtsx_pcr; diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h new file mode 100644 index 00000000000..ba89b94e4a5 --- /dev/null +++ b/include/linux/mfd/si476x-core.h @@ -0,0 +1,533 @@ +/* + * include/media/si476x-core.h -- Common definitions for si476x core + * device + * + * Copyright (C) 2012 Innovative Converged Devices(ICD) + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <andrew.smirnov@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef SI476X_CORE_H
+#define SI476X_CORE_H
+
+#include <linux/kfifo.h>
+#include <linux/atomic.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/videodev2.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mfd/si476x-platform.h>
+#include <linux/mfd/si476x-reports.h>
+
+/* Command Timeouts */
+#define SI476X_DEFAULT_TIMEOUT	100000
+#define SI476X_TIMEOUT_TUNE	700000
+#define SI476X_TIMEOUT_POWER_UP	330000
+#define SI476X_STATUS_POLL_US	0
+
+/* -------------------- si476x-i2c.c ----------------------- */
+
+enum si476x_freq_supported_chips {
+	SI476X_CHIP_SI4761 = 1,
+	SI476X_CHIP_SI4764,
+	SI476X_CHIP_SI4768,
+};
+
+enum si476x_part_revisions {
+	SI476X_REVISION_A10 = 0,
+	SI476X_REVISION_A20 = 1,
+	SI476X_REVISION_A30 = 2,
+};
+
+enum si476x_mfd_cells {
+	SI476X_RADIO_CELL = 0,
+	SI476X_CODEC_CELL,
+	SI476X_MFD_CELLS,
+};
+
+/**
+ * enum si476x_power_state - possible power states of the si476x
+ * device.
+ *
+ * @SI476X_POWER_DOWN: In this state all regulators are turned off
+ * and the reset line is pulled low. The device is completely
+ * inactive.
+ * @SI476X_POWER_UP_FULL: In this state all the power regulators are
+ * turned on, the reset line is pulled high, the IRQ line is enabled
+ * (polling is active in the polling use scenario) and the device is
+ * turned on with the POWER_UP command. The device is ready to be used.
+ * @SI476X_POWER_INCONSISTENT: This state indicates that the previous
+ * power down was inconsistent, meaning some of the regulators were
+ * not turned down and thus use of the device without power-cycling
+ * is impossible.
+ */
+enum si476x_power_state {
+	SI476X_POWER_DOWN = 0,
+	SI476X_POWER_UP_FULL = 1,
+	SI476X_POWER_INCONSISTENT = 2,
+};
+
+/**
+ * struct si476x_core - internal data structure representing the
+ * underlying "core" device which all the MFD cell-devices use.
+ *
+ * @client: Actual I2C client used to transfer commands to the chip.
+ * @chip_id: Last digit of the chip model (e.g. "1" for SI4761)
+ * @cells: MFD cell devices created by this driver.
+ * @cmd_lock: Mutex used to serialize all the requests to the core
+ * device. This field should not be used directly. Instead
+ * si476x_core_lock()/si476x_core_unlock() should be used to get
+ * exclusive access to the "core" device.
+ * @users: Active users counter (used by the radio cell)
+ * @rds_read_queue: Wait queue used to wait for RDS data.
+ * @rds_fifo: FIFO in which all the RDS data received from the chip is
+ * placed.
+ * @rds_fifo_drainer: Worker that drains the on-chip RDS FIFO.
+ * @rds_drainer_is_working: Flag used for launching only one instance
+ * of the @rds_fifo_drainer.
+ * @rds_drainer_status_lock: Lock used to guard access to the
+ * @rds_drainer_is_working variable.
+ * @command: Wait queue for waiting on command completion.
+ * @cts: Clear To Send flag set upon receiving first status with CTS
+ * set.
+ * @tuning: Wait queue used for waiting for tune/seek command
+ * completion.
+ * @stc: Similar to @cts, but for the STC bit of the status value.
+ * @power_up_parameters: Parameters used as argument for POWER_UP
+ * command when the device is started.
+ * @state: Current power state of the device.
+ * @supplies: Structure containing handles to all power supplies used
+ * by the device (NULL ones are ignored).
+ * @gpio_reset: GPIO pin connected to the RSTB pin of the chip.
+ * @pinmux: Chip's configurable pins configuration.
+ * @diversity_mode: Chip's role when functioning in diversity mode.
+ * @status_monitor: Polling worker used in the polling use case
+ * scenario (when IRQ is not available).
+ * @revision: Chip's running firmware revision number (used for correct
+ * command set support).
+ */
+
+struct si476x_core {
+	struct i2c_client *client;
+	struct regmap *regmap;
+	int chip_id;
+	struct mfd_cell cells[SI476X_MFD_CELLS];
+
+	struct mutex cmd_lock; /* for serializing fm radio operations */
+	atomic_t users;
+
+	wait_queue_head_t  rds_read_queue;
+	struct kfifo       rds_fifo;
+	struct work_struct rds_fifo_drainer;
+	bool               rds_drainer_is_working;
+	struct mutex       rds_drainer_status_lock;
+
+	wait_queue_head_t command;
+	atomic_t          cts;
+
+	wait_queue_head_t tuning;
+	atomic_t          stc;
+
+	struct si476x_power_up_args power_up_parameters;
+
+	enum si476x_power_state power_state;
+
+	struct regulator_bulk_data supplies[4];
+
+	int gpio_reset;
+
+	struct si476x_pinmux pinmux;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	atomic_t is_alive;
+
+	struct delayed_work status_monitor;
+#define SI476X_WORK_TO_CORE(w) container_of(to_delayed_work(w),	\
+					    struct si476x_core,		\
+					    status_monitor)
+
+	int revision;
+
+	int rds_fifo_depth;
+};
+
+static inline struct si476x_core *i2c_mfd_cell_to_core(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev->parent);
+	return i2c_get_clientdata(client);
+}
+
+
+/**
+ * si476x_core_lock() - lock the core device to get exclusive access
+ * to it.
+ */
+static inline void si476x_core_lock(struct si476x_core *core)
+{
+	mutex_lock(&core->cmd_lock);
+}
+
+/**
+ * si476x_core_unlock() - unlock the core device to relinquish
+ * exclusive access to it.
+ */
+static inline void si476x_core_unlock(struct si476x_core *core)
+{
+	mutex_unlock(&core->cmd_lock);
+}
+
+/* The *_TUNE_FREQ family of commands accept frequency in multiples of
+   10 kHz */
+static inline u16 hz_to_si476x(struct si476x_core *core, int freq)
+{
+	u16 result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq / 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq / 1000;
+		break;
+	}
+
+	return result;
+}
+
+static inline int si476x_to_hz(struct si476x_core *core, u16 freq)
+{
+	int result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq * 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq * 1000;
+		break;
+	}
+
+	return result;
+}
+
+/* Since the V4L2_TUNER_CAP_LOW flag is supplied, the V4L2 subsystem
+ * measures frequency in 62.5 Hz units */
+
+static inline int hz_to_v4l2(int freq)
+{
+	return (freq * 10) / 625;
+}
+
+static inline int v4l2_to_hz(int freq)
+{
+	return (freq * 625) / 10;
+}
+
+static inline u16 v4l2_to_si476x(struct si476x_core *core, int freq)
+{
+	return hz_to_si476x(core, v4l2_to_hz(freq));
+}
+
+static inline int si476x_to_v4l2(struct si476x_core *core, u16 freq)
+{
+	return hz_to_v4l2(si476x_to_hz(core, freq));
+}
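To make the unit conventions above concrete, here is a small sketch (not part of the patch) that pushes 91.5 MHz through the helpers; the numbers in the comments follow directly from the definitions above:

	/* 91500000 Hz is 9150 chip units in FM mode (10 kHz steps)
	 * and 1464000 V4L2 units (62.5 Hz steps). */
	static void example_freq_units(struct si476x_core *core)
	{
		int hz = 91500000;			/* 91.5 MHz */
		u16 chip = hz_to_si476x(core, hz);	/* 9150 in FM mode */
		int v4l2 = hz_to_v4l2(hz);		/* 1464000 */

		/* Round-trips are lossless for frequencies on the step grid. */
		WARN_ON(si476x_to_hz(core, chip) != hz);
		WARN_ON(v4l2_to_hz(v4l2) != hz);
	}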
+
+
+/**
+ * struct si476x_func_info - structure containing the result of the
+ * FUNC_INFO command.
+ *
+ * @firmware.major: Firmware major number.
+ * @firmware.minor[...]: Firmware minor numbers.
+ * @patch_id:
+ * @func: Mode the tuner is working in.
+ */
+struct si476x_func_info {
+	struct {
+		u8 major, minor[2];
+	} firmware;
+	u16 patch_id;
+	enum si476x_func func;
+};
+
+/**
+ * struct si476x_power_down_args - structure used to pass parameters
+ * to the POWER_DOWN command
+ *
+ * @xosc: true - Power down, but leave the oscillator running.
+ *        false - Full power down.
+ */
+struct si476x_power_down_args {
+	bool xosc;
+};
+
+/**
+ * enum si476x_tunemode - enum representing possible tune modes for
+ * the chip.
+ * @SI476X_TM_VALIDATED_NORMAL_TUNE: Unconditionally stay on the new
+ * channel after tune, tune status is valid.
+ * @SI476X_TM_INVALIDATED_FAST_TUNE: Unconditionally stay on the new
+ * channel after tune, tune status invalid.
+ * @SI476X_TM_VALIDATED_AF_TUNE: Jump back to the previous channel if
+ * metric thresholds are not met.
+ * @SI476X_TM_VALIDATED_AF_CHECK: Unconditionally jump back to the
+ * previous channel.
+ */
+enum si476x_tunemode {
+	SI476X_TM_VALIDATED_NORMAL_TUNE = 0,
+	SI476X_TM_INVALIDATED_FAST_TUNE = 1,
+	SI476X_TM_VALIDATED_AF_TUNE     = 2,
+	SI476X_TM_VALIDATED_AF_CHECK    = 3,
+};
+
+/**
+ * enum si476x_smoothmetrics - enum containing the possible settings
+ * for audio transitioning of the chip
+ * @SI476X_SM_INITIALIZE_AUDIO: Initialize audio state to match this
+ * new channel
+ * @SI476X_SM_TRANSITION_AUDIO: Transition audio state from previous
+ * channel values to the new values
+ */
+enum si476x_smoothmetrics {
+	SI476X_SM_INITIALIZE_AUDIO = 0,
+	SI476X_SM_TRANSITION_AUDIO = 1,
+};
+
+/**
+ * struct si476x_rds_status_report - the structure representing the
+ * response to the 'FM_RDS_STATUS' command
+ * @rdstpptyint: Traffic program flag (TP) and/or program type (PTY)
+ * code has changed.
+ * @rdspiint: Program identification (PI) code has changed.
+ * @rdssyncint: RDS synchronization has changed.
+ * @rdsfifoint: RDS was received and the RDS FIFO has at least
+ * 'FM_RDS_INTERRUPT_FIFO_COUNT' elements in it.
+ * @tpptyvalid: TP flag and PTY code are valid flag.
+ * @pivalid: PI code is valid flag.
+ * @rdssync: RDS is currently synchronized.
+ * @rdsfifolost: One or more RDS groups have been lost/discarded flag.
+ * @tp: Current channel's TP flag.
+ * @pty: Current channel's PTY code.
+ * @pi: Current channel's PI code.
+ * @rdsfifoused: Number of blocks remaining in the RDS FIFO (0 if
+ * empty).
+ */ +struct si476x_rds_status_report { + bool rdstpptyint, rdspiint, rdssyncint, rdsfifoint; + bool tpptyvalid, pivalid, rdssync, rdsfifolost; + bool tp; + + u8 pty; + u16 pi; + + u8 rdsfifoused; + u8 ble[4]; + + struct v4l2_rds_data rds[4]; +}; + +struct si476x_rsq_status_args { + bool primary; + bool rsqack; + bool attune; + bool cancel; + bool stcack; +}; + +enum si476x_injside { + SI476X_INJSIDE_AUTO = 0, + SI476X_INJSIDE_LOW = 1, + SI476X_INJSIDE_HIGH = 2, +}; + +struct si476x_tune_freq_args { + bool zifsr; + bool hd; + enum si476x_injside injside; + int freq; + enum si476x_tunemode tunemode; + enum si476x_smoothmetrics smoothmetrics; + int antcap; +}; + +int si476x_core_stop(struct si476x_core *, bool); +int si476x_core_start(struct si476x_core *, bool); +int si476x_core_set_power_state(struct si476x_core *, enum si476x_power_state); +bool si476x_core_has_am(struct si476x_core *); +bool si476x_core_has_diversity(struct si476x_core *); +bool si476x_core_is_a_secondary_tuner(struct si476x_core *); +bool si476x_core_is_a_primary_tuner(struct si476x_core *); +bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core); +bool si476x_core_is_powered_up(struct si476x_core *core); + +enum si476x_i2c_type { + SI476X_I2C_SEND, + SI476X_I2C_RECV +}; + +int si476x_core_i2c_xfer(struct si476x_core *, + enum si476x_i2c_type, + char *, int); + + +/* -------------------- si476x-cmd.c ----------------------- */ + +int si476x_core_cmd_func_info(struct si476x_core *, struct si476x_func_info *); +int si476x_core_cmd_set_property(struct si476x_core *, u16, u16); +int si476x_core_cmd_get_property(struct si476x_core *, u16); +int si476x_core_cmd_dig_audio_pin_cfg(struct si476x_core *, + enum si476x_dclk_config, + enum si476x_dfs_config, + enum si476x_dout_config, + enum si476x_xout_config); +int si476x_core_cmd_zif_pin_cfg(struct si476x_core *, + enum si476x_iqclk_config, + enum si476x_iqfs_config, + enum si476x_iout_config, + enum si476x_qout_config); +int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *, + enum si476x_icin_config, + enum si476x_icip_config, + enum si476x_icon_config, + enum si476x_icop_config); +int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *, + enum si476x_lrout_config); +int si476x_core_cmd_intb_pin_cfg(struct si476x_core *, enum si476x_intb_config, + enum si476x_a1_config); +int si476x_core_cmd_fm_seek_start(struct si476x_core *, bool, bool); +int si476x_core_cmd_am_seek_start(struct si476x_core *, bool, bool); +int si476x_core_cmd_fm_rds_status(struct si476x_core *, bool, bool, bool, + struct si476x_rds_status_report *); +int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *, bool, + struct si476x_rds_blockcount_report *); +int si476x_core_cmd_fm_tune_freq(struct si476x_core *, + struct si476x_tune_freq_args *); +int si476x_core_cmd_am_tune_freq(struct si476x_core *, + struct si476x_tune_freq_args *); +int si476x_core_cmd_am_rsq_status(struct si476x_core *, + struct si476x_rsq_status_args *, + struct si476x_rsq_status_report *); +int si476x_core_cmd_fm_rsq_status(struct si476x_core *, + struct si476x_rsq_status_args *, + struct si476x_rsq_status_report *); +int si476x_core_cmd_power_up(struct si476x_core *, + struct si476x_power_up_args *); +int si476x_core_cmd_power_down(struct si476x_core *, + struct si476x_power_down_args *); +int si476x_core_cmd_fm_phase_div_status(struct si476x_core *); +int si476x_core_cmd_fm_phase_diversity(struct si476x_core *, + enum si476x_phase_diversity_mode); + +int si476x_core_cmd_fm_acf_status(struct si476x_core *, + 
struct si476x_acf_status_report *); +int si476x_core_cmd_am_acf_status(struct si476x_core *, + struct si476x_acf_status_report *); +int si476x_core_cmd_agc_status(struct si476x_core *, + struct si476x_agc_status_report *); + +enum si476x_power_grid_type { + SI476X_POWER_GRID_50HZ = 0, + SI476X_POWER_GRID_60HZ, +}; + +/* Properties */ + +enum si476x_interrupt_flags { + SI476X_STCIEN = (1 << 0), + SI476X_ACFIEN = (1 << 1), + SI476X_RDSIEN = (1 << 2), + SI476X_RSQIEN = (1 << 3), + + SI476X_ERRIEN = (1 << 6), + SI476X_CTSIEN = (1 << 7), + + SI476X_STCREP = (1 << 8), + SI476X_ACFREP = (1 << 9), + SI476X_RDSREP = (1 << 10), + SI476X_RSQREP = (1 << 11), +}; + +enum si476x_rdsint_sources { + SI476X_RDSTPPTY = (1 << 4), + SI476X_RDSPI = (1 << 3), + SI476X_RDSSYNC = (1 << 1), + SI476X_RDSRECV = (1 << 0), +}; + +enum si476x_status_response_bits { + SI476X_CTS = (1 << 7), + SI476X_ERR = (1 << 6), + /* Status response for WB receiver */ + SI476X_WB_ASQ_INT = (1 << 4), + SI476X_RSQ_INT = (1 << 3), + /* Status response for FM receiver */ + SI476X_FM_RDS_INT = (1 << 2), + SI476X_ACF_INT = (1 << 1), + SI476X_STC_INT = (1 << 0), +}; + +/* -------------------- si476x-prop.c ----------------------- */ + +enum si476x_common_receiver_properties { + SI476X_PROP_INT_CTL_ENABLE = 0x0000, + SI476X_PROP_DIGITAL_IO_INPUT_SAMPLE_RATE = 0x0200, + SI476X_PROP_DIGITAL_IO_INPUT_FORMAT = 0x0201, + SI476X_PROP_DIGITAL_IO_OUTPUT_SAMPLE_RATE = 0x0202, + SI476X_PROP_DIGITAL_IO_OUTPUT_FORMAT = 0x0203, + + SI476X_PROP_SEEK_BAND_BOTTOM = 0x1100, + SI476X_PROP_SEEK_BAND_TOP = 0x1101, + SI476X_PROP_SEEK_FREQUENCY_SPACING = 0x1102, + + SI476X_PROP_VALID_MAX_TUNE_ERROR = 0x2000, + SI476X_PROP_VALID_SNR_THRESHOLD = 0x2003, + SI476X_PROP_VALID_RSSI_THRESHOLD = 0x2004, +}; + +enum si476x_am_receiver_properties { + SI476X_PROP_AUDIO_PWR_LINE_FILTER = 0x0303, +}; + +enum si476x_fm_receiver_properties { + SI476X_PROP_AUDIO_DEEMPHASIS = 0x0302, + + SI476X_PROP_FM_RDS_INTERRUPT_SOURCE = 0x4000, + SI476X_PROP_FM_RDS_INTERRUPT_FIFO_COUNT = 0x4001, + SI476X_PROP_FM_RDS_CONFIG = 0x4002, +}; + +enum si476x_prop_audio_pwr_line_filter_bits { + SI476X_PROP_PWR_HARMONICS_MASK = 0x001f, + SI476X_PROP_PWR_GRID_MASK = 0x0100, + SI476X_PROP_PWR_ENABLE_MASK = 0x0200, + SI476X_PROP_PWR_GRID_50HZ = 0x0000, + SI476X_PROP_PWR_GRID_60HZ = 0x0100, +}; + +enum si476x_prop_fm_rds_config_bits { + SI476X_PROP_RDSEN_MASK = 0x1, + SI476X_PROP_RDSEN = 0x1, +}; + + +struct regmap *devm_regmap_init_si476x(struct si476x_core *); + +#endif /* SI476X_CORE_H */ diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h new file mode 100644 index 00000000000..88bb93b7a9d --- /dev/null +++ b/include/linux/mfd/si476x-platform.h @@ -0,0 +1,267 @@ +/* + * include/media/si476x-platform.h -- Platform data specific definitions + * + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <andrew.smirnov@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ */
+
+#ifndef __SI476X_PLATFORM_H__
+#define __SI476X_PLATFORM_H__
+
+/* It is possible to select one of the four addresses using pins A0
+ * and A1 on SI476x */
+#define SI476X_I2C_ADDR_1	0x60
+#define SI476X_I2C_ADDR_2	0x61
+#define SI476X_I2C_ADDR_3	0x62
+#define SI476X_I2C_ADDR_4	0x63
+
+enum si476x_iqclk_config {
+	SI476X_IQCLK_NOOP     = 0,
+	SI476X_IQCLK_TRISTATE = 1,
+	SI476X_IQCLK_IQ       = 21,
+};
+enum si476x_iqfs_config {
+	SI476X_IQFS_NOOP     = 0,
+	SI476X_IQFS_TRISTATE = 1,
+	SI476X_IQFS_IQ       = 21,
+};
+enum si476x_iout_config {
+	SI476X_IOUT_NOOP     = 0,
+	SI476X_IOUT_TRISTATE = 1,
+	SI476X_IOUT_OUTPUT   = 22,
+};
+enum si476x_qout_config {
+	SI476X_QOUT_NOOP     = 0,
+	SI476X_QOUT_TRISTATE = 1,
+	SI476X_QOUT_OUTPUT   = 22,
+};
+
+enum si476x_dclk_config {
+	SI476X_DCLK_NOOP     = 0,
+	SI476X_DCLK_TRISTATE = 1,
+	SI476X_DCLK_DAUDIO   = 10,
+};
+
+enum si476x_dfs_config {
+	SI476X_DFS_NOOP     = 0,
+	SI476X_DFS_TRISTATE = 1,
+	SI476X_DFS_DAUDIO   = 10,
+};
+
+enum si476x_dout_config {
+	SI476X_DOUT_NOOP       = 0,
+	SI476X_DOUT_TRISTATE   = 1,
+	SI476X_DOUT_I2S_OUTPUT = 12,
+	SI476X_DOUT_I2S_INPUT  = 13,
+};
+
+enum si476x_xout_config {
+	SI476X_XOUT_NOOP        = 0,
+	SI476X_XOUT_TRISTATE    = 1,
+	SI476X_XOUT_I2S_INPUT   = 13,
+	SI476X_XOUT_MODE_SELECT = 23,
+};
+
+enum si476x_icin_config {
+	SI476X_ICIN_NOOP      = 0,
+	SI476X_ICIN_TRISTATE  = 1,
+	SI476X_ICIN_GPO1_HIGH = 2,
+	SI476X_ICIN_GPO1_LOW  = 3,
+	SI476X_ICIN_IC_LINK   = 30,
+};
+
+enum si476x_icip_config {
+	SI476X_ICIP_NOOP      = 0,
+	SI476X_ICIP_TRISTATE  = 1,
+	SI476X_ICIP_GPO2_HIGH = 2,
+	SI476X_ICIP_GPO2_LOW  = 3,
+	SI476X_ICIP_IC_LINK   = 30,
+};
+
+enum si476x_icon_config {
+	SI476X_ICON_NOOP     = 0,
+	SI476X_ICON_TRISTATE = 1,
+	SI476X_ICON_I2S      = 10,
+	SI476X_ICON_IC_LINK  = 30,
+};
+
+enum si476x_icop_config {
+	SI476X_ICOP_NOOP     = 0,
+	SI476X_ICOP_TRISTATE = 1,
+	SI476X_ICOP_I2S      = 10,
+	SI476X_ICOP_IC_LINK  = 30,
+};
+
+
+enum si476x_lrout_config {
+	SI476X_LROUT_NOOP     = 0,
+	SI476X_LROUT_TRISTATE = 1,
+	SI476X_LROUT_AUDIO    = 2,
+	SI476X_LROUT_MPX      = 3,
+};
+
+
+enum si476x_intb_config {
+	SI476X_INTB_NOOP     = 0,
+	SI476X_INTB_TRISTATE = 1,
+	SI476X_INTB_DAUDIO   = 10,
+	SI476X_INTB_IRQ      = 40,
+};
+
+enum si476x_a1_config {
+	SI476X_A1_NOOP     = 0,
+	SI476X_A1_TRISTATE = 1,
+	SI476X_A1_IRQ      = 40,
+};
+
+
+struct si476x_pinmux {
+	enum si476x_dclk_config dclk;
+	enum si476x_dfs_config  dfs;
+	enum si476x_dout_config dout;
+	enum si476x_xout_config xout;
+
+	enum si476x_iqclk_config iqclk;
+	enum si476x_iqfs_config  iqfs;
+	enum si476x_iout_config  iout;
+	enum si476x_qout_config  qout;
+
+	enum si476x_icin_config icin;
+	enum si476x_icip_config icip;
+	enum si476x_icon_config icon;
+	enum si476x_icop_config icop;
+
+	enum si476x_lrout_config lrout;
+
+	enum si476x_intb_config intb;
+	enum si476x_a1_config   a1;
+};
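Since the pinmux struct above is just a bundle of the per-pin enums, a board file fills it in field by field. A hypothetical configuration (not part of the patch, not a tested board setup) that routes digital audio out over I2S and takes interrupts on INTB:

	/* Illustrative only: I2S audio out, IRQ on INTB, unused pins no-op. */
	static const struct si476x_pinmux example_pinmux = {
		.dclk  = SI476X_DCLK_DAUDIO,
		.dfs   = SI476X_DFS_DAUDIO,
		.dout  = SI476X_DOUT_I2S_OUTPUT,
		.xout  = SI476X_XOUT_TRISTATE,

		.iqclk = SI476X_IQCLK_NOOP,
		.iqfs  = SI476X_IQFS_NOOP,
		.iout  = SI476X_IOUT_NOOP,
		.qout  = SI476X_QOUT_NOOP,

		.icin  = SI476X_ICIN_NOOP,
		.icip  = SI476X_ICIP_NOOP,
		.icon  = SI476X_ICON_NOOP,
		.icop  = SI476X_ICOP_NOOP,

		.lrout = SI476X_LROUT_AUDIO,

		.intb  = SI476X_INTB_IRQ,
		.a1    = SI476X_A1_NOOP,
	};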
+enum si476x_ibias6x {
+	SI476X_IBIAS6X_OTHER              = 0,
+	SI476X_IBIAS6X_RCVR1_NON_4MHZ_CLK = 1,
+};
+
+enum si476x_xstart {
+	SI476X_XSTART_MULTIPLE_TUNER = 0x11,
+	SI476X_XSTART_NORMAL         = 0x77,
+};
+
+enum si476x_freq {
+	SI476X_FREQ_4_MHZ         = 0,
+	SI476X_FREQ_37P209375_MHZ = 1,
+	SI476X_FREQ_36P4_MHZ      = 2,
+	SI476X_FREQ_37P8_MHZ      = 3,
+};
+
+enum si476x_xmode {
+	SI476X_XMODE_CRYSTAL_RCVR1   = 1,
+	SI476X_XMODE_EXT_CLOCK       = 2,
+	SI476X_XMODE_CRYSTAL_RCVR2_3 = 3,
+};
+
+enum si476x_xbiashc {
+	SI476X_XBIASHC_SINGLE_RECEIVER   = 0,
+	SI476X_XBIASHC_MULTIPLE_RECEIVER = 1,
+};
+
+enum si476x_xbias {
+	SI476X_XBIAS_RCVR2_3    = 0,
+	SI476X_XBIAS_4MHZ_RCVR1 = 3,
+	SI476X_XBIAS_RCVR1      = 7,
+};
+
+enum si476x_func {
+	SI476X_FUNC_BOOTLOADER  = 0,
+	SI476X_FUNC_FM_RECEIVER = 1,
+	SI476X_FUNC_AM_RECEIVER = 2,
+	SI476X_FUNC_WB_RECEIVER = 3,
+};
+
+
+/**
+ * @xcload: Selects the amount of additional on-chip capacitance to
+ *          be connected between XTAL1 and GND and between XTAL2 and
+ *          GND. One half of the capacitance value shown here is the
+ *          additional load capacitance presented to the xtal. The
+ *          minimum step size is 0.277 pF. Recommended value is 0x28
+ *          but it will be layout dependent. Range is 0-0x3F, i.e.
+ *          (0-16.33 pF)
+ * @ctsien: enable CTSINT (interrupt request when CTS condition
+ *          arises) when set
+ * @intsel: when set, the A1 pin becomes the interrupt pin; otherwise,
+ *          INTB is the interrupt pin
+ * @func:   selects the boot function of the device, i.e.
+ *          SI476X_BOOTLOADER  - Boot loader
+ *          SI476X_FM_RECEIVER - FM receiver
+ *          SI476X_AM_RECEIVER - AM receiver
+ *          SI476X_WB_RECEIVER - Weatherband receiver
+ * @freq:   oscillator's crystal frequency:
+ *          SI476X_XTAL_37P209375_MHZ - 37.209375 MHz
+ *          SI476X_XTAL_36P4_MHZ      - 36.4 MHz
+ *          SI476X_XTAL_37P8_MHZ      - 37.8 MHz
+ */
+struct si476x_power_up_args {
+	enum si476x_ibias6x ibias6x;
+	enum si476x_xstart  xstart;
+	u8   xcload;
+	bool fastboot;
+	enum si476x_xbiashc xbiashc;
+	enum si476x_xbias   xbias;
+	enum si476x_func    func;
+	enum si476x_freq    freq;
+	enum si476x_xmode   xmode;
+};
+
+
+/**
+ * enum si476x_phase_diversity_mode - possible phase diversity modes
+ * for SI4764/5/6/7 chips.
+ *
+ * @SI476X_PHDIV_DISABLED:           Phase diversity feature is
+ *                                   disabled.
+ * @SI476X_PHDIV_PRIMARY_COMBINING:  Tuner works as a primary tuner
+ *                                   in combination with a
+ *                                   secondary one.
+ * @SI476X_PHDIV_PRIMARY_ANTENNA:    Tuner works as a primary tuner
+ *                                   using only its own antenna.
+ * @SI476X_PHDIV_SECONDARY_ANTENNA:  Tuner works as a primary tuner
+ *                                   using the secondary tuner's antenna.
+ * @SI476X_PHDIV_SECONDARY_COMBINING: Tuner works as a secondary
+ *                                   tuner in combination with the
+ *                                   primary one.
+ */
+enum si476x_phase_diversity_mode {
+	SI476X_PHDIV_DISABLED            = 0,
+	SI476X_PHDIV_PRIMARY_COMBINING   = 1,
+	SI476X_PHDIV_PRIMARY_ANTENNA     = 2,
+	SI476X_PHDIV_SECONDARY_ANTENNA   = 3,
+	SI476X_PHDIV_SECONDARY_COMBINING = 5,
+};
+
+
+/*
+ * Platform dependent definition
+ */
+struct si476x_platform_data {
+	int gpio_reset; /* < 0 if not used */
+
+	struct si476x_power_up_args power_up_parameters;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	struct si476x_pinmux pinmux;
+};
+
+
+#endif /* __SI476X_PLATFORM_H__ */
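Putting the pieces of this header together, platform data for a hypothetical single-tuner FM board might look as follows (not part of the patch); the GPIO number is invented and every value would be board dependent:

	/* Illustrative board data: FM receiver on its own 37.209375 MHz xtal. */
	static struct si476x_platform_data example_pdata = {
		.gpio_reset = 129,	/* hypothetical reset GPIO */
		.power_up_parameters = {
			.ibias6x  = SI476X_IBIAS6X_OTHER,
			.xstart   = SI476X_XSTART_NORMAL,
			.xcload   = 0x28,	/* recommended default, layout dependent */
			.fastboot = false,
			.xbiashc  = SI476X_XBIASHC_SINGLE_RECEIVER,
			.xbias    = SI476X_XBIAS_RCVR1,
			.func     = SI476X_FUNC_FM_RECEIVER,
			.freq     = SI476X_FREQ_37P209375_MHZ,
			.xmode    = SI476X_XMODE_CRYSTAL_RCVR1,
		},
		.diversity_mode = SI476X_PHDIV_DISABLED,
		.pinmux = { /* see the pinmux sketch above */ },
	};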
diff --git a/include/linux/mfd/si476x-reports.h b/include/linux/mfd/si476x-reports.h
new file mode 100644
index 00000000000..e0b9455a79c
--- /dev/null
+++ b/include/linux/mfd/si476x-reports.h
@@ -0,0 +1,163 @@
+/*
+ * include/media/si476x-platform.h -- Definitions of the data formats
+ * returned by debugfs hooks
+ *
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef __SI476X_REPORTS_H__
+#define __SI476X_REPORTS_H__
+
+/**
+ * struct si476x_rsq_status - structure containing received signal
+ * quality
+ * @multhint: Multipath Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ * @multlint: Multipath Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ * @snrhint:  SNR Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_SNR_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_SNR_HIGH_THRESHOLD
+ * @snrlint:  SNR Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_SNR_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_SNR_LOW_THRESHOLD
+ * @rssihint: RSSI Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_RSSI_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_RSSI_HIGH_THRESHOLD
+ * @rssilint: RSSI Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_RSSI_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_RSSI_LOW_THRESHOLD
+ * @bltf:     Band Limit.
+ *            Set if the seek command hits the band limit or wrapped to
+ *            the original frequency.
+ * @snr_ready: SNR measurement in progress.
+ * @rssiready: RSSI measurement in progress.
+ * @afcrl:    Set if FREQOFF >= MAX_TUNE_ERROR
+ * @valid:    Set if the channel is valid
+ *             rssi < FM_VALID_RSSI_THRESHOLD
+ *             snr  < FM_VALID_SNR_THRESHOLD
+ *             tune_error < FM_VALID_MAX_TUNE_ERROR
+ * @readfreq: Current tuned frequency.
+ * @freqoff:  Signed frequency offset.
+ * @rssi:     Received Signal Strength Indicator (dBuV).
+ * @snr:      RF SNR Indicator (dB).
+ * @lassi:
+ * @hassi:    Low/High side Adjacent (100 kHz) Channel Strength Indicator
+ * @mult:     Multipath indicator
+ * @dev:      Who knows? But values may vary.
+ * @readantcap: Antenna tuning capacity value.
+ * @assi:     Adjacent Channel (+/- 200 kHz) Strength Indicator
+ * @usn:      Ultrasonic Noise Indicator in -dBFS
+ */
+struct si476x_rsq_status_report {
+	__u8 multhint, multlint;
+	__u8 snrhint,  snrlint;
+	__u8 rssihint, rssilint;
+	__u8 bltf;
+	__u8 snr_ready;
+	__u8 rssiready;
+	__u8 injside;
+	__u8 afcrl;
+	__u8 valid;
+
+	__u16 readfreq;
+	__s8  freqoff;
+	__s8  rssi;
+	__s8  snr;
+	__s8  issi;
+	__s8  lassi, hassi;
+	__s8  mult;
+	__u8  dev;
+	__u16 readantcap;
+	__s8  assi;
+	__s8  usn;
+
+	__u8 pilotdev;
+	__u8 rdsdev;
+	__u8 assidev;
+	__u8 strongdev;
+	__u16 rdspi;
+} __packed;
+
+/**
+ * si476x_acf_status_report - ACF report results
+ *
+ * @blend_int: If set, indicates that stereo separation has crossed
+ * below the blend threshold as set by FM_ACF_BLEND_THRESHOLD
+ * @hblend_int: If set, indicates that the HiBlend cutoff frequency is
+ * lower than the threshold as set by FM_ACF_HBLEND_THRESHOLD
+ * @hicut_int: If set, indicates that the HiCut cutoff frequency is lower
+ * than the threshold set by ACF_
+
+ */
+struct si476x_acf_status_report {
+	__u8 blend_int;
+	__u8 hblend_int;
+	__u8 hicut_int;
+	__u8 chbw_int;
+	__u8 softmute_int;
+	__u8 smute;
+	__u8 smattn;
+	__u8 chbw;
+	__u8 hicut;
+	__u8 hiblend;
+	__u8 pilot;
+	__u8 stblend;
+} __packed;
+
+enum si476x_fmagc {
+	SI476X_FMAGC_10K_OHM  = 0,
+	SI476X_FMAGC_800_OHM  = 1,
+	SI476X_FMAGC_400_OHM  = 2,
+	SI476X_FMAGC_200_OHM  = 4,
+	SI476X_FMAGC_100_OHM  = 8,
+	SI476X_FMAGC_50_OHM   = 16,
+	SI476X_FMAGC_25_OHM   = 32,
+	SI476X_FMAGC_12P5_OHM = 64,
+	SI476X_FMAGC_6P25_OHM = 128,
+};
+
+struct si476x_agc_status_report {
+	__u8 mxhi;
+	__u8 mxlo;
+	__u8 lnahi;
+	__u8 lnalo;
+	__u8 fmagc1;
+	__u8 fmagc2;
+	__u8 pgagain;
+	__u8 fmwblang;
+} __packed;
+
+struct si476x_rds_blockcount_report {
+	__u16 expected;
+	__u16 received;
+	__u16 uncorrectable;
+} __packed;
+
+#endif  /* __SI476X_REPORTS_H__ */
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 383ac1512a3..48395a69a7e 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -26,6 +26,7 @@ enum stmpe_partnum {
 	STMPE801,
 	STMPE811,
 	STMPE1601,
+	STMPE1801,
 	STMPE2401,
 	STMPE2403,
 	STMPE_NBR_PARTS
@@ -39,6 +40,7 @@ enum {
 	STMPE_IDX_CHIP_ID,
 	STMPE_IDX_ICR_LSB,
 	STMPE_IDX_IER_LSB,
+	STMPE_IDX_ISR_LSB,
 	STMPE_IDX_ISR_MSB,
 	STMPE_IDX_GPMR_LSB,
 	STMPE_IDX_GPSR_LSB,
@@ -49,6 +51,7 @@ enum {
 	STMPE_IDX_GPFER_LSB,
 	STMPE_IDX_GPAFR_U_MSB,
 	STMPE_IDX_IEGPIOR_LSB,
+	STMPE_IDX_ISGPIOR_LSB,
 	STMPE_IDX_ISGPIOR_MSB,
 	STMPE_IDX_MAX,
 };
diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index 6aeb6b8da64..b473577f36d 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -15,8 +15,11 @@
 #ifndef __LINUX_MFD_SYSCON_H__
 #define __LINUX_MFD_SYSCON_H__
 
+struct device_node;
+
 extern struct regmap *syscon_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
+extern struct regmap *syscon_regmap_lookup_by_pdevname(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_phandle(
 					struct device_node *np,
 					const char *property);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 998628a2b08..3f43069413e 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -27,6 +27,7 @@
 
 /* TPS65090 IRQs */
 enum {
+	TPS65090_IRQ_INTERRUPT,
 	TPS65090_IRQ_VAC_STATUS_CHANGE,
 	TPS65090_IRQ_VSYS_STATUS_CHANGE,
 	TPS65090_IRQ_BAT_STATUS_CHANGE,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8873e834959..e326ae2882a 100644
--- a/include/linux/mmc/host.h
+++
b/include/linux/mmc/host.h @@ -342,9 +342,7 @@ struct mmc_host { mmc_pm_flag_t pm_flags; /* requested pm features */ -#ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *led; /* activity led */ -#endif #ifdef CONFIG_REGULATOR bool regulator_enabled; /* regulator state */ diff --git a/include/linux/of.h b/include/linux/of.h index fb2002f3c7d..1b671c3809b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -356,6 +356,11 @@ static inline struct device_node *of_find_node_by_name(struct device_node *from, return NULL; } +static inline struct device_node *of_get_parent(const struct device_node *node) +{ + return NULL; +} + static inline bool of_have_populated_dt(void) { return false; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e0373d26c24..f463a46424e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,12 @@ static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) +extern bool perf_event_can_stop_tick(void); +#else +static inline bool perf_event_can_stop_tick(void) { return true; } +#endif + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h index 1509570d5a3..202e290faea 100644 --- a/include/linux/platform_data/leds-lp55xx.h +++ b/include/linux/platform_data/leds-lp55xx.h @@ -20,18 +20,6 @@ #define LP55XX_CLOCK_INT 1 #define LP55XX_CLOCK_EXT 2 -/* Bits in LP5521 CONFIG register. 'update_config' in lp55xx_platform_data */ -#define LP5521_PWM_HF 0x40 /* PWM: 0 = 256Hz, 1 = 558Hz */ -#define LP5521_PWRSAVE_EN 0x20 /* 1 = Power save mode */ -#define LP5521_CP_MODE_OFF 0 /* Charge pump (CP) off */ -#define LP5521_CP_MODE_BYPASS 8 /* CP forced to bypass mode */ -#define LP5521_CP_MODE_1X5 0x10 /* CP forced to 1.5x mode */ -#define LP5521_CP_MODE_AUTO 0x18 /* Automatic mode selection */ -#define LP5521_R_TO_BATT 4 /* R out: 0 = CP, 1 = Vbat */ -#define LP5521_CLK_SRC_EXT 0 /* Ext-clk source (CLK_32K) */ -#define LP5521_CLK_INT 1 /* Internal clock */ -#define LP5521_CLK_AUTO 2 /* Automatic clock selection */ - struct lp55xx_led_config { const char *name; u8 chan_nr; @@ -40,9 +28,9 @@ struct lp55xx_led_config { }; struct lp55xx_predef_pattern { - u8 *r; - u8 *g; - u8 *b; + const u8 *r; + const u8 *g; + const u8 *b; u8 size_r; u8 size_g; u8 size_b; @@ -79,9 +67,6 @@ struct lp55xx_platform_data { /* Predefined pattern data */ struct lp55xx_predef_pattern *patterns; unsigned int num_patterns; - - /* _CONFIG register */ - u8 update_config; }; #endif /* _LEDS_LP55XX_H */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 60bac697a91..7794d75ed15 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -123,6 +123,8 @@ void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); + void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9ed2c9a4de4..4ccd68e49b0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1000,4 +1000,11 @@ static inline notrace void 
rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#ifdef CONFIG_RCU_NOCB_CPU +extern bool rcu_is_nocb_cpu(int cpu); +#else +static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f950048b6e..4800e9d1864 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -231,7 +231,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int get_nohz_timer_target(void); @@ -1764,13 +1764,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1856,10 +1856,17 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -extern void wake_up_idle_cpu(int cpu); +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); #else -static inline void wake_up_idle_cpu(int cpu) { } +static inline void wake_up_nohz_cpu(int cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern bool sched_can_stop_tick(void); +extern u64 scheduler_tick_max_deferment(void); +#else +static inline bool sched_can_stop_tick(void) { return false; } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/include/linux/tick.h b/include/linux/tick.h index 553272e6af5..9180f4b85e6 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { return 0; @@ -155,7 +155,24 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ + +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +extern int tick_nohz_full_cpu(int cpu); +extern void 
tick_nohz_full_check(void); +extern void tick_nohz_full_kick(void); +extern void tick_nohz_full_kick_all(void); +extern void tick_nohz_task_switch(struct task_struct *tsk); +#else +static inline void tick_nohz_init(void) { } +static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline void tick_nohz_full_check(void) { } +static inline void tick_nohz_full_kick(void) { } +static inline void tick_nohz_full_kick_all(void) { } +static inline void tick_nohz_task_switch(struct task_struct *tsk) { } +#endif + # ifdef CONFIG_CPU_IDLE_GOV_MENU extern void menu_hrtimer_cancel(void); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index d21b33c4c6c..2e9ee4d1c67 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -83,15 +83,12 @@ #define UCB_ID 0x7e #define UCB_ID_1400 0x4304 -struct ucb1400_gpio_data { - int gpio_offset; - int (*gpio_setup)(struct device *dev, int ngpio); - int (*gpio_teardown)(struct device *dev, int ngpio); -}; - struct ucb1400_gpio { struct gpio_chip gc; struct snd_ac97 *ac97; + int gpio_offset; + int (*gpio_setup)(struct device *dev, int ngpio); + int (*gpio_teardown)(struct device *dev, int ngpio); }; struct ucb1400_ts { @@ -110,6 +107,9 @@ struct ucb1400 { struct ucb1400_pdata { int irq; + int gpio_offset; + int (*gpio_setup)(struct device *dev, int ngpio); + int (*gpio_teardown)(struct device *dev, int ngpio); }; static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) @@ -162,10 +162,4 @@ static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, int adcsync); -#ifdef CONFIG_GPIO_UCB1400 -void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data); -#else -static inline void ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) {} -#endif - #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 19911dddaeb..7005d1109ec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? 
-__entry->errno : __entry->reason) ); -#if defined(__KVM_HAVE_IRQ_LINE) +#if defined(CONFIG_HAVE_KVM_IRQCHIP) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq, {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} +#endif /* defined(__KVM_HAVE_IOAPIC) */ + +#if defined(CONFIG_HAVE_KVM_IRQCHIP) + TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), TP_ARGS(irqchip, pin), @@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq, __entry->pin = pin; ), +#ifdef kvm_irqchips TP_printk("irqchip %s pin %u", __print_symbolic(__entry->irqchip, kvm_irqchips), __entry->pin) +#else + TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin) +#endif ); +#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ -#endif /* defined(__KVM_HAVE_IOAPIC) */ #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 #define KVM_TRACE_MMIO_READ 1 diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 8d219470624..68c2c2000f0 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -323,6 +323,27 @@ TRACE_EVENT(itimer_expire, (int) __entry->pid, (unsigned long long)__entry->now) ); +#ifdef CONFIG_NO_HZ_COMMON +TRACE_EVENT(tick_stop, + + TP_PROTO(int success, char *error_msg), + + TP_ARGS(success, error_msg), + + TP_STRUCT__entry( + __field( int , success ) + __string( msg, error_msg ) + ), + + TP_fast_assign( + __entry->success = success; + __assign_str(msg, error_msg); + ), + + TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg)) +); +#endif + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c56ba3d80c..a5c86fc34a3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -449,12 +449,15 @@ enum { kvm_ioeventfd_flag_nr_datamatch, kvm_ioeventfd_flag_nr_pio, kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_max, }; #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) @@ -558,9 +561,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_ASSIGNMENT 17 -#endif #define KVM_CAP_IOMMU 18 #ifdef __KVM_HAVE_MSI #define KVM_CAP_DEVICE_MSI 20 @@ -576,13 +577,9 @@ struct kvm_ppc_smmu_info { #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif -#ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 -#endif #define KVM_CAP_IRQ_INJECT_STATUS 26 -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 -#endif #ifdef __KVM_HAVE_MSIX #define KVM_CAP_DEVICE_MSIX 28 #endif @@ -665,6 +662,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_EPR 86 #define KVM_CAP_ARM_PSCI 87 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 +#define KVM_CAP_DEVICE_CTRL 89 +#define KVM_CAP_IRQ_MPIC 90 +#define KVM_CAP_PPC_RTAS 91 +#define KVM_CAP_IRQ_XICS 92 #ifdef KVM_CAP_IRQ_ROUTING @@ -818,6 +819,28 @@ struct kvm_arm_device_addr { }; /* + * Device control API, available with 
KVM_CAP_DEVICE_CTRL + */ +#define KVM_CREATE_DEVICE_TEST 1 + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +#define KVM_DEV_TYPE_FSL_MPIC_20 1 +#define KVM_DEV_TYPE_FSL_MPIC_42 2 +#define KVM_DEV_TYPE_XICS 3 + +/* * ioctls for VM fds */ #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) @@ -904,6 +927,16 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* Available with KVM_CAP_PPC_RTAS */ +#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) + +/* ioctl for vm fd */ +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) + +/* ioctls for fds returned by KVM_CREATE_DEVICE */ +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) /* * ioctls for vcpu fds diff --git a/init/Kconfig b/init/Kconfig index a76d13189e4..9d3a7887a6d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -302,7 +302,7 @@ choice # Kind of a stub config for the pure tick based cputime accounting config TICK_CPU_ACCOUNTING bool "Simple tick based cputime accounting" - depends on !S390 + depends on !S390 && !NO_HZ_FULL help This is the basic tick based cputime accounting that maintains statistics about user, system and idle time spent on per jiffies @@ -312,7 +312,7 @@ config TICK_CPU_ACCOUNTING config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" - depends on HAVE_VIRT_CPU_ACCOUNTING + depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time @@ -342,7 +342,7 @@ config VIRT_CPU_ACCOUNTING_GEN config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING + depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each @@ -576,7 +576,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option permits CPUs to enter dynticks-idle state even if @@ -687,7 +687,7 @@ choice config RCU_NOCB_CPU_NONE bool "No build_forced no-CBs CPUs" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option does not force any of the CPUs to be no-CBs CPUs. Only CPUs designated by the rcu_nocbs= boot parameter will be @@ -695,7 +695,7 @@ config RCU_NOCB_CPU_NONE config RCU_NOCB_CPU_ZERO bool "CPU 0 is a build_forced no-CBs CPU" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option forces CPU 0 to be a no-CBs CPU. 
Additional CPUs may be designated as no-CBs CPUs using the rcu_nocbs= boot diff --git a/init/main.c b/init/main.c index ceed17aaedf..9484f4ba88d 100644 --- a/init/main.c +++ b/init/main.c @@ -544,6 +544,7 @@ asmlinkage void __init start_kernel(void) idr_init_cache(); perf_event_init(); rcu_init(); + tick_nohz_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); diff --git a/kernel/events/core.c b/kernel/events/core.c index 3820e3cefba..6b41c1899a8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -18,6 +18,7 @@ #include <linux/poll.h> #include <linux/slab.h> #include <linux/hash.h> +#include <linux/tick.h> #include <linux/sysfs.h> #include <linux/dcache.h> #include <linux/percpu.h> @@ -685,8 +686,12 @@ static void perf_pmu_rotate_start(struct pmu *pmu) WARN_ON(!irqs_disabled()); - if (list_empty(&cpuctx->rotation_list)) + if (list_empty(&cpuctx->rotation_list)) { + int was_empty = list_empty(head); list_add(&cpuctx->rotation_list, head); + if (was_empty) + tick_nohz_full_kick(); + } } static void get_ctx(struct perf_event_context *ctx) @@ -2591,6 +2596,16 @@ done: list_del_init(&cpuctx->rotation_list); } +#ifdef CONFIG_NO_HZ_FULL +bool perf_event_can_stop_tick(void) +{ + if (list_empty(&__get_cpu_var(rotation_list))) + return true; + else + return false; +} +#endif + void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 97fddb09762..cd55144270b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -326,11 +326,16 @@ void rb_free(struct ring_buffer *rb) } #else +static int data_page_nr(struct ring_buffer *rb) +{ + return rb->nr_pages << page_order(rb); +} struct page * perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { - if (pgoff > (1UL << page_order(rb))) + /* The '>' counts in the user page. */ + if (pgoff > data_page_nr(rb)) return NULL; return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); @@ -350,10 +355,11 @@ static void rb_free_work(struct work_struct *work) int i, nr; rb = container_of(work, struct ring_buffer, work); - nr = 1 << page_order(rb); + nr = data_page_nr(rb); base = rb->user_page; - for (i = 0; i < nr + 1; i++) + /* The '<=' counts in the user page. 
*/ + for (i = 0; i <= nr; i++) perf_mmap_unmark_page(base + (i * PAGE_SIZE)); vfree(base); @@ -387,7 +393,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) rb->user_page = all_buf; rb->data_pages[0] = all_buf + PAGE_SIZE; rb->page_order = ilog2(nr_pages); - rb->nr_pages = 1; + rb->nr_pages = !!nr_pages; ring_buffer_init(rb, watermark, flags); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 609d8ff38b7..fd4b13b131f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -172,7 +172,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1125,7 +1125,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8fd709c9bb5..42670e9b44e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -10,6 +10,8 @@ #include <linux/kernel_stat.h> #include <trace/events/timer.h> #include <linux/random.h> +#include <linux/tick.h> +#include <linux/workqueue.h> /* * Called after updating RLIMIT_CPU to run cpu timer and update @@ -153,6 +155,21 @@ static void bump_cpu_timer(struct k_itimer *timer, } } +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. + */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) + return 1; + return 0; +} + static inline cputime_t prof_ticks(struct task_struct *p) { cputime_t utime, stime; @@ -636,6 +653,37 @@ static int cpu_timer_sample_group(const clockid_t which_clock, return 0; } +#ifdef CONFIG_NO_HZ_FULL +static void nohz_kick_work_fn(struct work_struct *work) +{ + tick_nohz_full_kick_all(); +} + +static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn); + +/* + * We need the IPIs to be sent from sane process context. + * The posix cpu timers are always set with irqs disabled. + */ +static void posix_cpu_timer_kick_nohz(void) +{ + schedule_work(&nohz_kick_work); +} + +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) +{ + if (!task_cputime_zero(&tsk->cputime_expires)) + return false; + + if (tsk->signal->cputimer.running) + return false; + + return true; +} +#else +static inline void posix_cpu_timer_kick_nohz(void) { } +#endif + /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. @@ -794,6 +842,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, sample_to_timespec(timer->it_clock, old_incr, &old->it_interval); } + if (!ret) + posix_cpu_timer_kick_nohz(); return ret; } @@ -1008,21 +1058,6 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, } } -/** - * task_cputime_zero - Check a task_cputime struct for all zero fields. - * - * @cputime: The struct to compare. - * - * Checks @cputime to see if all fields are zero. Returns true if all fields - * are zero, false if any field is nonzero. 
- */ -static inline int task_cputime_zero(const struct task_cputime *cputime) -{ - if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) - return 1; - return 0; -} - /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1336,6 +1371,13 @@ void run_posix_cpu_timers(struct task_struct *tsk) cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } + + /* + * In case some timers were rescheduled after the queue got emptied, + * wake up full dynticks CPUs. + */ + if (tsk->signal->cputimer.running) + posix_cpu_timer_kick_nohz(); } /* @@ -1366,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } if (!*newval) - return; + goto out; *newval += now.cpu; } @@ -1384,6 +1426,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, tsk->signal->cputime_expires.virt_exp = *newval; break; } +out: + posix_cpu_timer_kick_nohz(); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
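To make the predicate concrete: the posix_cpu_timers_can_stop_tick() helper added in the kernel/posix-cpu-timers.c hunks above allows the tick to stop only while a task has no armed CPU timers. Below is a minimal stand-alone model of that check; the struct layout and the *_model names are simplifications invented for this illustration, not kernel API.

    #include <stdbool.h>
    #include <stdio.h>

    struct task_cputime { unsigned long long utime, stime, sum_exec_runtime; };
    struct task_model {
            struct task_cputime cputime_expires;    /* per-task expirations */
            bool group_cputimer_running;            /* process-wide cputimer */
    };

    /* Mirrors task_cputime_zero(): no field may hold a pending expiration. */
    static bool cputime_zero(const struct task_cputime *c)
    {
            return !c->utime && !c->stime && !c->sum_exec_runtime;
    }

    /* The tick may stop only if neither a per-task nor a process-wide
     * CPU timer could fire on this task. */
    static bool can_stop_tick_model(const struct task_model *t)
    {
            return cputime_zero(&t->cputime_expires) &&
                   !t->group_cputimer_running;
    }

    int main(void)
    {
            struct task_model t = { { 0, 0, 0 }, false };

            printf("no timers armed: can stop tick = %d\n", can_stop_tick_model(&t));
            t.cputime_expires.utime = 1000;         /* arm a per-task timer */
            printf("timer armed:     can stop tick = %d\n", can_stop_tick_model(&t));
            return 0;
    }

This also shows why posix_cpu_timer_set() and set_process_cpu_timer() above kick the full-dynticks CPUs after arming a timer: the predicate flips, so a stopped tick has to be restarted.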
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d8534308fd0..16ea6792501 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -799,6 +799,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) rdp->offline_fqs++; return 1; } + + /* + * There is a possibility that a CPU in adaptive-ticks state + * might run in the kernel with the scheduling-clock tick disabled + * for an extended time period. Invoke rcu_kick_nohz_cpu() to + * force the CPU to restart the scheduling-clock tick if this + * CPU is in this state. + */ + rcu_kick_nohz_cpu(rdp->cpu); + return 0; } @@ -1820,7 +1830,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (is_nocb_cpu(rdp->cpu)) + if (rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2892,10 +2902,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * corresponding CPU's preceding callbacks have been invoked. */ for_each_possible_cpu(cpu) { - if (!cpu_online(cpu) && !is_nocb_cpu(cpu)) + if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) continue; rdp = per_cpu_ptr(rsp->rda, cpu); - if (is_nocb_cpu(cpu)) { + if (rcu_is_nocb_cpu(cpu)) { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14ee40795d6..da77a8f57ff 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -530,13 +530,13 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); -static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); +static void rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d084ae3f281..170814dc418 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -28,6 +28,7 @@ #include <linux/gfp.h> #include <linux/oom.h> #include <linux/smpboot.h> +#include <linux/tick.h> #define RCU_KTHREAD_PRIO 1 @@ -1705,7 +1706,7 @@ static void rcu_prepare_for_idle(int cpu) return; /* If this is a no-CBs CPU, no callbacks, just return. */ - if (is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(cpu)) return; /* @@ -1747,7 +1748,7 @@ static void rcu_cleanup_after_idle(int cpu) struct rcu_data *rdp; struct rcu_state *rsp; - if (is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(cpu)) return; rcu_try_advance_all_cbs(); for_each_rcu_flavor(rsp) { @@ -2052,7 +2053,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) } /* Is the specified CPU a no-CBs CPU? */ -static bool is_nocb_cpu(int cpu) +bool rcu_is_nocb_cpu(int cpu) { if (have_rcu_nocb_mask) return cpumask_test_cpu(cpu, rcu_nocb_mask); @@ -2110,7 +2111,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { - if (!is_nocb_cpu(rdp->cpu)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); if (__is_kfree_rcu_offset((unsigned long)rhp->func)) @@ -2134,7 +2135,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, long qll = rsp->qlen_lazy; /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ - if (!is_nocb_cpu(smp_processor_id())) + if (!rcu_is_nocb_cpu(smp_processor_id())) return 0; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2306,11 +2307,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) { } -static bool is_nocb_cpu(int cpu) -{ - return false; -} - static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { @@ -2337,3 +2333,20 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
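The rename from the static is_nocb_cpu() to the exported rcu_is_nocb_cpu() in the hunks above lets the tick code verify at boot that every full-dynticks CPU is also an RCU no-CBs CPU (see tick_nohz_init() later in this series). A rough user-space model of the predicate's semantics, assuming a single-word cpumask and at most 64 CPUs (the *_model name is invented for this sketch):

    #include <stdbool.h>
    #include <stdio.h>

    static bool have_rcu_nocb_mask;
    static unsigned long rcu_nocb_mask;     /* bit n set => CPU n is offloaded */

    /* Mirrors rcu_is_nocb_cpu(): with no mask configured, no CPU is no-CBs. */
    static bool rcu_is_nocb_cpu_model(int cpu)
    {
            if (have_rcu_nocb_mask)
                    return rcu_nocb_mask & (1UL << cpu);
            return false;
    }

    int main(void)
    {
            have_rcu_nocb_mask = true;
            rcu_nocb_mask = 0x0e;           /* CPUs 1-3, as with rcu_nocbs=1-3 */
            for (int cpu = 0; cpu < 8; cpu++)
                    printf("cpu%d: nocb=%d\n", cpu, rcu_is_nocb_cpu_model(cpu));
            return 0;
    }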
+ +/* + * An adaptive-ticks CPU can potentially execute in kernel mode for an + * arbitrarily long period of time with the scheduling-clock tick turned + * off. RCU will be paying attention to this CPU because it is in the + * kernel, but the CPU cannot be guaranteed to be executing the RCU state + * machine because the scheduling-clock tick has been disabled. Therefore, + * if an adaptive-ticks CPU is failing to respond to the current grace + * period and has not been idle from an RCU perspective, kick it. + */ +static void rcu_kick_nohz_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_cpu(cpu)) + smp_send_reschedule(cpu); +#endif /* #ifdef CONFIG_NO_HZ_FULL */ +}
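The kernel/sched/core.c diff that follows adds sched_can_stop_tick(), and a later kernel/sched/sched.h hunk kicks a full-dynticks CPU when its runqueue grows from one runnable task to two. A minimal stand-alone sketch of that handshake; the *_model names are invented here, and a printf stands in for the reschedule IPI:

    #include <stdbool.h>
    #include <stdio.h>

    struct rq_model { int nr_running; };

    /* Mirrors sched_can_stop_tick(): more than one runnable task means
     * preemption is needed, so the tick must keep running. */
    static bool sched_can_stop_tick_model(const struct rq_model *rq)
    {
            return rq->nr_running <= 1;
    }

    static void enqueue_task_model(struct rq_model *rq)
    {
            rq->nr_running++;
            /* Mirrors inc_nr_running(): the 1 -> 2 transition is the moment
             * a full-dynticks CPU must be IPI'd to restart its tick. */
            if (rq->nr_running == 2)
                    printf("kick: send reschedule IPI to restart the tick\n");
    }

    int main(void)
    {
            struct rq_model rq = { 1 };

            printf("1 task:  can stop tick = %d\n", sched_can_stop_tick_model(&rq));
            enqueue_task_model(&rq);
            printf("2 tasks: can stop tick = %d\n", sched_can_stop_tick_model(&rq));
            return 0;
    }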
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5662f58f0b6..58453b8272f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -544,7 +544,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -582,7 +582,7 @@ unlock: * account when the CPU goes back to idle and evaluates the timer * wheel for the next timer event. */ -void wake_up_idle_cpu(int cpu) +static void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -612,20 +612,56 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static bool wake_up_full_nohz_cpu(int cpu) +{ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + smp_send_reschedule(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (!wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + static inline bool got_nohz_idle_kick(void) { int cpu = smp_processor_id(); return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ + +#ifdef CONFIG_NO_HZ_FULL +bool sched_can_stop_tick(void) +{ + struct rq *rq; + + rq = this_rq(); + + /* Make sure rq->nr_running update is visible after the IPI */ + smp_rmb(); + + /* More than one running task needs preemption */ + if (rq->nr_running > 1) + return false; + + return true; +} +#endif /* CONFIG_NO_HZ_FULL */ void sched_avg_update(struct rq *rq) { @@ -1357,7 +1393,8 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() + && !tick_nohz_full_cpu(smp_processor_id())) return; /* @@ -1374,6 +1411,7 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); + tick_nohz_full_check(); sched_ttwu_pending(); /* @@ -1855,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } + + tick_nohz_task_switch(current); } #ifdef CONFIG_SMP @@ -2118,7 +2158,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2344,12 +2384,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2509,7 +2549,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2569,7 +2609,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -2696,7 +2736,34 @@ void scheduler_tick(void) rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif + rq_last_tick_reset(rq); +} + +#ifdef CONFIG_NO_HZ_FULL +/** + * scheduler_tick_max_deferment + * + * Keep at least one tick per second when a single + * active task is running because the scheduler doesn't + * yet completely support a full dynticks environment. + * + * This makes sure that uptime, CFS vruntime, load + * balancing, etc... continue to move forward, even + * with a very low granularity. + */ +u64 scheduler_tick_max_deferment(void) +{ + struct rq *rq = this_rq(); + unsigned long next, now = ACCESS_ONCE(jiffies); + + next = rq->last_sched_tick + HZ; + + if (time_before_eq(next, now)) + return 0; + + return jiffies_to_usecs(next - now) * NSEC_PER_USEC; } +#endif notrace unsigned long get_parent_ip(unsigned long addr) { @@ -6951,9 +7018,12 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = 0; +#endif #endif init_rq_hrtick(rq); atomic_set(&rq->nr_iowait, 0); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8bf7081b1ec..c61a614465c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5355,7 +5355,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notices that there may be an idle rebalancing @@ -5572,9 +5572,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped.
*/ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5717,7 +5717,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6187,7 +6187,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b8ce7732834..d8da01008d3 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -17,6 +17,7 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) { idle_exit_fair(rq); + rq_last_tick_reset(rq); } static void post_schedule_idle(struct rq *rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4c225c4c711..ce39224d615 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/tick.h> #include "cpupri.h" #include "cpuacct.h" @@ -405,10 +406,13 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif +#ifdef CONFIG_NO_HZ_FULL + unsigned long last_sched_tick; +#endif int skip_clock_update; /* capture load from *all* tasks on this cpu: */ @@ -1072,6 +1076,16 @@ static inline u64 steal_ticks(u64 steal) static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; + +#ifdef CONFIG_NO_HZ_FULL + if (rq->nr_running == 2) { + if (tick_nohz_full_cpu(rq->cpu)) { + /* Order rq->nr_running write against the IPI */ + smp_wmb(); + smp_send_reschedule(rq->cpu); + } + } +#endif } static inline void dec_nr_running(struct rq *rq) @@ -1079,6 +1093,13 @@ static inline void dec_nr_running(struct rq *rq) rq->nr_running--; } +static inline void rq_last_tick_reset(struct rq *rq) +{ +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = jiffies; +#endif +} + extern void update_rq_clock(struct rq *rq); extern void activate_task(struct rq *rq, struct task_struct *p, int flags); @@ -1299,7 +1320,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index aa82723c720..b5197dcb0da 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -329,6 +329,19 @@ static inline void invoke_softirq(void) wakeup_softirqd(); } +static inline void tick_irq_exit(void) +{ +#ifdef CONFIG_NO_HZ_COMMON + int cpu = smp_processor_id(); + + /* Make sure that timer wheel updates are propagated */ + if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { + if (!in_interrupt()) + tick_nohz_irq_exit(); + } +#endif +} + /* * Exit an interrupt context. 
Process softirqs if needed and possible: */ @@ -346,11 +359,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ - /* Make sure that timer wheel updates are propagated */ - if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) - tick_nohz_irq_exit(); -#endif + tick_irq_exit(); rcu_irq_exit(); } diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd..e4c07b0692b 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,20 +64,88 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independent of this. config TICK_ONESHOT bool -config NO_HZ - bool "Tickless System (Dynamic Ticks)" +config NO_HZ_COMMON + bool depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT + +choice + prompt "Timer tick handling" + default NO_HZ_IDLE if NO_HZ + +config HZ_PERIODIC + bool "Periodic timer ticks (constant rate, no dynticks)" + help + This option keeps the tick running periodically at a constant + rate, even when the CPU doesn't need it. + +config NO_HZ_IDLE + bool "Idle dynticks system (tickless idle)" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select NO_HZ_COMMON + help + This option enables a tickless idle system: timer interrupts + will only trigger on an as-needed basis when the system is idle. + This is usually beneficial for energy saving. + + Most of the time you want to say Y here. + +config NO_HZ_FULL + bool "Full dynticks system (tickless)" + # NO_HZ_COMMON dependency + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + # We need at least one periodic CPU for timekeeping + depends on SMP + # RCU_USER_QS dependency + depends on HAVE_CONTEXT_TRACKING + # VIRT_CPU_ACCOUNTING_GEN dependency + depends on 64BIT + select NO_HZ_COMMON + select RCU_USER_QS + select RCU_NOCB_CPU + select VIRT_CPU_ACCOUNTING_GEN + select CONTEXT_TRACKING_FORCE + select IRQ_WORK + help + Adaptively try to shut down the tick whenever possible, even when + the CPU is running tasks. Typically this requires running a single + task on the CPU. Chances for running tickless are maximized when + the task mostly runs in userspace and has little kernel activity. + + You need to fill in the nohz_full boot parameter with the + desired range of dynticks CPUs. + + This is implemented at the expense of some overhead in user <-> kernel + transitions: syscalls, exceptions and interrupts, even when the + tick is dynamically switched off. + + Say N. + +endchoice + +config NO_HZ_FULL_ALL + bool "Full dynticks system on all CPUs by default" + depends on NO_HZ_FULL + help + If the user doesn't pass the nohz_full boot option to + define the range of full dynticks CPUs, consider that all + CPUs in the system are full dynticks by default. + Note the boot CPU will still be kept outside the range to + handle the timekeeping duty. + +config NO_HZ + bool "Old Idle dynticks config" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS help - This option enables a tickless system: timer interrupts will - only trigger on an as-needed basis both when the system is - busy and when the system is idle. + This is the old config entry that enables dynticks idle. + We keep it around for a little while to preserve backward + compatibility with older config files.
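The NO_HZ_FULL help text above asks the user to fill in the nohz_full= boot parameter; tick_nohz_full_setup() later in this series parses it with cpulist_parse(). A minimal user-space model of that cpulist-to-bitmask mapping is sketched below; it handles only plain "a-b" ranges and single CPUs, assumes at most 64 CPUs, and parse_cpulist_model() is a name invented for this illustration:

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned long parse_cpulist_model(const char *s)
    {
            unsigned long mask = 0;

            while (*s) {
                    char *end;
                    long a = strtol(s, &end, 10), b = a;

                    if (*end == '-')                /* "a-b" range */
                            b = strtol(end + 1, &end, 10);
                    for (long cpu = a; cpu <= b; cpu++)
                            mask |= 1UL << cpu;
                    s = (*end == ',') ? end + 1 : end;
            }
            return mask;
    }

    int main(void)
    {
            /* nohz_full=1-3,5 => CPUs 1,2,3 and 5; prints mask=0x2e */
            printf("mask=0x%lx\n", parse_cpulist_model("1-3,5"));
            return 0;
    }

As the setup code further down enforces, the boot CPU is cleared from this mask so that at least one CPU keeps ticking to handle timekeeping.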
config HIGH_RES_TIMERS bool "High Resolution Timer Support" diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 61d00a8cdf2..206bbfb34e0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -693,7 +693,8 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - tick_do_timer_cpu = cpu; + if (!tick_nohz_full_cpu(cpu)) + tick_do_timer_cpu = cpu; /* * We must be careful here. There might be other CPUs diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 6176a3e4570..5d3fb100bc0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,10 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - tick_do_timer_cpu = cpu; + if (!tick_nohz_full_cpu(cpu)) + tick_do_timer_cpu = cpu; + else + tick_do_timer_cpu = TICK_DO_TIMER_NONE; tick_next_period = ktime_get(); tick_period = ktime_set(0, NSEC_PER_SEC / HZ); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 225f8bf1909..bc67d4245e1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -21,11 +21,15 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/irq_work.h> +#include <linux/posix-timers.h> +#include <linux/perf_event.h> #include <asm/irq_regs.h> #include "tick-internal.h" +#include <trace/events/timer.h> + /* * Per cpu nohz control structure */ @@ -104,7 +108,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -112,7 +116,8 @@ static void tick_sched_do_timer(ktime_t now) * this duty, then the jiffies update is still serialized by * jiffies_lock. */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) + && !tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -123,7 +128,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -142,10 +147,226 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } +#ifdef CONFIG_NO_HZ_FULL +static cpumask_var_t nohz_full_mask; +bool have_nohz_full_mask; + +static bool can_stop_full_tick(void) +{ + WARN_ON_ONCE(!irqs_disabled()); + + if (!sched_can_stop_tick()) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); + return false; + } + + if (!posix_cpu_timers_can_stop_tick(current)) { + trace_tick_stop(0, "posix timers running\n"); + return false; + } + + if (!perf_event_can_stop_tick()) { + trace_tick_stop(0, "perf events running\n"); + return false; + } + + /* sched_clock_tick() needs us? */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + /* + * TODO: kick full dynticks CPUs when + * sched_clock_stable is set. + */ + if (!sched_clock_stable) { + trace_tick_stop(0, "unstable sched clock\n"); + return false; + } +#endif + + return true; +} + +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); + +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. 
+ */ +void tick_nohz_full_check(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (tick_nohz_full_cpu(smp_processor_id())) { + if (ts->tick_stopped && !is_idle_task(current)) { + if (!can_stop_full_tick()) + tick_nohz_restart_sched_tick(ts, ktime_get()); + } + } +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!have_nohz_full_mask) + return; + + preempt_disable(); + smp_call_function_many(nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + preempt_enable(); +} + +/* + * Re-evaluate the need for the tick as we switch the current task. + * It might need the tick due to per task/process properties: + * perf events, posix cpu timers, ... + */ +void tick_nohz_task_switch(struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + + if (!tick_nohz_full_cpu(smp_processor_id())) + goto out; + + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + tick_nohz_full_kick(); + +out: + local_irq_restore(flags); +} + +int tick_nohz_full_cpu(int cpu) +{ + if (!have_nohz_full_mask) + return 0; + + return cpumask_test_cpu(cpu, nohz_full_mask); +} + +/* Parse the boot-time nohz CPU list from the kernel parameters. */ +static int __init tick_nohz_full_setup(char *str) +{ + int cpu; + + alloc_bootmem_cpumask_var(&nohz_full_mask); + if (cpulist_parse(str, nohz_full_mask) < 0) { + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); + return 1; + } + + cpu = smp_processor_id(); + if (cpumask_test_cpu(cpu, nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + have_nohz_full_mask = true; + + return 1; +} +__setup("nohz_full=", tick_nohz_full_setup); + +static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + /* + * If we handle the timekeeping duty for full dynticks CPUs, + * we can't safely shutdown that CPU. + */ + if (have_nohz_full_mask && tick_do_timer_cpu == cpu) + return -EINVAL; + break; + } + return NOTIFY_OK; +} + +/* + * Worst case string length in chunks of CPU range seems 2 steps + * separations: 0,2,4,6,... 
+ * This is NR_CPUS + sizeof('\0') + */ +static char __initdata nohz_full_buf[NR_CPUS + 1]; + +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); + have_nohz_full_mask = true; +#endif + return err; +} + +void __init tick_nohz_init(void) +{ + int cpu; + + if (!have_nohz_full_mask) { + if (tick_nohz_init_all() < 0) + return; + } + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + + /* Make sure full dynticks CPU are also RCU nocbs */ + for_each_cpu(cpu, nohz_full_mask) { + if (!rcu_is_nocb_cpu(cpu)) { + pr_warning("NO_HZ: CPU %d is not RCU nocb: " + "cleared from nohz_full range", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + } + + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); +} +#else +#define have_nohz_full_mask (0) +#endif + /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -345,11 +566,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, delta_jiffies = rcu_delta_jiffies; } } + /* - * Do not stop the tick, if we are only one off - * or if the cpu is required for rcu + * Do not stop the tick, if we are only one off (or less) + * or if the cpu is required for RCU: */ - if (!ts->tick_stopped && delta_jiffies == 1) + if (!ts->tick_stopped && delta_jiffies <= 1) goto out; /* Schedule the tick, if we are at least one jiffie off */ @@ -378,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = KTIME_MAX; } +#ifdef CONFIG_NO_HZ_FULL + if (!ts->inidle) { + time_delta = min(time_delta, + scheduler_tick_max_deferment()); + } +#endif + /* * calculate the expiry time for the next timer wheel * timer. 
delta_jiffies >= NEXT_TIMER_MAX_DELTA signals @@ -421,6 +650,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; + trace_tick_stop(1, " "); } /* @@ -457,6 +687,24 @@ out: return ret; } +static void tick_nohz_full_stop_tick(struct tick_sched *ts) +{ +#ifdef CONFIG_NO_HZ_FULL + int cpu = smp_processor_id(); + + if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) + return; + + if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) + return; + + if (!can_stop_full_tick()) + return; + + tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); +#endif +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -489,6 +737,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } + if (have_nohz_full_mask) { + /* + * Keep the tick alive to guarantee timekeeping progression + * if there are full dynticks CPUs around. + */ + if (tick_do_timer_cpu == cpu) + return false; + /* + * Boot safety: make sure the timekeeping duty has been + * assigned before entering dyntick-idle mode. + */ + if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) + return false; + } + return true; } @@ -568,12 +831,13 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (!ts->inidle) - return; - - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); - __tick_nohz_idle_enter(ts); + if (ts->inidle) { + /* Cancel the timer because the CPU has already woken up from the C-states */ + menu_hrtimer_cancel(); + __tick_nohz_idle_enter(ts); + } else { + tick_nohz_full_stop_tick(ts); + } } /** @@ -802,7 +1066,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -887,14 +1151,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 09bca8ce977..a860bba3441 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -739,7 +739,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -931,14 +931,14 @@ void add_timer_on(struct timer_list *timer, int cpu) debug_activate(timer, timer->expires); internal_add_timer(base, timer); /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel.
+ * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. */ - wake_up_idle_cpu(cpu); + wake_up_nohz_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); @@ -1189,7 +1189,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a CPU is idle. diff --git a/lib/oid_registry.c b/lib/oid_registry.c index d8de11f4590..318f382a010 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ +#include <linux/module.h> #include <linux/export.h> #include <linux/oid_registry.h> #include <linux/kernel.h> @@ -16,6 +17,10 @@ #include <linux/bug.h> #include "oid_registry_data.c" +MODULE_DESCRIPTION("OID Registry"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + /** * look_up_OID - Find an OID registration for the specified data * @data: Binary representation of the OID diff --git a/net/ceph/Makefile b/net/ceph/Makefile index e87ef435e11..958d9856912 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ crypto.o armor.o \ auth_x.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ - pagevec.o + pagevec.o snapshot.o diff --git a/net/ceph/auth.c b/net/ceph/auth.c index b4bf4ac090f..6b923bcaa2a 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp if (!ac) goto out; + mutex_init(&ac->mutex); ac->negotiating = true; if (name) ac->name = name; @@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac) */ void ceph_auth_reset(struct ceph_auth_client *ac) { + mutex_lock(&ac->mutex); dout("auth_reset %p\n", ac); if (ac->ops && !ac->negotiating) ac->ops->reset(ac); ac->negotiating = true; + mutex_unlock(&ac->mutex); } int ceph_entity_name_encode(const char *name, void **p, void *end) @@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) int i, num; int ret; + mutex_lock(&ac->mutex); dout("auth_build_hello\n"); monhdr->have_version = 0; monhdr->session_mon = cpu_to_le16(-1); @@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) ret = ceph_entity_name_encode(ac->name, &p, end); if (ret < 0) - return ret; + goto out; ceph_decode_need(&p, end, sizeof(u64), bad); ceph_encode_64(&p, ac->global_id); ceph_encode_32(&lenp, p - lenp - sizeof(u32)); - return p - buf; + ret = p - buf; +out: + mutex_unlock(&ac->mutex); + return ret; bad: - return -ERANGE; + ret = -ERANGE; + goto out; } static int ceph_build_auth_request(struct ceph_auth_client *ac, @@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac, if (ret < 0) { pr_err("error %d building auth method %s request\n", ret, ac->ops->name); - return ret; + goto out; } dout(" built request %d bytes\n", ret); ceph_encode_32(&p, ret); - return p + ret - msg_buf; + ret = p + ret - msg_buf; +out: + return ret; } /* @@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac, int result_msg_len; int ret = -EINVAL; + mutex_lock(&ac->mutex); dout("handle_auth_reply %p %p\n", p, end); ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); protocol = ceph_decode_32(&p); @@ -227,33 +238,103 @@ int 
ceph_handle_auth_reply(struct ceph_auth_client *ac, ret = ac->ops->handle_reply(ac, result, payload, payload_end); if (ret == -EAGAIN) { - return ceph_build_auth_request(ac, reply_buf, reply_len); + ret = ceph_build_auth_request(ac, reply_buf, reply_len); } else if (ret) { pr_err("auth method '%s' error %d\n", ac->ops->name, ret); - return ret; } - return 0; -bad: - pr_err("failed to decode auth msg\n"); out: + mutex_unlock(&ac->mutex); return ret; + +bad: + pr_err("failed to decode auth msg\n"); + ret = -EINVAL; + goto out; } int ceph_build_auth(struct ceph_auth_client *ac, void *msg_buf, size_t msg_len) { + int ret = 0; + + mutex_lock(&ac->mutex); if (!ac->protocol) - return ceph_auth_build_hello(ac, msg_buf, msg_len); - BUG_ON(!ac->ops); - if (ac->ops->should_authenticate(ac)) - return ceph_build_auth_request(ac, msg_buf, msg_len); - return 0; + ret = ceph_auth_build_hello(ac, msg_buf, msg_len); + else if (ac->ops->should_authenticate(ac)) + ret = ceph_build_auth_request(ac, msg_buf, msg_len); + mutex_unlock(&ac->mutex); + return ret; } int ceph_auth_is_authenticated(struct ceph_auth_client *ac) { - if (!ac->ops) - return 0; - return ac->ops->is_authenticated(ac); + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops) + ret = ac->ops->is_authenticated(ac); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_is_authenticated); + +int ceph_auth_create_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *auth) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->create_authorizer) + ret = ac->ops->create_authorizer(ac, peer_type, auth); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_create_authorizer); + +void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, + struct ceph_authorizer *a) +{ + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->destroy_authorizer) + ac->ops->destroy_authorizer(ac, a); + mutex_unlock(&ac->mutex); +} +EXPORT_SYMBOL(ceph_auth_destroy_authorizer); + +int ceph_auth_update_authorizer(struct ceph_auth_client *ac, + int peer_type, + struct ceph_auth_handshake *a) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->update_authorizer) + ret = ac->ops->update_authorizer(ac, peer_type, a); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_update_authorizer); + +int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, + struct ceph_authorizer *a, size_t len) +{ + int ret = 0; + + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->verify_authorizer_reply) + ret = ac->ops->verify_authorizer_reply(ac, a, len); + mutex_unlock(&ac->mutex); + return ret; +} +EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply); + +void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type) +{ + mutex_lock(&ac->mutex); + if (ac->ops && ac->ops->invalidate_authorizer) + ac->ops->invalidate_authorizer(ac, peer_type); + mutex_unlock(&ac->mutex); } +EXPORT_SYMBOL(ceph_auth_invalidate_authorizer); diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index a16bf14eb02..96238ba95f2 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac, return -ENOMEM; } au->service = th->service; + au->secret_id = th->secret_id; msg_a = au->buf->vec.iov_base; msg_a->struct_v = 1; @@ -555,6 +556,26 @@ static int ceph_x_create_authorizer( return 0; } +static int ceph_x_update_authorizer( + struct ceph_auth_client *ac, int peer_type, + struct ceph_auth_handshake *auth) +{ + 
struct ceph_x_authorizer *au; + struct ceph_x_ticket_handler *th; + + th = get_ticket_handler(ac, peer_type); + if (IS_ERR(th)) + return PTR_ERR(th); + + au = (struct ceph_x_authorizer *)auth->authorizer; + if (au->secret_id < th->secret_id) { + dout("ceph_x_update_authorizer service %u secret %llu < %llu\n", + au->service, au->secret_id, th->secret_id); + return ceph_x_build_authorizer(ac, th, au); + } + return 0; +} + static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len) { @@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, th = get_ticket_handler(ac, peer_type); if (!IS_ERR(th)) - remove_ticket_handler(ac, th); + memset(&th->validity, 0, sizeof(th->validity)); } @@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = { .build_request = ceph_x_build_request, .handle_reply = ceph_x_handle_reply, .create_authorizer = ceph_x_create_authorizer, + .update_authorizer = ceph_x_update_authorizer, .verify_authorizer_reply = ceph_x_verify_authorizer_reply, .destroy_authorizer = ceph_x_destroy_authorizer, .invalidate_authorizer = ceph_x_invalidate_authorizer, diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index f459e93b774..c5a058da7ac 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -29,6 +29,7 @@ struct ceph_x_authorizer { struct ceph_buffer *buf; unsigned int service; u64 nonce; + u64 secret_id; char reply_buf[128]; /* big enough for encrypted blob */ }; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index e65e6e4be38..34b11ee8124 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -606,11 +606,17 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; + ret = ceph_osdc_setup(); + if (ret < 0) + goto out_msgr; + pr_info("loaded (mon/osd proto %d/%d)\n", CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; +out_msgr: + ceph_msgr_exit(); out_crypto: ceph_crypto_shutdown(); out_debugfs: @@ -622,6 +628,7 @@ out: static void __exit exit_ceph_lib(void) { dout("exit_ceph_lib\n"); + ceph_osdc_cleanup(); ceph_msgr_exit(); ceph_crypto_shutdown(); ceph_debugfs_cleanup(); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 00d051f4894..83661cdc076 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; + unsigned int i; int opcode; - int i; req = rb_entry(p, struct ceph_osd_request, r_node); @@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp) seq_printf(s, "\t"); for (i = 0; i < req->r_num_ops; i++) { - opcode = le16_to_cpu(req->r_request_ops[i].op); + opcode = req->r_ops[i].op; seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 2c0669fb54e..eb0a46a49bd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -21,6 +21,9 @@ #include <linux/ceph/pagelist.h> #include <linux/export.h> +#define list_entry_next(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) + /* * Ceph uses the messenger to exchange ceph_msg messages with other * hosts in the system. 
The messenger provides ordered and reliable @@ -149,6 +152,11 @@ static bool con_flag_test_and_set(struct ceph_connection *con, return test_and_set_bit(con_flag, &con->flags); } +/* Slab caches for frequently-allocated structures */ + +static struct kmem_cache *ceph_msg_cache; +static struct kmem_cache *ceph_msg_data_cache; + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -223,6 +231,41 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ static struct workqueue_struct *ceph_msgr_wq; +static int ceph_msgr_slab_init(void) +{ + BUG_ON(ceph_msg_cache); + ceph_msg_cache = kmem_cache_create("ceph_msg", + sizeof (struct ceph_msg), + __alignof__(struct ceph_msg), 0, NULL); + + if (!ceph_msg_cache) + return -ENOMEM; + + BUG_ON(ceph_msg_data_cache); + ceph_msg_data_cache = kmem_cache_create("ceph_msg_data", + sizeof (struct ceph_msg_data), + __alignof__(struct ceph_msg_data), + 0, NULL); + if (ceph_msg_data_cache) + return 0; + + kmem_cache_destroy(ceph_msg_cache); + ceph_msg_cache = NULL; + + return -ENOMEM; +} + +static void ceph_msgr_slab_exit(void) +{ + BUG_ON(!ceph_msg_data_cache); + kmem_cache_destroy(ceph_msg_data_cache); + ceph_msg_data_cache = NULL; + + BUG_ON(!ceph_msg_cache); + kmem_cache_destroy(ceph_msg_cache); + ceph_msg_cache = NULL; +} + static void _ceph_msgr_exit(void) { if (ceph_msgr_wq) { @@ -230,6 +273,8 @@ static void _ceph_msgr_exit(void) ceph_msgr_wq = NULL; } + ceph_msgr_slab_exit(); + BUG_ON(zero_page == NULL); kunmap(zero_page); page_cache_release(zero_page); @@ -242,6 +287,9 @@ int ceph_msgr_init(void) zero_page = ZERO_PAGE(0); page_cache_get(zero_page); + if (ceph_msgr_slab_init()) + return -ENOMEM; + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); if (ceph_msgr_wq) return 0; @@ -471,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) return r; } +static int ceph_tcp_recvpage(struct socket *sock, struct page *page, + int page_offset, size_t length) +{ + void *kaddr; + int ret; + + BUG_ON(page_offset + length > PAGE_SIZE); + + kaddr = kmap(page); + BUG_ON(!kaddr); + ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); + kunmap(page); + + return ret; +} + /* * write something. @more is true if caller will be sending more data * shortly. @@ -493,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, } static int ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int more) + int offset, size_t size, bool more) { int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); int ret; @@ -697,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con, } #ifdef CONFIG_BLOCK -static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) + +/* + * For a bio data item, a piece is whatever remains of the next + * entry in the current bio iovec, or the first entry in the next + * bio in the list. 
+ */ +static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) { - if (!bio) { - *iter = NULL; - *seg = 0; - return; + struct ceph_msg_data *data = cursor->data; + struct bio *bio; + + BUG_ON(data->type != CEPH_MSG_DATA_BIO); + + bio = data->bio; + BUG_ON(!bio); + BUG_ON(!bio->bi_vcnt); + + cursor->resid = min(length, data->bio_length); + cursor->bio = bio; + cursor->vector_index = 0; + cursor->vector_offset = 0; + cursor->last_piece = length <= bio->bi_io_vec[0].bv_len; +} + +static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, + size_t *length) +{ + struct ceph_msg_data *data = cursor->data; + struct bio *bio; + struct bio_vec *bio_vec; + unsigned int index; + + BUG_ON(data->type != CEPH_MSG_DATA_BIO); + + bio = cursor->bio; + BUG_ON(!bio); + + index = cursor->vector_index; + BUG_ON(index >= (unsigned int) bio->bi_vcnt); + + bio_vec = &bio->bi_io_vec[index]; + BUG_ON(cursor->vector_offset >= bio_vec->bv_len); + *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset); + BUG_ON(*page_offset >= PAGE_SIZE); + if (cursor->last_piece) /* pagelist offset is always 0 */ + *length = cursor->resid; + else + *length = (size_t) (bio_vec->bv_len - cursor->vector_offset); + BUG_ON(*length > cursor->resid); + BUG_ON(*page_offset + *length > PAGE_SIZE); + + return bio_vec->bv_page; +} + +static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct bio *bio; + struct bio_vec *bio_vec; + unsigned int index; + + BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); + + bio = cursor->bio; + BUG_ON(!bio); + + index = cursor->vector_index; + BUG_ON(index >= (unsigned int) bio->bi_vcnt); + bio_vec = &bio->bi_io_vec[index]; + + /* Advance the cursor offset */ + + BUG_ON(cursor->resid < bytes); + cursor->resid -= bytes; + cursor->vector_offset += bytes; + if (cursor->vector_offset < bio_vec->bv_len) + return false; /* more bytes to process in this segment */ + BUG_ON(cursor->vector_offset != bio_vec->bv_len); + + /* Move on to the next segment, and possibly the next bio */ + + if (++index == (unsigned int) bio->bi_vcnt) { + bio = bio->bi_next; + index = 0; } - *iter = bio; - *seg = bio->bi_idx; + cursor->bio = bio; + cursor->vector_index = index; + cursor->vector_offset = 0; + + if (!cursor->last_piece) { + BUG_ON(!cursor->resid); + BUG_ON(!bio); + /* A short read is OK, so use <= rather than == */ + if (cursor->resid <= bio->bi_io_vec[index].bv_len) + cursor->last_piece = true; + } + + return true; } +#endif /* CONFIG_BLOCK */ -static void iter_bio_next(struct bio **bio_iter, int *seg) +/* + * For a page array, a piece comes from the first page in the array + * that has not already been fully consumed. 
+ */ +static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) { - if (*bio_iter == NULL) - return; + struct ceph_msg_data *data = cursor->data; + int page_count; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGES); - BUG_ON(*seg >= (*bio_iter)->bi_vcnt); + BUG_ON(!data->pages); + BUG_ON(!data->length); - (*seg)++; - if (*seg == (*bio_iter)->bi_vcnt) - init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); + cursor->resid = min(length, data->length); + page_count = calc_pages_for(data->alignment, (u64)data->length); + cursor->page_offset = data->alignment & ~PAGE_MASK; + cursor->page_index = 0; + BUG_ON(page_count > (int)USHRT_MAX); + cursor->page_count = (unsigned short)page_count; + BUG_ON(length > SIZE_MAX - cursor->page_offset); + cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; } -#endif -static void prepare_write_message_data(struct ceph_connection *con) +static struct page * +ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) { - struct ceph_msg *msg = con->out_msg; + struct ceph_msg_data *data = cursor->data; - BUG_ON(!msg); - BUG_ON(!msg->hdr.data_len); + BUG_ON(data->type != CEPH_MSG_DATA_PAGES); + + BUG_ON(cursor->page_index >= cursor->page_count); + BUG_ON(cursor->page_offset >= PAGE_SIZE); + + *page_offset = cursor->page_offset; + if (cursor->last_piece) + *length = cursor->resid; + else + *length = PAGE_SIZE - *page_offset; + + return data->pages[cursor->page_index]; +} + +static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES); + + BUG_ON(cursor->page_offset + bytes > PAGE_SIZE); + + /* Advance the cursor page offset */ + + cursor->resid -= bytes; + cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK; + if (!bytes || cursor->page_offset) + return false; /* more bytes to process in the current page */ + + /* Move on to the next page; offset is already at 0 */ + + BUG_ON(cursor->page_index >= cursor->page_count); + cursor->page_index++; + cursor->last_piece = cursor->resid <= PAGE_SIZE; + + return true; +} + +/* + * For a pagelist, a piece is whatever remains to be consumed in the + * first page in the list, or the front of the next page. 
+ */ +static void +ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + struct page *page; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); + + pagelist = data->pagelist; + BUG_ON(!pagelist); + + if (!length) + return; /* pagelist can be assigned but empty */ + + BUG_ON(list_empty(&pagelist->head)); + page = list_first_entry(&pagelist->head, struct page, lru); + + cursor->resid = min(length, pagelist->length); + cursor->page = page; + cursor->offset = 0; + cursor->last_piece = cursor->resid <= PAGE_SIZE; +} + +static struct page * +ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); - /* initialize page iterator */ - con->out_msg_pos.page = 0; - if (msg->pages) - con->out_msg_pos.page_pos = msg->page_alignment; + pagelist = data->pagelist; + BUG_ON(!pagelist); + + BUG_ON(!cursor->page); + BUG_ON(cursor->offset + cursor->resid != pagelist->length); + + /* offset of first page in pagelist is always 0 */ + *page_offset = cursor->offset & ~PAGE_MASK; + if (cursor->last_piece) + *length = cursor->resid; else - con->out_msg_pos.page_pos = 0; + *length = PAGE_SIZE - *page_offset; + + return cursor->page; +} + +static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct ceph_msg_data *data = cursor->data; + struct ceph_pagelist *pagelist; + + BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST); + + pagelist = data->pagelist; + BUG_ON(!pagelist); + + BUG_ON(cursor->offset + cursor->resid != pagelist->length); + BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE); + + /* Advance the cursor offset */ + + cursor->resid -= bytes; + cursor->offset += bytes; + /* offset of first page in pagelist is always 0 */ + if (!bytes || cursor->offset & ~PAGE_MASK) + return false; /* more bytes to process in the current page */ + + /* Move on to the next page */ + + BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); + cursor->page = list_entry_next(cursor->page, lru); + cursor->last_piece = cursor->resid <= PAGE_SIZE; + + return true; +} + +/* + * Message data is handled (sent or received) in pieces, where each + * piece resides on a single page. The network layer might not + * consume an entire piece at once. A data item's cursor keeps + * track of which piece is next to process and how much remains to + * be processed in that piece. It also tracks whether the current + * piece is the last one in the data item. 
+ */ +static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) +{ + size_t length = cursor->total_resid; + + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + ceph_msg_data_pagelist_cursor_init(cursor, length); + break; + case CEPH_MSG_DATA_PAGES: + ceph_msg_data_pages_cursor_init(cursor, length); + break; #ifdef CONFIG_BLOCK - if (msg->bio) - init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); -#endif - con->out_msg_pos.data_pos = 0; - con->out_msg_pos.did_page_crc = false; - con->out_more = 1; /* data + footer will follow */ + case CEPH_MSG_DATA_BIO: + ceph_msg_data_bio_cursor_init(cursor, length); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + /* BUG(); */ + break; + } + cursor->need_crc = true; +} + +static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) +{ + struct ceph_msg_data_cursor *cursor = &msg->cursor; + struct ceph_msg_data *data; + + BUG_ON(!length); + BUG_ON(length > msg->data_length); + BUG_ON(list_empty(&msg->data)); + + cursor->data_head = &msg->data; + cursor->total_resid = length; + data = list_first_entry(&msg->data, struct ceph_msg_data, links); + cursor->data = data; + + __ceph_msg_data_cursor_init(cursor); +} + +/* + * Return the page containing the next piece to process for a given + * data item, and supply the page offset and length of that piece. + * Indicate whether this is the last piece in this data item. + */ +static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length, + bool *last_piece) +{ + struct page *page; + + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + page = ceph_msg_data_pagelist_next(cursor, page_offset, length); + break; + case CEPH_MSG_DATA_PAGES: + page = ceph_msg_data_pages_next(cursor, page_offset, length); + break; +#ifdef CONFIG_BLOCK + case CEPH_MSG_DATA_BIO: + page = ceph_msg_data_bio_next(cursor, page_offset, length); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + page = NULL; + break; + } + BUG_ON(!page); + BUG_ON(*page_offset + *length > PAGE_SIZE); + BUG_ON(!*length); + if (last_piece) + *last_piece = cursor->last_piece; + + return page; +} + +/* + * Returns true if the result moves the cursor on to the next piece + * of the data item. 
+ */ +static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + bool new_piece; + + BUG_ON(bytes > cursor->resid); + switch (cursor->data->type) { + case CEPH_MSG_DATA_PAGELIST: + new_piece = ceph_msg_data_pagelist_advance(cursor, bytes); + break; + case CEPH_MSG_DATA_PAGES: + new_piece = ceph_msg_data_pages_advance(cursor, bytes); + break; +#ifdef CONFIG_BLOCK + case CEPH_MSG_DATA_BIO: + new_piece = ceph_msg_data_bio_advance(cursor, bytes); + break; +#endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_NONE: + default: + BUG(); + break; + } + cursor->total_resid -= bytes; + + if (!cursor->resid && cursor->total_resid) { + WARN_ON(!cursor->last_piece); + BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); + cursor->data = list_entry_next(cursor->data, links); + __ceph_msg_data_cursor_init(cursor); + new_piece = true; + } + cursor->need_crc = new_piece; + + return new_piece; +} + +static void prepare_message_data(struct ceph_msg *msg, u32 data_len) +{ + BUG_ON(!msg); + BUG_ON(!data_len); + + /* Initialize data cursor */ + + ceph_msg_data_cursor_init(msg, (size_t)data_len); } /* @@ -803,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con) m->hdr.seq = cpu_to_le64(++con->out_seq); m->needs_out_seq = false; } -#ifdef CONFIG_BLOCK - else - m->bio_iter = NULL; -#endif + WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); - dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", + dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), - le32_to_cpu(m->hdr.data_len), - m->nr_pages); + m->data_length); BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); /* tag + hdr + front + middle */ @@ -843,11 +1250,13 @@ static void prepare_write_message(struct ceph_connection *con) /* is there a data payload? */ con->out_msg->footer.data_crc = 0; - if (m->hdr.data_len) - prepare_write_message_data(con); - else + if (m->data_length) { + prepare_message_data(con->out_msg, m->data_length); + con->out_more = 1; /* data + footer will follow */ + } else { /* no, queue up footer too and be done */ prepare_write_message_footer(con); + } con_flag_set(con, CON_FLAG_WRITE_PENDING); } @@ -874,6 +1283,24 @@ static void prepare_write_ack(struct ceph_connection *con) } /* + * Prepare to share the seq during handshake + */ +static void prepare_write_seq(struct ceph_connection *con) +{ + dout("prepare_write_seq %p %llu -> %llu\n", con, + con->in_seq_acked, con->in_seq); + con->in_seq_acked = con->in_seq; + + con_out_kvec_reset(con); + + con->out_temp_ack = cpu_to_le64(con->in_seq_acked); + con_out_kvec_add(con, sizeof (con->out_temp_ack), + &con->out_temp_ack); + + con_flag_set(con, CON_FLAG_WRITE_PENDING); +} + +/* * Prepare to write keepalive byte. */ static void prepare_write_keepalive(struct ceph_connection *con) @@ -1022,35 +1449,19 @@ out: return ret; /* done! 
*/ } -static void out_msg_pos_next(struct ceph_connection *con, struct page *page, - size_t len, size_t sent, bool in_trail) +static u32 ceph_crc32c_page(u32 crc, struct page *page, + unsigned int page_offset, + unsigned int length) { - struct ceph_msg *msg = con->out_msg; + char *kaddr; - BUG_ON(!msg); - BUG_ON(!sent); - - con->out_msg_pos.data_pos += sent; - con->out_msg_pos.page_pos += sent; - if (sent < len) - return; + kaddr = kmap(page); + BUG_ON(kaddr == NULL); + crc = crc32c(crc, kaddr + page_offset, length); + kunmap(page); - BUG_ON(sent != len); - con->out_msg_pos.page_pos = 0; - con->out_msg_pos.page++; - con->out_msg_pos.did_page_crc = false; - if (in_trail) - list_move_tail(&page->lru, - &msg->trail->head); - else if (msg->pagelist) - list_move_tail(&page->lru, - &msg->pagelist->head); -#ifdef CONFIG_BLOCK - else if (msg->bio) - iter_bio_next(&msg->bio_iter, &msg->bio_seg); -#endif + return crc; } - /* * Write as much message data payload as we can. If we finish, queue * up the footer. @@ -1058,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page, * 0 -> socket full, but more to do * <0 -> error */ -static int write_partial_msg_pages(struct ceph_connection *con) +static int write_partial_message_data(struct ceph_connection *con) { struct ceph_msg *msg = con->out_msg; - unsigned int data_len = le32_to_cpu(msg->hdr.data_len); - size_t len; + struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !con->msgr->nocrc; - int ret; - int total_max_write; - bool in_trail = false; - const size_t trail_len = (msg->trail ? msg->trail->length : 0); - const size_t trail_off = data_len - trail_len; + u32 crc; - dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", - con, msg, con->out_msg_pos.page, msg->nr_pages, - con->out_msg_pos.page_pos); + dout("%s %p msg %p\n", __func__, con, msg); + + if (list_empty(&msg->data)) + return -EINVAL; /* * Iterate through each page that contains data to be @@ -1082,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con) * need to map the page. If we have no pages, they have * been revoked, so use the zero page. */ - while (data_len > con->out_msg_pos.data_pos) { - struct page *page = NULL; - int max_write = PAGE_SIZE; - int bio_offset = 0; - - in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; - if (!in_trail) - total_max_write = trail_off - con->out_msg_pos.data_pos; - - if (in_trail) { - total_max_write = data_len - con->out_msg_pos.data_pos; - - page = list_first_entry(&msg->trail->head, - struct page, lru); - } else if (msg->pages) { - page = msg->pages[con->out_msg_pos.page]; - } else if (msg->pagelist) { - page = list_first_entry(&msg->pagelist->head, - struct page, lru); -#ifdef CONFIG_BLOCK - } else if (msg->bio) { - struct bio_vec *bv; + crc = do_datacrc ? 
le32_to_cpu(msg->footer.data_crc) : 0; + while (cursor->resid) { + struct page *page; + size_t page_offset; + size_t length; + bool last_piece; + bool need_crc; + int ret; - bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); - page = bv->bv_page; - bio_offset = bv->bv_offset; - max_write = bv->bv_len; -#endif - } else { - page = zero_page; - } - len = min_t(int, max_write - con->out_msg_pos.page_pos, - total_max_write); - - if (do_datacrc && !con->out_msg_pos.did_page_crc) { - void *base; - u32 crc = le32_to_cpu(msg->footer.data_crc); - char *kaddr; - - kaddr = kmap(page); - BUG_ON(kaddr == NULL); - base = kaddr + con->out_msg_pos.page_pos + bio_offset; - crc = crc32c(crc, base, len); - kunmap(page); - msg->footer.data_crc = cpu_to_le32(crc); - con->out_msg_pos.did_page_crc = true; - } - ret = ceph_tcp_sendpage(con->sock, page, - con->out_msg_pos.page_pos + bio_offset, - len, 1); - if (ret <= 0) - goto out; + page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, + &last_piece); + ret = ceph_tcp_sendpage(con->sock, page, page_offset, + length, last_piece); + if (ret <= 0) { + if (do_datacrc) + msg->footer.data_crc = cpu_to_le32(crc); - out_msg_pos_next(con, page, len, (size_t) ret, in_trail); + return ret; + } + if (do_datacrc && cursor->need_crc) + crc = ceph_crc32c_page(crc, page, page_offset, length); + need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret); } - dout("write_partial_msg_pages %p msg %p done\n", con, msg); + dout("%s %p msg %p done\n", __func__, con, msg); /* prepare and queue up footer, too */ - if (!do_datacrc) + if (do_datacrc) + msg->footer.data_crc = cpu_to_le32(crc); + else msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; con_out_kvec_reset(con); prepare_write_message_footer(con); - ret = 1; -out: - return ret; + + return 1; /* must return > 0 to indicate success */ } /* @@ -1160,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con) while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); if (ret <= 0) goto out; con->out_skip -= ret; @@ -1191,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con) con->in_base_pos = 0; } +static void prepare_read_seq(struct ceph_connection *con) +{ + dout("prepare_read_seq %p\n", con); + con->in_base_pos = 0; + con->in_tag = CEPH_MSGR_TAG_SEQ; +} + static void prepare_read_tag(struct ceph_connection *con) { dout("prepare_read_tag %p\n", con); @@ -1597,7 +1980,6 @@ static int process_connect(struct ceph_connection *con) con->error_msg = "connect authorization failure"; return -1; } - con->auth_retry = 1; con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) @@ -1668,6 +2050,7 @@ static int process_connect(struct ceph_connection *con) prepare_read_connect(con); break; + case CEPH_MSGR_TAG_SEQ: case CEPH_MSGR_TAG_READY: if (req_feat & ~server_feat) { pr_err("%s%lld %s protocol feature mismatch," @@ -1682,7 +2065,7 @@ static int process_connect(struct ceph_connection *con) WARN_ON(con->state != CON_STATE_NEGOTIATING); con->state = CON_STATE_OPEN; - + con->auth_retry = 0; /* we authenticated; clear flag */ con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); con->connect_seq++; con->peer_features = server_feat; @@ -1698,7 +2081,12 @@ static int process_connect(struct ceph_connection *con) con->delay = 0; /* reset backoff memory */ - prepare_read_tag(con); + if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) { + 
prepare_write_seq(con); + prepare_read_seq(con); + } else { + prepare_read_tag(con); + } break; case CEPH_MSGR_TAG_WAIT: @@ -1732,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con) return read_partial(con, end, size, &con->in_temp_ack); } - /* * We can finally discard anything that's been acked. */ @@ -1757,8 +2144,6 @@ static void process_ack(struct ceph_connection *con) } - - static int read_partial_message_section(struct ceph_connection *con, struct kvec *section, unsigned int sec_len, u32 *crc) @@ -1782,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con, return 1; } -static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); - -static int read_partial_message_pages(struct ceph_connection *con, - struct page **pages, - unsigned int data_len, bool do_datacrc) +static int read_partial_msg_data(struct ceph_connection *con) { - void *p; + struct ceph_msg *msg = con->in_msg; + struct ceph_msg_data_cursor *cursor = &msg->cursor; + const bool do_datacrc = !con->msgr->nocrc; + struct page *page; + size_t page_offset; + size_t length; + u32 crc = 0; int ret; - int left; - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); - /* (page) data */ - BUG_ON(pages == NULL); - p = kmap(pages[con->in_msg_pos.page]); - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && do_datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(pages[con->in_msg_pos.page]); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == PAGE_SIZE) { - con->in_msg_pos.page_pos = 0; - con->in_msg_pos.page++; - } - - return ret; -} - -#ifdef CONFIG_BLOCK -static int read_partial_message_bio(struct ceph_connection *con, - struct bio **bio_iter, int *bio_seg, - unsigned int data_len, bool do_datacrc) -{ - struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); - void *p; - int ret, left; + BUG_ON(!msg); + if (list_empty(&msg->data)) + return -EIO; - left = min((int)(data_len - con->in_msg_pos.data_pos), - (int)(bv->bv_len - con->in_msg_pos.page_pos)); + if (do_datacrc) + crc = con->in_data_crc; + while (cursor->resid) { + page = ceph_msg_data_next(&msg->cursor, &page_offset, &length, + NULL); + ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); + if (ret <= 0) { + if (do_datacrc) + con->in_data_crc = crc; - p = kmap(bv->bv_page) + bv->bv_offset; + return ret; + } - ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, - left); - if (ret > 0 && do_datacrc) - con->in_data_crc = - crc32c(con->in_data_crc, - p + con->in_msg_pos.page_pos, ret); - kunmap(bv->bv_page); - if (ret <= 0) - return ret; - con->in_msg_pos.data_pos += ret; - con->in_msg_pos.page_pos += ret; - if (con->in_msg_pos.page_pos == bv->bv_len) { - con->in_msg_pos.page_pos = 0; - iter_bio_next(bio_iter, bio_seg); + if (do_datacrc) + crc = ceph_crc32c_page(crc, page, page_offset, ret); + (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret); } + if (do_datacrc) + con->in_data_crc = crc; - return ret; + return 1; /* must return > 0 to indicate success */ } -#endif /* * read (part of) a message. 
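Schematically, the SEQ exchange these helpers implement is a mutual ack tacked onto the end of the handshake (a sketch of one reading of the process_connect() and try_read() changes in this patch):

	peer A                                      peer B
	  <---  TAG_SEQ + B's in_seq          (A treats it as an ack and drops
	                                       messages B has already received)
	  --->  A's in_seq                    (prepare_write_seq(); B does the same)

Each side then retransmits only what the other side never saw.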
*/ +static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); + static int read_partial_message(struct ceph_connection *con) { struct ceph_msg *m = con->in_msg; @@ -1885,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con) if (front_len > CEPH_MSG_MAX_FRONT_LEN) return -EIO; middle_len = le32_to_cpu(con->in_hdr.middle_len); - if (middle_len > CEPH_MSG_MAX_DATA_LEN) + if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) return -EIO; data_len = le32_to_cpu(con->in_hdr.data_len); if (data_len > CEPH_MSG_MAX_DATA_LEN) @@ -1914,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con) int skip = 0; dout("got hdr type %d front %d data %d\n", con->in_hdr.type, - con->in_hdr.front_len, con->in_hdr.data_len); + front_len, data_len); ret = ceph_con_in_msg_alloc(con, &skip); if (ret < 0) return ret; + + BUG_ON(!con->in_msg ^ skip); + if (con->in_msg && data_len > con->in_msg->data_length) { + pr_warning("%s skipping long message (%u > %zd)\n", + __func__, data_len, con->in_msg->data_length); + ceph_msg_put(con->in_msg); + con->in_msg = NULL; + skip = 1; + } if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); - BUG_ON(con->in_msg); con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; @@ -1936,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con) if (m->middle) m->middle->vec.iov_len = 0; - con->in_msg_pos.page = 0; - if (m->pages) - con->in_msg_pos.page_pos = m->page_alignment; - else - con->in_msg_pos.page_pos = 0; - con->in_msg_pos.data_pos = 0; + /* prepare for data payload, if any */ -#ifdef CONFIG_BLOCK - if (m->bio) - init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); -#endif + if (data_len) + prepare_message_data(con->in_msg, data_len); } /* front */ @@ -1965,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con) } /* (page) data */ - while (con->in_msg_pos.data_pos < data_len) { - if (m->pages) { - ret = read_partial_message_pages(con, m->pages, - data_len, do_datacrc); - if (ret <= 0) - return ret; -#ifdef CONFIG_BLOCK - } else if (m->bio) { - BUG_ON(!m->bio_iter); - ret = read_partial_message_bio(con, - &m->bio_iter, &m->bio_seg, - data_len, do_datacrc); - if (ret <= 0) - return ret; -#endif - } else { - BUG_ON(1); - } + if (data_len) { + ret = read_partial_msg_data(con); + if (ret <= 0) + return ret; } /* footer */ @@ -2108,13 +2452,13 @@ more_kvec: goto do_next; } - ret = write_partial_msg_pages(con); + ret = write_partial_message_data(con); if (ret == 1) goto more_kvec; /* we need to send the footer, too! 
*/ if (ret == 0) goto out; if (ret < 0) { - dout("try_write write_partial_msg_pages err %d\n", + dout("try_write write_partial_message_data err %d\n", ret); goto out; } @@ -2266,7 +2610,12 @@ more: prepare_read_tag(con); goto more; } - if (con->in_tag == CEPH_MSGR_TAG_ACK) { + if (con->in_tag == CEPH_MSGR_TAG_ACK || + con->in_tag == CEPH_MSGR_TAG_SEQ) { + /* + * the final handshake seq exchange is semantically + * equivalent to an ACK + */ ret = read_partial_ack(con); if (ret <= 0) goto out; @@ -2672,6 +3021,88 @@ void ceph_con_keepalive(struct ceph_connection *con) } EXPORT_SYMBOL(ceph_con_keepalive); +static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) +{ + struct ceph_msg_data *data; + + if (WARN_ON(!ceph_msg_data_type_valid(type))) + return NULL; + + data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); + if (data) + data->type = type; + INIT_LIST_HEAD(&data->links); + + return data; +} + +static void ceph_msg_data_destroy(struct ceph_msg_data *data) +{ + if (!data) + return; + + WARN_ON(!list_empty(&data->links)); + if (data->type == CEPH_MSG_DATA_PAGELIST) { + ceph_pagelist_release(data->pagelist); + kfree(data->pagelist); + } + kmem_cache_free(ceph_msg_data_cache, data); +} + +void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, + size_t length, size_t alignment) +{ + struct ceph_msg_data *data; + + BUG_ON(!pages); + BUG_ON(!length); + + data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); + BUG_ON(!data); + data->pages = pages; + data->length = length; + data->alignment = alignment & ~PAGE_MASK; + + list_add_tail(&data->links, &msg->data); + msg->data_length += length; +} +EXPORT_SYMBOL(ceph_msg_data_add_pages); + +void ceph_msg_data_add_pagelist(struct ceph_msg *msg, + struct ceph_pagelist *pagelist) +{ + struct ceph_msg_data *data; + + BUG_ON(!pagelist); + BUG_ON(!pagelist->length); + + data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); + BUG_ON(!data); + data->pagelist = pagelist; + + list_add_tail(&data->links, &msg->data); + msg->data_length += pagelist->length; +} +EXPORT_SYMBOL(ceph_msg_data_add_pagelist); + +#ifdef CONFIG_BLOCK +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, + size_t length) +{ + struct ceph_msg_data *data; + + BUG_ON(!bio); + + data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); + BUG_ON(!data); + data->bio = bio; + data->bio_length = length; + + list_add_tail(&data->links, &msg->data); + msg->data_length += length; +} +EXPORT_SYMBOL(ceph_msg_data_add_bio); +#endif /* CONFIG_BLOCK */ /* * construct a new message with given type, size @@ -2682,49 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, { struct ceph_msg *m; - m = kmalloc(sizeof(*m), flags); + m = kmem_cache_zalloc(ceph_msg_cache, flags); if (m == NULL) goto out; - kref_init(&m->kref); - m->con = NULL; - INIT_LIST_HEAD(&m->list_head); - - m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); - m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); - m->hdr.middle_len = 0; - m->hdr.data_len = 0; - m->hdr.data_off = 0; - m->hdr.reserved = 0; - m->footer.front_crc = 0; - m->footer.middle_crc = 0; - m->footer.data_crc = 0; - m->footer.flags = 0; - m->front_max = front_len; - m->front_is_vmalloc = false; - m->more_to_follow = false; - m->ack_stamp = 0; - m->pool = NULL; - - /* middle */ - m->middle = NULL; - /* data */ - m->nr_pages = 0; - m->page_alignment = 0; - m->pages = NULL; - m->pagelist = NULL; -#ifdef CONFIG_BLOCK - m->bio = NULL; - m->bio_iter = 
NULL; - m->bio_seg = 0; -#endif /* CONFIG_BLOCK */ - m->trail = NULL; + INIT_LIST_HEAD(&m->list_head); + kref_init(&m->kref); + INIT_LIST_HEAD(&m->data); /* front */ + m->front_max = front_len; if (front_len) { if (front_len > PAGE_CACHE_SIZE) { m->front.iov_base = __vmalloc(front_len, flags, @@ -2802,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) { struct ceph_msg_header *hdr = &con->in_hdr; - int type = le16_to_cpu(hdr->type); - int front_len = le32_to_cpu(hdr->front_len); int middle_len = le32_to_cpu(hdr->middle_len); + struct ceph_msg *msg; int ret = 0; BUG_ON(con->in_msg != NULL); + BUG_ON(!con->ops->alloc_msg); - if (con->ops->alloc_msg) { - struct ceph_msg *msg; - - mutex_unlock(&con->mutex); - msg = con->ops->alloc_msg(con, hdr, skip); - mutex_lock(&con->mutex); - if (con->state != CON_STATE_OPEN) { - if (msg) - ceph_msg_put(msg); - return -EAGAIN; - } - con->in_msg = msg; - if (con->in_msg) { - con->in_msg->con = con->ops->get(con); - BUG_ON(con->in_msg->con == NULL); - } - if (*skip) { - con->in_msg = NULL; - return 0; - } - if (!con->in_msg) { - con->error_msg = - "error allocating memory for incoming message"; - return -ENOMEM; - } + mutex_unlock(&con->mutex); + msg = con->ops->alloc_msg(con, hdr, skip); + mutex_lock(&con->mutex); + if (con->state != CON_STATE_OPEN) { + if (msg) + ceph_msg_put(msg); + return -EAGAIN; } - if (!con->in_msg) { - con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); - if (!con->in_msg) { - pr_err("unable to allocate msg type %d len %d\n", - type, front_len); - return -ENOMEM; - } + if (msg) { + BUG_ON(*skip); + con->in_msg = msg; con->in_msg->con = con->ops->get(con); BUG_ON(con->in_msg->con == NULL); - con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); + } else { + /* + * Null message pointer means either we should skip + * this message or we couldn't allocate memory. The + * former is not an error. 
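The contract this places on a connection's alloc_msg operation is: return a message to receive into; or return NULL with *skip set to discard the incoming message; or return NULL with *skip clear to report allocation failure. A hypothetical callback honoring that contract (illustration only, not code from this patch):

	static struct ceph_msg *example_alloc_msg(struct ceph_connection *con,
						  struct ceph_msg_header *hdr,
						  int *skip)
	{
		int type = le16_to_cpu(hdr->type);
		int front_len = le32_to_cpu(hdr->front_len);

		if (type != CEPH_MSG_OSD_OPREPLY) {
			*skip = 1;	/* not an error: just drop this message */
			return NULL;
		}
		*skip = 0;
		/* NULL here is reported as -ENOMEM by the caller above */
		return ceph_msg_new(type, front_len, GFP_NOFS, false);
	}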
+ */ + if (*skip) + return 0; + con->error_msg = "error allocating memory for incoming message"; + + return -ENOMEM; } memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); @@ -2870,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m) vfree(m->front.iov_base); else kfree(m->front.iov_base); - kfree(m); + kmem_cache_free(ceph_msg_cache, m); } /* @@ -2879,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m) void ceph_msg_last_put(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); + LIST_HEAD(data); + struct list_head *links; + struct list_head *next; dout("ceph_msg_put last one on %p\n", m); WARN_ON(!list_empty(&m->list_head)); @@ -2888,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref) ceph_buffer_put(m->middle); m->middle = NULL; } - m->nr_pages = 0; - m->pages = NULL; - if (m->pagelist) { - ceph_pagelist_release(m->pagelist); - kfree(m->pagelist); - m->pagelist = NULL; - } + list_splice_init(&m->data, &data); + list_for_each_safe(links, next, &data) { + struct ceph_msg_data *data; - m->trail = NULL; + data = list_entry(links, struct ceph_msg_data, links); + list_del_init(links); + ceph_msg_data_destroy(data); + } + m->data_length = 0; if (m->pool) ceph_msgpool_put(m->pool, m); @@ -2908,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put); void ceph_msg_dump(struct ceph_msg *msg) { - pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, - msg->front_max, msg->nr_pages); + pr_debug("msg_dump %p (front_max %d length %zd)\n", msg, + msg->front_max, msg->data_length); print_hex_dump(KERN_DEBUG, "header: ", DUMP_PREFIX_OFFSET, 16, 1, &msg->hdr, sizeof(msg->hdr), true); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index aef5b1062be..1fe25cd29d0 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work) __validate_auth(monc); - if (monc->auth->ops->is_authenticated(monc->auth)) + if (ceph_auth_is_authenticated(monc->auth)) __send_subscribe(monc); } __schedule_delayed(monc); @@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); had_debugfs_info = have_debugfs_info(monc); - if (monc->auth->ops) - was_auth = monc->auth->ops->is_authenticated(monc->auth); + was_auth = ceph_auth_is_authenticated(monc->auth); monc->pending_auth = 0; ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, msg->front.iov_len, @@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, wake_up_all(&monc->client->auth_wq); } else if (ret > 0) { __send_prepared_auth_request(monc, ret); - } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { + } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) { dout("authenticated, starting session\n"); monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d730dd4d8eb..a3395fdfbd4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1,3 +1,4 @@ + #include <linux/ceph/ceph_debug.h> #include <linux/module.h> @@ -21,6 +22,8 @@ #define OSD_OP_FRONT_LEN 4096 #define OSD_OPREPLY_FRONT_LEN 512 +static struct kmem_cache *ceph_osd_request_cache; + static const struct ceph_connection_operations osd_con_ops; static void __send_queued(struct ceph_osd_client *osdc); @@ -32,12 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_has_extent(int op) -{ - 
return (op == CEPH_OSD_OP_READ || - op == CEPH_OSD_OP_WRITE); -} - /* * Implement client access to distributed object storage cluster. * @@ -63,53 +60,238 @@ static int op_has_extent(int op) * * fill osd op in request message. */ -static int calc_layout(struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) +static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, + u64 *objnum, u64 *objoff, u64 *objlen) { u64 orig_len = *plen; - u64 bno = 0; - u64 objoff = 0; - u64 objlen = 0; int r; /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, - &objoff, &objlen); + r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum, + objoff, objlen); if (r < 0) return r; - if (objlen < orig_len) { - *plen = objlen; + if (*objlen < orig_len) { + *plen = *objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); } - if (op_has_extent(op->op)) { - u32 osize = le32_to_cpu(layout->fl_object_size); - op->extent.offset = objoff; - op->extent.length = objlen; - if (op->extent.truncate_size <= off - objoff) { - op->extent.truncate_size = 0; - } else { - op->extent.truncate_size -= off - objoff; - if (op->extent.truncate_size > osize) - op->extent.truncate_size = osize; - } + dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); + + return 0; +} + +static void ceph_osd_data_init(struct ceph_osd_data *osd_data) +{ + memset(osd_data, 0, sizeof (*osd_data)); + osd_data->type = CEPH_OSD_DATA_TYPE_NONE; +} + +static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data, + struct page **pages, u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_PAGES; + osd_data->pages = pages; + osd_data->length = length; + osd_data->alignment = alignment; + osd_data->pages_from_pool = pages_from_pool; + osd_data->own_pages = own_pages; +} + +static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, + struct ceph_pagelist *pagelist) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST; + osd_data->pagelist = pagelist; +} + +#ifdef CONFIG_BLOCK +static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, + struct bio *bio, size_t bio_length) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_BIO; + osd_data->bio = bio; + osd_data->bio_length = bio_length; +} +#endif /* CONFIG_BLOCK */ + +#define osd_req_op_data(oreq, whch, typ, fld) \ + ({ \ + BUG_ON(whch >= (oreq)->r_num_ops); \ + &(oreq)->r_ops[whch].typ.fld; \ + }) + +static struct ceph_osd_data * +osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) +{ + BUG_ON(which >= osd_req->r_num_ops); + + return &osd_req->r_ops[which].raw_data_in; +} + +struct ceph_osd_data * +osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req, + unsigned int which) +{ + return osd_req_op_data(osd_req, which, extent, osd_data); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data); + +struct ceph_osd_data * +osd_req_op_cls_response_data(struct ceph_osd_request *osd_req, + unsigned int which) +{ + return osd_req_op_data(osd_req, which, cls, response_data); +} +EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? 
*/ + +void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, + u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_raw_data_in(osd_req, which); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_raw_data_in_pages); + +void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, + u64 length, u32 alignment, + bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); + +void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); + +#ifdef CONFIG_BLOCK +void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, + unsigned int which, struct bio *bio, size_t bio_length) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_bio_init(osd_data, bio, bio_length); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); +#endif /* CONFIG_BLOCK */ + +static void osd_req_op_cls_request_info_pagelist( + struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_info); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} + +void osd_req_op_cls_request_data_pagelist( + struct ceph_osd_request *osd_req, + unsigned int which, struct ceph_pagelist *pagelist) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_pagelist_init(osd_data, pagelist); +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); + +void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); + +void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, + unsigned int which, struct page **pages, u64 length, + u32 alignment, bool pages_from_pool, bool own_pages) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, cls, response_data); + ceph_osd_data_pages_init(osd_data, pages, length, alignment, + pages_from_pool, own_pages); +} +EXPORT_SYMBOL(osd_req_op_cls_response_data_pages); + +static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) +{ + switch (osd_data->type) { + case CEPH_OSD_DATA_TYPE_NONE: + return 0; + case CEPH_OSD_DATA_TYPE_PAGES: + return osd_data->length; + case CEPH_OSD_DATA_TYPE_PAGELIST: + return (u64)osd_data->pagelist->length; +#ifdef CONFIG_BLOCK + case CEPH_OSD_DATA_TYPE_BIO: + return (u64)osd_data->bio_length; +#endif /* CONFIG_BLOCK */ + default: + WARN(true, "unrecognized data 
type %d\n", (int)osd_data->type); + return 0; } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; +} - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - bno, objoff, objlen, req->r_num_pages); +static void ceph_osd_data_release(struct ceph_osd_data *osd_data) +{ + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) { + int num_pages; - snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); - req->r_oid_len = strlen(req->r_oid); + num_pages = calc_pages_for((u64)osd_data->alignment, + (u64)osd_data->length); + ceph_release_page_vector(osd_data->pages, num_pages); + } + ceph_osd_data_init(osd_data); +} + +static void osd_req_op_data_release(struct ceph_osd_request *osd_req, + unsigned int which) +{ + struct ceph_osd_req_op *op; + + BUG_ON(which >= osd_req->r_num_ops); + op = &osd_req->r_ops[which]; - return r; + switch (op->op) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + ceph_osd_data_release(&op->extent.osd_data); + break; + case CEPH_OSD_OP_CALL: + ceph_osd_data_release(&op->cls.request_info); + ceph_osd_data_release(&op->cls.request_data); + ceph_osd_data_release(&op->cls.response_data); + break; + default: + break; + } } /* @@ -117,30 +299,26 @@ static int calc_layout(struct ceph_vino vino, */ void ceph_osdc_release_request(struct kref *kref) { - struct ceph_osd_request *req = container_of(kref, - struct ceph_osd_request, - r_kref); + struct ceph_osd_request *req; + unsigned int which; + req = container_of(kref, struct ceph_osd_request, r_kref); if (req->r_request) ceph_msg_put(req->r_request); - if (req->r_con_filling_msg) { - dout("%s revoking msg %p from con %p\n", __func__, - req->r_reply, req->r_con_filling_msg); + if (req->r_reply) { ceph_msg_revoke_incoming(req->r_reply); - req->r_con_filling_msg->ops->put(req->r_con_filling_msg); - req->r_con_filling_msg = NULL; - } - if (req->r_reply) ceph_msg_put(req->r_reply); - if (req->r_own_pages) - ceph_release_page_vector(req->r_pages, - req->r_num_pages); + } + + for (which = 0; which < req->r_num_ops; which++) + osd_req_op_data_release(req, which); + ceph_put_snap_context(req->r_snapc); - ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else - kfree(req); + kmem_cache_free(ceph_osd_request_cache, req); + } EXPORT_SYMBOL(ceph_osdc_release_request); @@ -154,6 +332,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_msg *msg; size_t msg_size; + BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX); + BUG_ON(num_ops > CEPH_OSD_MAX_OP); + msg_size = 4 + 4 + 8 + 8 + 4+8; msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ msg_size += 1 + 8 + 4 + 4; /* pg_t */ @@ -168,13 +349,14 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, req = mempool_alloc(osdc->req_mempool, gfp_flags); memset(req, 0, sizeof(*req)); } else { - req = kzalloc(sizeof(*req), gfp_flags); + req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); } if (req == NULL) return NULL; req->r_osdc = osdc; req->r_mempool = use_mempool; + req->r_num_ops = num_ops; kref_init(&req->r_kref); init_completion(&req->r_completion); @@ -198,8 +380,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - ceph_pagelist_init(&req->r_trail); - /* create request message; allow space for oid */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); @@ -218,60 +398,24 @@ struct 
ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_osdc_alloc_request); -static void osd_req_encode_op(struct ceph_osd_request *req, - struct ceph_osd_op *dst, - struct ceph_osd_req_op *src) +static bool osd_req_opcode_valid(u16 opcode) { - dst->op = cpu_to_le16(src->op); - - switch (src->op) { - case CEPH_OSD_OP_STAT: - break; + switch (opcode) { case CEPH_OSD_OP_READ: - case CEPH_OSD_OP_WRITE: - dst->extent.offset = - cpu_to_le64(src->extent.offset); - dst->extent.length = - cpu_to_le64(src->extent.length); - dst->extent.truncate_size = - cpu_to_le64(src->extent.truncate_size); - dst->extent.truncate_seq = - cpu_to_le32(src->extent.truncate_seq); - break; - case CEPH_OSD_OP_CALL: - dst->cls.class_len = src->cls.class_len; - dst->cls.method_len = src->cls.method_len; - dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - - ceph_pagelist_append(&req->r_trail, src->cls.class_name, - src->cls.class_len); - ceph_pagelist_append(&req->r_trail, src->cls.method_name, - src->cls.method_len); - ceph_pagelist_append(&req->r_trail, src->cls.indata, - src->cls.indata_len); - break; - case CEPH_OSD_OP_STARTSYNC: - break; - case CEPH_OSD_OP_NOTIFY_ACK: - case CEPH_OSD_OP_WATCH: - dst->watch.cookie = cpu_to_le64(src->watch.cookie); - dst->watch.ver = cpu_to_le64(src->watch.ver); - dst->watch.flag = src->watch.flag; - break; - default: - pr_err("unrecognized osd opcode %d\n", dst->op); - WARN_ON(1); - break; + case CEPH_OSD_OP_STAT: case CEPH_OSD_OP_MAPEXT: case CEPH_OSD_OP_MASKTRUNC: case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_ASSERT_VER: + case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: case CEPH_OSD_OP_TRUNCATE: case CEPH_OSD_OP_ZERO: case CEPH_OSD_OP_DELETE: case CEPH_OSD_OP_APPEND: + case CEPH_OSD_OP_STARTSYNC: case CEPH_OSD_OP_SETTRUNC: case CEPH_OSD_OP_TRIMTRUNC: case CEPH_OSD_OP_TMAPUP: @@ -279,11 +423,11 @@ static void osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_TMAPGET: case CEPH_OSD_OP_CREATE: case CEPH_OSD_OP_ROLLBACK: + case CEPH_OSD_OP_WATCH: case CEPH_OSD_OP_OMAPGETKEYS: case CEPH_OSD_OP_OMAPGETVALS: case CEPH_OSD_OP_OMAPGETHEADER: case CEPH_OSD_OP_OMAPGETVALSBYKEYS: - case CEPH_OSD_OP_MODE_RD: case CEPH_OSD_OP_OMAPSETVALS: case CEPH_OSD_OP_OMAPSETHEADER: case CEPH_OSD_OP_OMAPCLEAR: @@ -314,113 +458,233 @@ static void osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_RDUNLOCK: case CEPH_OSD_OP_UPLOCK: case CEPH_OSD_OP_DNLOCK: + case CEPH_OSD_OP_CALL: case CEPH_OSD_OP_PGLS: case CEPH_OSD_OP_PGLS_FILTER: - pr_err("unsupported osd opcode %s\n", - ceph_osd_op_name(dst->op)); - WARN_ON(1); - break; + return true; + default: + return false; } - dst->payload_len = cpu_to_le32(src->payload_len); } /* - * build new request AND message - * + * This is an osd op init function for opcodes that have no data or + * other information associated with them. It also serves as a + * common init routine for all the other init functions, below. 
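For orientation, the per-op init helpers defined next are meant to be invoked once per slot of r_ops after the request is allocated. An assumed calling pattern, mirroring what ceph_osdc_new_request() does later in this patch (variable names illustrative):

	req = ceph_osdc_alloc_request(osdc, snapc, 2, false, GFP_NOFS);
	if (!req)
		return ERR_PTR(-ENOMEM);
	/* op 0: the extent write itself */
	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_WRITE, objoff, objlen,
			       truncate_size, truncate_seq);
	/* op 1: ask the osd to flush promptly */
	osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);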
*/ -void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 len, unsigned int num_ops, - struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, u64 snap_id, - struct timespec *mtime) +static struct ceph_osd_req_op * +_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode) { - struct ceph_msg *msg = req->r_request; - struct ceph_osd_req_op *src_op; - void *p; - size_t msg_size; - int flags = req->r_flags; - u64 data_len; - int i; + struct ceph_osd_req_op *op; - req->r_num_ops = num_ops; - req->r_snapid = snap_id; - req->r_snapc = ceph_get_snap_context(snapc); + BUG_ON(which >= osd_req->r_num_ops); + BUG_ON(!osd_req_opcode_valid(opcode)); - /* encode request */ - msg->hdr.version = cpu_to_le16(4); + op = &osd_req->r_ops[which]; + memset(op, 0, sizeof (*op)); + op->op = opcode; - p = msg->front.iov_base; - ceph_encode_32(&p, 1); /* client_inc is always 1 */ - req->r_request_osdmap_epoch = p; - p += 4; - req->r_request_flags = p; - p += 4; - if (req->r_flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(p, mtime); - p += sizeof(struct ceph_timespec); - req->r_request_reassert_version = p; - p += sizeof(struct ceph_eversion); /* will get filled in */ + return op; +} - /* oloc */ - ceph_encode_8(&p, 4); - ceph_encode_8(&p, 4); - ceph_encode_32(&p, 8 + 4 + 4); - req->r_request_pool = p; - p += 8; - ceph_encode_32(&p, -1); /* preferred */ - ceph_encode_32(&p, 0); /* key len */ +void osd_req_op_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode) +{ + (void)_osd_req_op_init(osd_req, which, opcode); +} +EXPORT_SYMBOL(osd_req_op_init); - ceph_encode_8(&p, 1); - req->r_request_pgid = p; - p += 8 + 4; - ceph_encode_32(&p, -1); /* preferred */ +void osd_req_op_extent_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 offset, u64 length, + u64 truncate_size, u32 truncate_seq) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + size_t payload_len = 0; - /* oid */ - ceph_encode_32(&p, req->r_oid_len); - memcpy(p, req->r_oid, req->r_oid_len); - dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); - p += req->r_oid_len; + BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); - /* ops */ - ceph_encode_16(&p, num_ops); - src_op = src_ops; - req->r_request_ops = p; - for (i = 0; i < num_ops; i++, src_op++) { - osd_req_encode_op(req, p, src_op); - p += sizeof(struct ceph_osd_op); - } + op->extent.offset = offset; + op->extent.length = length; + op->extent.truncate_size = truncate_size; + op->extent.truncate_seq = truncate_seq; + if (opcode == CEPH_OSD_OP_WRITE) + payload_len += length; - /* snaps */ - ceph_encode_64(&p, req->r_snapid); - ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); - ceph_encode_32(&p, req->r_snapc ? 
req->r_snapc->num_snaps : 0); - if (req->r_snapc) { - for (i = 0; i < snapc->num_snaps; i++) { - ceph_encode_64(&p, req->r_snapc->snaps[i]); - } + op->payload_len = payload_len; +} +EXPORT_SYMBOL(osd_req_op_extent_init); + +void osd_req_op_extent_update(struct ceph_osd_request *osd_req, + unsigned int which, u64 length) +{ + struct ceph_osd_req_op *op; + u64 previous; + + BUG_ON(which >= osd_req->r_num_ops); + op = &osd_req->r_ops[which]; + previous = op->extent.length; + + if (length == previous) + return; /* Nothing to do */ + BUG_ON(length > previous); + + op->extent.length = length; + op->payload_len -= previous - length; +} +EXPORT_SYMBOL(osd_req_op_extent_update); + +void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, + u16 opcode, const char *class, const char *method) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + struct ceph_pagelist *pagelist; + size_t payload_len = 0; + size_t size; + + BUG_ON(opcode != CEPH_OSD_OP_CALL); + + pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); + BUG_ON(!pagelist); + ceph_pagelist_init(pagelist); + + op->cls.class_name = class; + size = strlen(class); + BUG_ON(size > (size_t) U8_MAX); + op->cls.class_len = size; + ceph_pagelist_append(pagelist, class, size); + payload_len += size; + + op->cls.method_name = method; + size = strlen(method); + BUG_ON(size > (size_t) U8_MAX); + op->cls.method_len = size; + ceph_pagelist_append(pagelist, method, size); + payload_len += size; + + osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist); + + op->cls.argc = 0; /* currently unused */ + + op->payload_len = payload_len; +} +EXPORT_SYMBOL(osd_req_op_cls_init); + +void osd_req_op_watch_init(struct ceph_osd_request *osd_req, + unsigned int which, u16 opcode, + u64 cookie, u64 version, int flag) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode); + + BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH); + + op->watch.cookie = cookie; + op->watch.ver = version; + if (opcode == CEPH_OSD_OP_WATCH && flag) + op->watch.flag = (u8)1; +} +EXPORT_SYMBOL(osd_req_op_watch_init); + +static void ceph_osdc_msg_data_add(struct ceph_msg *msg, + struct ceph_osd_data *osd_data) +{ + u64 length = ceph_osd_data_length(osd_data); + + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { + BUG_ON(length > (u64) SIZE_MAX); + if (length) + ceph_msg_data_add_pages(msg, osd_data->pages, + length, osd_data->alignment); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) { + BUG_ON(!length); + ceph_msg_data_add_pagelist(msg, osd_data->pagelist); +#ifdef CONFIG_BLOCK + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { + ceph_msg_data_add_bio(msg, osd_data->bio, length); +#endif + } else { + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } +} - req->r_request_attempts = p; - p += 4; +static u64 osd_req_encode_op(struct ceph_osd_request *req, + struct ceph_osd_op *dst, unsigned int which) +{ + struct ceph_osd_req_op *src; + struct ceph_osd_data *osd_data; + u64 request_data_len = 0; + u64 data_length; - data_len = req->r_trail.length; - if (flags & CEPH_OSD_FLAG_WRITE) { - req->r_request->hdr.data_off = cpu_to_le16(off); - data_len += len; + BUG_ON(which >= req->r_num_ops); + src = &req->r_ops[which]; + if (WARN_ON(!osd_req_opcode_valid(src->op))) { + pr_err("unrecognized osd opcode %d\n", src->op); + + return 0; } - req->r_request->hdr.data_len = cpu_to_le32(data_len); - req->r_request->page_alignment = req->r_page_alignment; - BUG_ON(p > msg->front.iov_base + 
msg->front.iov_len); - msg_size = p - msg->front.iov_base; - msg->front.iov_len = msg_size; - msg->hdr.front_len = cpu_to_le32(msg_size); + switch (src->op) { + case CEPH_OSD_OP_STAT: + osd_data = &src->raw_data_in; + ceph_osdc_msg_data_add(req->r_reply, osd_data); + break; + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + if (src->op == CEPH_OSD_OP_WRITE) + request_data_len = src->extent.length; + dst->extent.offset = cpu_to_le64(src->extent.offset); + dst->extent.length = cpu_to_le64(src->extent.length); + dst->extent.truncate_size = + cpu_to_le64(src->extent.truncate_size); + dst->extent.truncate_seq = + cpu_to_le32(src->extent.truncate_seq); + osd_data = &src->extent.osd_data; + if (src->op == CEPH_OSD_OP_WRITE) + ceph_osdc_msg_data_add(req->r_request, osd_data); + else + ceph_osdc_msg_data_add(req->r_reply, osd_data); + break; + case CEPH_OSD_OP_CALL: + dst->cls.class_len = src->cls.class_len; + dst->cls.method_len = src->cls.method_len; + osd_data = &src->cls.request_info; + ceph_osdc_msg_data_add(req->r_request, osd_data); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST); + request_data_len = osd_data->pagelist->length; + + osd_data = &src->cls.request_data; + data_length = ceph_osd_data_length(osd_data); + if (data_length) { + BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); + dst->cls.indata_len = cpu_to_le32(data_length); + ceph_osdc_msg_data_add(req->r_request, osd_data); + src->payload_len += data_length; + request_data_len += data_length; + } + osd_data = &src->cls.response_data; + ceph_osdc_msg_data_add(req->r_reply, osd_data); + break; + case CEPH_OSD_OP_STARTSYNC: + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + dst->watch.cookie = cpu_to_le64(src->watch.cookie); + dst->watch.ver = cpu_to_le64(src->watch.ver); + dst->watch.flag = src->watch.flag; + break; + default: + pr_err("unsupported osd opcode %s\n", + ceph_osd_op_name(src->op)); + WARN_ON(1); - dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, - num_ops); - return; + return 0; + } + dst->op = cpu_to_le16(src->op); + dst->payload_len = cpu_to_le32(src->payload_len); + + return request_data_len; } -EXPORT_SYMBOL(ceph_osdc_build_request); /* * build new request AND message, calculate layout, and adjust file @@ -436,51 +700,63 @@ EXPORT_SYMBOL(ceph_osdc_build_request); struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, struct ceph_vino vino, - u64 off, u64 *plen, + u64 off, u64 *plen, int num_ops, int opcode, int flags, struct ceph_snap_context *snapc, - int do_sync, u32 truncate_seq, u64 truncate_size, - struct timespec *mtime, - bool use_mempool, - int page_align) + bool use_mempool) { - struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; - unsigned int num_op = 1; + u64 objnum = 0; + u64 objoff = 0; + u64 objlen = 0; + u32 object_size; + u64 object_base; int r; - memset(&ops, 0, sizeof ops); - - ops[0].op = opcode; - ops[0].extent.truncate_seq = truncate_seq; - ops[0].extent.truncate_size = truncate_size; - - if (do_sync) { - ops[1].op = CEPH_OSD_OP_STARTSYNC; - num_op++; - } + BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE); - req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ - r = calc_layout(vino, layout, off, plen, req, ops); - if (r < 0) + r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen); + 
if (r < 0) { + ceph_osdc_put_request(req); return ERR_PTR(r); - req->r_file_layout = *layout; /* keep a copy */ + } - /* in case it differs from natural (file) alignment that - calc_layout filled in for us */ - req->r_num_pages = calc_pages_for(page_align, *plen); - req->r_page_alignment = page_align; + object_size = le32_to_cpu(layout->fl_object_size); + object_base = off - objoff; + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } + + osd_req_op_extent_init(req, 0, opcode, objoff, objlen, + truncate_size, truncate_seq); + + /* + * A second op in the ops array means the caller wants to + * also include a 'startsync' command so that the + * osd will flush data quickly. + */ + if (num_ops > 1) + osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC); + + req->r_file_layout = *layout; /* keep a copy */ - ceph_osdc_build_request(req, off, *plen, num_op, ops, - snapc, vino.snap, mtime); + snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", + vino.ino, objnum); + req->r_oid_len = strlen(req->r_oid); return req; } @@ -558,21 +834,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd) { struct ceph_osd_request *req, *nreq; + LIST_HEAD(resend); int err; dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); if (err) return; - + /* + * Build up a list of requests to resend by traversing the + * osd's list of requests. Requests for a given object are + * sent in tid order, and that is also the order they're + * kept on this list. Therefore all requests that are in + * flight will be found first, followed by all requests that + * have not yet been sent. And to resend requests while + * preserving this order we will want to put any sent + * requests back on the front of the osd client's unsent + * list. + * + * So we build a separate ordered list of already-sent + * requests for the affected osd and splice it onto the + * front of the osd client's unsent list. Once we've seen a + * request that has not yet been sent we're done. Those + * requests are already sitting right where they belong. + */ list_for_each_entry(req, &osd->o_requests, r_osd_item) { - list_move(&req->r_req_lru_item, &osdc->req_unsent); - dout("requeued %p tid %llu osd%d\n", req, req->r_tid, + if (!req->r_sent) + break; + list_move_tail(&req->r_req_lru_item, &resend); + dout("requeueing %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); if (!req->r_linger) req->r_flags |= CEPH_OSD_FLAG_RETRY; } + list_splice(&resend, &osdc->req_unsent); + /* + * Linger requests are re-registered before sending, which + * sets up a new tid for each. We add them to the unsent + * list at the end to keep things in tid order. + */
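A worked example of the ordering maintained here, with hypothetical tids (the linger loop just below appends re-registered requests at the end):

	/*
	 * Suppose the osd held tids 3 and 5 (sent) and 7 (never sent),
	 * the unsent list already held tid 9, and one linger request is
	 * re-registered with new tid 10:
	 *
	 *   peel sent requests:     resend = 3, 5        (loop stops at 7)
	 *   splice to front:        req_unsent = 3, 5, 7, 9
	 *   linger loop (below):    req_unsent = 3, 5, 7, 9, 10
	 *
	 * so resends and first sends all still go out in tid order.
	 */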
list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, r_linger_osd) { /* @@ -581,8 +882,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, */ BUG_ON(!list_empty(&req->r_req_lru_item)); __register_request(osdc, req); - list_add(&req->r_req_lru_item, &osdc->req_unsent); - list_add(&req->r_osd_item, &req->r_osd->o_requests); + list_add_tail(&req->r_req_lru_item, &osdc->req_unsent); + list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); __unregister_linger_request(osdc, req); dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); @@ -654,8 +955,7 @@ static void put_osd(struct ceph_osd *osd) if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer); + ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer); kfree(osd); } } @@ -820,14 +1120,6 @@ static void __register_request(struct ceph_osd_client *osdc, } } -static void register_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) -{ - mutex_lock(&osdc->request_mutex); - __register_request(osdc, req); - mutex_unlock(&osdc->request_mutex); -} - /* * called under osdc->request_mutex */ @@ -952,8 +1244,8 @@ static int __map_request(struct ceph_osd_client *osdc, int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&pgid, req->r_oid, - &req->r_file_layout, osdc->osdmap); + err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap, + ceph_file_layout_pg_pool(req->r_file_layout)); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } @@ -1007,10 +1299,10 @@ static int __map_request(struct ceph_osd_client *osdc, if (req->r_osd) { __remove_osd_from_lru(req->r_osd); - list_add(&req->r_osd_item, &req->r_osd->o_requests); - list_move(&req->r_req_lru_item, &osdc->req_unsent); + list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); + list_move_tail(&req->r_req_lru_item, &osdc->req_unsent); } else { - list_move(&req->r_req_lru_item, &osdc->req_notarget); + list_move_tail(&req->r_req_lru_item, &osdc->req_notarget); } err = 1; /* osd or pg changed */ @@ -1045,8 +1337,14 @@ static void __send_request(struct ceph_osd_client *osdc, list_move_tail(&req->r_req_lru_item, &osdc->req_lru); ceph_msg_get(req->r_request); /* send consumes a ref */ - ceph_con_send(&req->r_osd->o_con, req->r_request); + + /* Mark the request unsafe if this is the first time it's being sent. */ + + if (!req->r_sent && req->r_unsafe_callback) + req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; + + ceph_con_send(&req->r_osd->o_con, req->r_request); } /* @@ -1134,31 +1432,11 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_safe_callback) - req->r_safe_callback(req, NULL); + if (req->r_unsafe_callback) + req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } -static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid) -{ - __u8 v; - - ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad); - v = ceph_decode_8(p); - if (v > 1) { - pr_warning("do not understand pg encoding %d > 1", v); - return -EINVAL; - } - pgid->pool = ceph_decode_64(p); - pgid->seed = ceph_decode_32(p); - *p += 4; - return 0; - -bad: - pr_warning("incomplete pg encoding"); - return -EINVAL; -} - /* * handle osd op reply.
either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1170,7 +1448,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_osd_request *req; u64 tid; int object_len; - int numops, payload_len, flags; + unsigned int numops; + int payload_len, flags; s32 result; s32 retry_attempt; struct ceph_pg pg; @@ -1178,7 +1457,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, u32 reassert_epoch; u64 reassert_version; u32 osdmap_epoch; - int i; + int already_completed; + u32 bytes; + unsigned int i; tid = le64_to_cpu(msg->hdr.tid); dout("handle_reply %p tid %llu\n", msg, tid); @@ -1191,7 +1472,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ceph_decode_need(&p, end, object_len, bad); p += object_len; - err = __decode_pgid(&p, end, &pg); + err = ceph_decode_pgid(&p, end, &pg); if (err) goto bad; @@ -1207,8 +1488,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req = __lookup_request(osdc, tid); if (req == NULL) { dout("handle_reply tid %llu dne\n", tid); - mutex_unlock(&osdc->request_mutex); - return; + goto bad_mutex; } ceph_osdc_get_request(req); @@ -1233,9 +1513,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, payload_len += len; p += sizeof(*op); } - if (payload_len != le32_to_cpu(msg->hdr.data_len)) { + bytes = le32_to_cpu(msg->hdr.data_len); + if (payload_len != bytes) { pr_warning("sum of op payload lens %d != data_len %d", - payload_len, le32_to_cpu(msg->hdr.data_len)); + payload_len, bytes); goto bad_put; } @@ -1244,21 +1525,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, for (i = 0; i < numops; i++) req->r_reply_op_result[i] = ceph_decode_32(&p); - /* - * if this connection filled our message, drop our reference now, to - * avoid a (safe but slower) revoke later. 
- */ - if (req->r_con_filling_msg == con && req->r_reply == msg) { - dout(" dropping con_filling_msg ref %p\n", con); - req->r_con_filling_msg = NULL; - con->ops->put(con); - } - if (!req->r_got_reply) { - unsigned int bytes; req->r_result = result; - bytes = le32_to_cpu(msg->hdr.data_len); dout("handle_reply result %d bytes %d\n", req->r_result, bytes); if (req->r_result == 0) @@ -1286,7 +1555,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); + already_completed = req->r_completed; + req->r_completed = 1; mutex_unlock(&osdc->request_mutex); + if (already_completed) + goto done; if (req->r_callback) req->r_callback(req, msg); @@ -1303,6 +1576,8 @@ done: bad_put: ceph_osdc_put_request(req); +bad_mutex: + mutex_unlock(&osdc->request_mutex); bad: pr_err("corrupt osd_op_reply got %d %d\n", (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); @@ -1736,6 +2011,104 @@ bad: } /* + * build new request AND message + * + */ +void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, + struct ceph_snap_context *snapc, u64 snap_id, + struct timespec *mtime) +{ + struct ceph_msg *msg = req->r_request; + void *p; + size_t msg_size; + int flags = req->r_flags; + u64 data_len; + unsigned int i; + + req->r_snapid = snap_id; + req->r_snapc = ceph_get_snap_context(snapc); + + /* encode request */ + msg->hdr.version = cpu_to_le16(4); + + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + + /* ops--can imply data */ + ceph_encode_16(&p, (u16)req->r_num_ops); + data_len = 0; + for (i = 0; i < req->r_num_ops; i++) { + data_len += osd_req_encode_op(req, p, i); + p += sizeof(struct ceph_osd_op); + } + + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); + if (req->r_snapc) { + for (i = 0; i < snapc->num_snaps; i++) { + ceph_encode_64(&p, req->r_snapc->snaps[i]); + } + } + + req->r_request_attempts = p; + p += 4; + + /* data */ + if (flags & CEPH_OSD_FLAG_WRITE) { + u16 data_off; + + /* + * The header "data_off" is a hint to the receiver + * allowing it to align received data into its + * buffers such that there's no need to re-copy + * it before writing it to disk (direct I/O). 
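To make the hint concrete, a receiver could recover the intended page alignment like this (sketch only; 'hdr' is the received message header):

	u16 data_off = le16_to_cpu(hdr->data_off);
	unsigned int page_align = data_off & ~PAGE_MASK;

	/*
	 * Allocate the receive pages so the payload starts at page_align;
	 * the data can then go straight to disk without an extra copy.
	 */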
+ */ + data_off = (u16) (off & 0xffff); + req->r_request->hdr.data_off = cpu_to_le16(data_off); + } + req->r_request->hdr.data_len = cpu_to_le32(data_len); + + BUG_ON(p > msg->front.iov_base + msg->front.iov_len); + msg_size = p - msg->front.iov_base; + msg->front.iov_len = msg_size; + msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d\n", (int)msg_size); +} +EXPORT_SYMBOL(ceph_osdc_build_request); + +/* * Register request, send initial attempt. */ int ceph_osdc_start_request(struct ceph_osd_client *osdc, @@ -1744,41 +2117,26 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, { int rc = 0; - req->r_request->pages = req->r_pages; - req->r_request->nr_pages = req->r_num_pages; -#ifdef CONFIG_BLOCK - req->r_request->bio = req->r_bio; -#endif - req->r_request->trail = &req->r_trail; - - register_request(osdc, req); - down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); - /* - * a racing kick_requests() may have sent the message for us - * while we dropped request_mutex above, so only send now if - * the request still han't been touched yet. - */ - if (req->r_sent == 0) { - rc = __map_request(osdc, req, 0); - if (rc < 0) { - if (nofail) { - dout("osdc_start_request failed map, " - " will retry %lld\n", req->r_tid); - rc = 0; - } - goto out_unlock; - } - if (req->r_osd == NULL) { - dout("send_request %p no up osds in pg\n", req); - ceph_monc_request_next_osdmap(&osdc->client->monc); - } else { - __send_request(osdc, req); + __register_request(osdc, req); + WARN_ON(req->r_sent); + rc = __map_request(osdc, req, 0); + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; } - rc = 0; + goto out_unlock; } - + if (req->r_osd == NULL) { + dout("send_request %p no up osds in pg\n", req); + ceph_monc_request_next_osdmap(&osdc->client->monc); + } else { + __send_queued(osdc); + } + rc = 0; out_unlock: mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); @@ -1940,18 +2298,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, vino.snap, off, *plen); - req = ceph_osdc_new_request(osdc, layout, vino, off, plen, + req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, - NULL, 0, truncate_seq, truncate_size, NULL, - false, page_align); + NULL, truncate_seq, truncate_size, + false); if (IS_ERR(req)) return PTR_ERR(req); /* it may be a short read due to an object boundary */ - req->r_pages = pages; - dout("readpages final extent is %llu~%llu (%d pages align %d)\n", - off, *plen, req->r_num_pages, page_align); + osd_req_op_extent_osd_data_pages(req, 0, + pages, *plen, page_align, false, false); + + dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n", + off, *plen, *plen, page_align); + + ceph_osdc_build_request(req, off, NULL, vino.snap, NULL); rc = ceph_osdc_start_request(osdc, req, false); if (!rc) @@ -1978,20 +2340,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, int rc = 0; int page_align = off & ~PAGE_MASK; - BUG_ON(vino.snap != CEPH_NOSNAP); - req = ceph_osdc_new_request(osdc, layout, vino, off, &len, + BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */ + req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - snapc, 0, - truncate_seq, truncate_size, mtime, - true, page_align); + snapc, truncate_seq, truncate_size, + true); if (IS_ERR(req)) 
return PTR_ERR(req); /* it may be a short write due to an object boundary */ - req->r_pages = pages; - dout("writepages %llu~%llu (%d pages)\n", off, len, - req->r_num_pages); + osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, + false, false); + dout("writepages %llu~%llu (%llu bytes)\n", off, len, len); + + ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime); rc = ceph_osdc_start_request(osdc, req, true); if (!rc) @@ -2005,6 +2368,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); +int ceph_osdc_setup(void) +{ + BUG_ON(ceph_osd_request_cache); + ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", + sizeof (struct ceph_osd_request), + __alignof__(struct ceph_osd_request), + 0, NULL); + + return ceph_osd_request_cache ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(ceph_osdc_setup); + +void ceph_osdc_cleanup(void) +{ + BUG_ON(!ceph_osd_request_cache); + kmem_cache_destroy(ceph_osd_request_cache); + ceph_osd_request_cache = NULL; +} +EXPORT_SYMBOL(ceph_osdc_cleanup); + /* * handle incoming message */ @@ -2064,13 +2447,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, goto out; } - if (req->r_con_filling_msg) { + if (req->r_reply->con) dout("%s revoking msg %p from old con %p\n", __func__, - req->r_reply, req->r_con_filling_msg); - ceph_msg_revoke_incoming(req->r_reply); - req->r_con_filling_msg->ops->put(req->r_con_filling_msg); - req->r_con_filling_msg = NULL; - } + req->r_reply, req->r_reply->con); + ceph_msg_revoke_incoming(req->r_reply); if (front > req->r_reply->front.iov_len) { pr_warning("get_reply front %d > preallocated %d\n", @@ -2084,26 +2464,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, m = ceph_msg_get(req->r_reply); if (data_len > 0) { - int want = calc_pages_for(req->r_page_alignment, data_len); - - if (req->r_pages && unlikely(req->r_num_pages < want)) { - pr_warning("tid %lld reply has %d bytes %d pages, we" - " had only %d pages ready\n", tid, data_len, - want, req->r_num_pages); - *skip = 1; - ceph_msg_put(m); - m = NULL; - goto out; + struct ceph_osd_data *osd_data; + + /* + * XXX This is assuming there is only one op containing + * XXX page data. Probably OK for reads, but this + * XXX ought to be done more generally. 
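One shape a more general treatment could take, summing reply-side capacity across all ops instead of assuming op 0 carries the data (sketch only, not part of this patch):

	static u64 reply_data_capacity(struct ceph_osd_request *req)
	{
		u64 total = 0;
		unsigned int i;

		for (i = 0; i < req->r_num_ops; i++) {
			struct ceph_osd_req_op *op = &req->r_ops[i];

			/* only these op types carry reply-side data here */
			if (op->op == CEPH_OSD_OP_READ)
				total += ceph_osd_data_length(&op->extent.osd_data);
			else if (op->op == CEPH_OSD_OP_CALL)
				total += ceph_osd_data_length(&op->cls.response_data);
		}
		return total;
	}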
+ */ + osd_data = osd_req_op_extent_osd_data(req, 0); + if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { + if (osd_data->pages && + unlikely(osd_data->length < data_len)) { + + pr_warning("tid %lld reply has %d bytes " + "we had only %llu bytes ready\n", + tid, data_len, osd_data->length); + *skip = 1; + ceph_msg_put(m); + m = NULL; + goto out; + } } - m->pages = req->r_pages; - m->nr_pages = req->r_num_pages; - m->page_alignment = req->r_page_alignment; -#ifdef CONFIG_BLOCK - m->bio = req->r_bio; -#endif } *skip = 0; - req->r_con_filling_msg = con->ops->get(con); dout("get_reply tid %lld %p\n", tid, m); out: @@ -2168,13 +2551,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &o->o_auth; if (force_new && auth->authorizer) { - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(ac, auth->authorizer); auth->authorizer = NULL; } - if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { - int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, - auth); + if (!auth->authorizer) { + int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, + auth); + if (ret) + return ERR_PTR(ret); + } else { + int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD, + auth); if (ret) return ERR_PTR(ret); } @@ -2190,11 +2577,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; - /* - * XXX If ac->ops or ac->ops->verify_authorizer_reply is null, - * XXX which do we do: succeed or fail? - */ - return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len); + return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len); } static int invalidate_authorizer(struct ceph_connection *con) @@ -2203,9 +2586,7 @@ static int invalidate_authorizer(struct ceph_connection *con) struct ceph_osd_client *osdc = o->o_osdc; struct ceph_auth_client *ac = osdc->client->monc.auth; - if (ac->ops && ac->ops->invalidate_authorizer) - ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); - + ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); return ceph_monc_validate_auth(&osdc->client->monc); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4543b9aba40..603ddd92db1 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -654,24 +654,6 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) return 0; } -static int __decode_pgid(void **p, void *end, struct ceph_pg *pg) -{ - u8 v; - - ceph_decode_need(p, end, 1+8+4+4, bad); - v = ceph_decode_8(p); - if (v != 1) - goto bad; - pg->pool = ceph_decode_64(p); - pg->seed = ceph_decode_32(p); - *p += 4; /* skip preferred */ - return 0; - -bad: - dout("error decoding pgid\n"); - return -EINVAL; -} - /* * decode a full map. */ @@ -765,7 +747,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_pg pgid; struct ceph_pg_mapping *pg; - err = __decode_pgid(p, end, &pgid); + err = ceph_decode_pgid(p, end, &pgid); if (err) goto bad; ceph_decode_need(p, end, sizeof(u32), bad); @@ -983,7 +965,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_pg pgid; u32 pglen; - err = __decode_pgid(p, end, &pgid); + err = ceph_decode_pgid(p, end, &pgid); if (err) goto bad; ceph_decode_need(p, end, sizeof(u32), bad); @@ -1111,27 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. 
pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_pg *pg, - const char *oid, - struct ceph_file_layout *fl, - struct ceph_osdmap *osdmap) +int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, + struct ceph_osdmap *osdmap, uint64_t pool) { - unsigned int num, num_mask; - struct ceph_pg_pool_info *pool; + struct ceph_pg_pool_info *pool_info; BUG_ON(!osdmap); - pg->pool = le32_to_cpu(fl->fl_pg_pool); - pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); - if (!pool) + pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool); + if (!pool_info) return -EIO; - pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); - num = pool->pg_num; - num_mask = pool->pg_num_mask; + pg->pool = pool; + pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid)); - dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); + dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed); return 0; } -EXPORT_SYMBOL(ceph_calc_object_layout); +EXPORT_SYMBOL(ceph_calc_ceph_pg); /* * Calculate raw osd vector for the given pgid. Return pointer to osd diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c new file mode 100644 index 00000000000..154683f5f14 --- /dev/null +++ b/net/ceph/snapshot.c @@ -0,0 +1,78 @@ +/* + * snapshot.c Ceph snapshot context utility routines (part of libceph) + * + * Copyright (C) 2013 Inktank Storage, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +#include <stddef.h> + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/ceph/libceph.h> + +/* + * Ceph snapshot contexts are reference counted objects, and the + * returned structure holds a single reference. Acquire additional + * references with ceph_get_snap_context(), and release them with + * ceph_put_snap_context(). When the reference count reaches zero + * the entire structure is freed. + */ + +/* + * Create a new ceph snapshot context large enough to hold the + * indicated number of snapshot ids (which can be 0). Caller has + * to fill in snapc->seq and snapc->snaps[0..snap_count-1]. + * + * Returns a null pointer if an error occurs. 
+ */ +struct ceph_snap_context *ceph_create_snap_context(u32 snap_count, + gfp_t gfp_flags) +{ + struct ceph_snap_context *snapc; + size_t size; + + size = sizeof (struct ceph_snap_context); + size += snap_count * sizeof (snapc->snaps[0]); + snapc = kzalloc(size, gfp_flags); + if (!snapc) + return NULL; + + atomic_set(&snapc->nref, 1); + snapc->num_snaps = snap_count; + + return snapc; +} +EXPORT_SYMBOL(ceph_create_snap_context); + +struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc) +{ + if (sc) + atomic_inc(&sc->nref); + return sc; +} +EXPORT_SYMBOL(ceph_get_snap_context); + +void ceph_put_snap_context(struct ceph_snap_context *sc) +{ + if (!sc) + return; + if (atomic_dec_and_test(&sc->nref)) { + /*printk(" deleting snap_context %p\n", sc);*/ + kfree(sc); + } +} +EXPORT_SYMBOL(ceph_put_snap_context); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index d01b24b72c6..779262f59e2 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQ_ROUTING + bool + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 3642239252b..8db43701016 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, spin_lock(&assigned_dev->intx_mask_lock); if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1); + assigned_dev->irq_source_id, vector, 1, + false); spin_unlock(&assigned_dev->intx_mask_lock); } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1); + vector, 1, false); } static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) @@ -165,7 +166,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); spin_lock(&dev->intx_mask_lock); @@ -188,7 +189,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) if (reassert) kvm_set_irq(dev->kvm, dev->irq_source_id, - dev->guest_irq, 1); + dev->guest_irq, 1, false); } spin_unlock(&dev->intx_mask_lock); @@ -202,7 +203,7 @@ static void deassign_guest_irq(struct kvm *kvm, &assigned_dev->ack_notifier); kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 0); + assigned_dev->guest_irq, 0, false); if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); @@ -901,7 +902,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { kvm_set_irq(match->kvm, match->irq_source_id, - match->guest_irq, 0); + match->guest_irq, 0, false); /* * Masking at hardware-level is performed on demand, * i.e. when an IRQ actually arrives at the host. 
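The hunks above illustrate the new calling convention: kvm_set_irq() and the irqchip ->set callbacks now take a trailing line_status flag, so sources that track the state of a level-triggered line (as the RTC GSI handling in the ioapic changes below does) can report it, while callers with nothing to report simply pass false. A minimal sketch of an injector under that convention — the helper name is hypothetical, and kvm and gsi are assumed valid:

	#include <linux/kvm_host.h>

	/* Pulse a GSI: assert then deassert, with no line-status tracking. */
	static int pulse_gsi_example(struct kvm *kvm, u32 gsi)
	{
		int delivered;

		delivered = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
					gsi, 1, false);
		if (delivered <= 0)
			return delivered;	/* ignored (< 0) or coalesced (== 0) */

		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, 0, false);
		return delivered;		/* number of CPUs the irq reached */
	}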
@@ -982,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ #ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b2..64ee720b75c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,7 +35,7 @@ #include "iodev.h" -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -100,11 +100,13 @@ irqfd_inject(struct work_struct *work) struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, + false); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, + false); } else kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - irqfd->gsi, 1); + irqfd->gsi, 1, false); } /* @@ -121,7 +123,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler = container_of(kian, struct _irqfd_resampler, notifier); kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); rcu_read_lock(); @@ -146,7 +148,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) list_del(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); kfree(resampler); } @@ -225,7 +227,8 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = rcu_dereference(irqfd->irq_entry); /* An event has been signaled, inject an interrupt */ if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); + kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false); else schedule_work(&irqfd->inject); rcu_read_unlock(); @@ -430,7 +433,7 @@ fail: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -439,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * shutdown any irqfd's that match fd+gsi */ @@ -543,7 +546,7 @@ void kvm_irq_routing_update(struct kvm *kvm, * aggregated from all vm* instances. We need our own isolated single-thread * queue to prevent deadlock against flushing the normal work-queue. 
*/ -static int __init irqfd_module_init(void) +int kvm_irqfd_init(void) { irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); if (!irqfd_cleanup_wq) @@ -552,13 +555,10 @@ static int __init irqfd_module_init(void) return 0; } -static void __exit irqfd_module_exit(void) +void kvm_irqfd_exit(void) { destroy_workqueue(irqfd_cleanup_wq); } - -module_init(irqfd_module_init); -module_exit(irqfd_module_exit); #endif /* @@ -577,6 +577,7 @@ struct _ioeventfd { struct eventfd_ctx *eventfd; u64 datamatch; struct kvm_io_device dev; + u8 bus_idx; bool wildcard; }; @@ -669,7 +670,8 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) struct _ioeventfd *_p; list_for_each_entry(_p, &kvm->ioeventfds, list) - if (_p->addr == p->addr && _p->length == p->length && + if (_p->bus_idx == p->bus_idx && + _p->addr == p->addr && _p->length == p->length && (_p->wildcard || p->wildcard || _p->datamatch == p->datamatch)) return true; @@ -677,15 +679,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } +static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +{ + if (flags & KVM_IOEVENTFD_FLAG_PIO) + return KVM_PIO_BUS; + if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + return KVM_VIRTIO_CCW_NOTIFY_BUS; + return KVM_MMIO_BUS; +} + static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; + bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized */ switch (args->len) { case 1: @@ -717,6 +728,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) INIT_LIST_HEAD(&p->list); p->addr = args->addr; + p->bus_idx = bus_idx; p->length = args->len; p->eventfd = eventfd; @@ -760,12 +772,12 @@ fail: static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; + bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -775,7 +787,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); - if (p->eventfd != eventfd || + if (p->bus_idx != bus_idx || + p->eventfd != eventfd || p->addr != args->addr || p->length != args->len || p->wildcard != wildcard) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 5ba005c00e2..2d682977ce8 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -50,7 +50,8 @@ #else #define ioapic_debug(fmt, arg...) 
#endif -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq); +static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, + bool line_status); static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, unsigned long addr, @@ -90,7 +91,80 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) +{ + ioapic->rtc_status.pending_eoi = 0; + bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = &ioapic->redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, + e->fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi++; + } else { + __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi--; + } + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __rtc_irq_eoi_tracking_restore_one(vcpu); + spin_unlock(&ioapic->lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i; + + if (RTC_GSI >= IOAPIC_NUM_PINS) + return; + + rtc_irq_eoi_tracking_reset(ioapic); + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu); +} + +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) + --ioapic->rtc_status.pending_eoi; + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) +{ + if (ioapic->rtc_status.pending_eoi > 0) + return true; /* coalesced */ + + return false; +} + +static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, + bool line_status) { union kvm_ioapic_redirect_entry *pent; int injected = -1; @@ -98,7 +172,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) pent = &ioapic->redirtbl[idx]; if (!pent->fields.mask) { - injected = ioapic_deliver(ioapic, idx); + injected = ioapic_deliver(ioapic, idx, line_status); if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } @@ -119,41 +193,48 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) smp_wmb(); } -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; - struct kvm_lapic_irq irqe; int index; spin_lock(&ioapic->lock); - /* traverse ioapic entry to set eoi exit bitmap*/ for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; if (!e->fields.mask && (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, - index))) { - irqe.dest_id = e->fields.dest_id; - irqe.vector = e->fields.vector; - irqe.dest_mode = 
e->fields.dest_mode; - irqe.delivery_mode = e->fields.delivery_mode << 8; - kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); + index) || index == RTC_GSI)) { + if (kvm_apic_match_dest(vcpu, NULL, 0, + e->fields.dest_id, e->fields.dest_mode)) { + __set_bit(e->fields.vector, + (unsigned long *)eoi_exit_bitmap); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) + __set_bit(e->fields.vector, + (unsigned long *)tmr); + } } } spin_unlock(&ioapic->lock); } -EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) +#ifdef CONFIG_X86 +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - if (!kvm_apic_vid_enabled(kvm) || !ioapic) + if (!ioapic) return; - kvm_make_update_eoibitmap_request(kvm); + kvm_make_scan_ioapic_request(kvm); } +#else +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + return; +} +#endif static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { @@ -195,16 +276,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index); - kvm_ioapic_make_eoibitmap_request(ioapic->kvm); + ioapic_service(ioapic, index, false); + kvm_vcpu_request_scan_ioapic(ioapic->kvm); break; } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; struct kvm_lapic_irq irqe; + int ret; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", @@ -220,11 +302,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.level = 1; irqe.shorthand = 0; - return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); + if (irq == RTC_GSI && line_status) { + BUG_ON(ioapic->rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi = ret; + } else + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + + return ret; } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level) + int level, bool line_status) { u32 old_irr; u32 mask = 1 << irq; @@ -244,13 +334,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, ret = 1; } else { int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; /* coalesced */ + goto out; + } ioapic->irr |= mask; if ((edge && old_irr != ioapic->irr) || (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq); + ret = ioapic_service(ioapic, irq, line_status); else ret = 0; /* report coalesced interrupt */ } +out: trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); @@ -267,8 +364,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) spin_unlock(&ioapic->lock); } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, - int trigger_mode) +static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; @@ -278,6 +375,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, if (ent->fields.vector != vector) continue; + if (i == RTC_GSI) + rtc_irq_eoi(ioapic, vcpu); /* * 
We are dropping lock while calling ack notifiers because ack * notifier callbacks for assigned devices call into IOAPIC @@ -296,7 +395,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; if (!ent->fields.mask && (ioapic->irr & (1 << i))) - ioapic_service(ioapic, i); + ioapic_service(ioapic, i, false); } } @@ -307,12 +406,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) return test_bit(vector, ioapic->handled_vectors); } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { - struct kvm_ioapic *ioapic = kvm->arch.vioapic; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; spin_lock(&ioapic->lock); - __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); + __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); spin_unlock(&ioapic->lock); } @@ -410,7 +509,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); + __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); break; #endif @@ -431,6 +530,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->ioregsel = 0; ioapic->irr = 0; ioapic->id = 0; + rtc_irq_eoi_tracking_reset(ioapic); update_handled_vectors(ioapic); } @@ -496,7 +596,8 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); + kvm_rtc_eoi_tracking_restore_all(ioapic); spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0400a466c50..615d8c995c3 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -34,6 +34,17 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 +#ifdef CONFIG_X86 +#define RTC_GSI 8 +#else +#define RTC_GSI -1U +#endif + +struct rtc_status { + int pending_eoi; + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); +}; + struct kvm_ioapic { u64 base_address; u32 ioregsel; @@ -47,6 +58,7 @@ struct kvm_ioapic { void (*ack_notifier)(void *opaque, int irq); spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); + struct rtc_status rtc_status; }; #ifdef DEBUG @@ -67,24 +79,25 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, + int trigger_mode); bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level); + int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq); + struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct 
kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap); - +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e9073cf4d04..e2e6b4473a9 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -35,7 +35,8 @@ #include "ioapic.h" static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { #ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); @@ -46,10 +47,12 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, + line_status); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -63,7 +66,7 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq) + struct kvm_lapic_irq *irq, unsigned long *dest_map) { int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; @@ -74,7 +77,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, irq->delivery_mode = APIC_DM_FIXED; } - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) return r; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -88,7 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_is_dm_lowest_prio(irq)) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, irq); + r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; @@ -98,7 +101,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } if (lowest) - r = kvm_apic_set_irq(lowest, irq); + r = kvm_apic_set_irq(lowest, irq, dest_map); return r; } @@ -121,7 +124,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, } int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, bool line_status) { struct kvm_lapic_irq irq; @@ -130,7 +133,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - return kvm_irq_delivery_to_apic(kvm, NULL, &irq); + return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); } @@ -142,63 +145,12 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) return r; else return -EWOULDBLOCK; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - struct kvm_kernel_irq_routing_entry route; - - if (!irqchip_in_kernel(kvm) || msi->flags != 0) - return -EINVAL; - - route.msi.address_lo = msi->address_lo; - route.msi.address_hi = msi->address_hi; - route.msi.data = 
msi->data; - - return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); -} - -/* - * Return value: - * < 0 Interrupt was ignored (masked or not delivered for other reasons) - * = 0 Interrupt was coalesced (previous irq is still pending) - * > 0 Number of CPUs interrupt was delivered to - */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; - struct kvm_irq_routing_table *irq_rt; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* Not possible to detect if the guest uses the PIC or the - * IOAPIC. So set the bit in both. The guest will ignore - * writes to the unused one. - */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; - rcu_read_unlock(); - - while(i--) { - int r; - r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); - if (r < 0) - continue; - - ret = r + ((ret < 0) ? 0 : ret); - } - - return ret; -} - /* * Deliver an IRQ in an atomic context if we can, or return a failure, * user can retry in a process context. @@ -236,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) return ret; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - rcu_read_unlock(); - return true; - } - - rcu_read_unlock(); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - trace_kvm_ack_irq(irqchip, pin); - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - rcu_read_unlock(); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); - kvm_ioapic_make_eoibitmap_request(kvm); -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); - kvm_ioapic_make_eoibitmap_request(kvm); -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; @@ -376,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, rcu_read_unlock(); } -void kvm_free_irq_routing(struct kvm *kvm) -{ - /* Called only during vm destruction. Nobody can use the pointer - at this stage */ - kfree(kvm->irq_routing); -} - -static int setup_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; unsigned max_pin; - struct kvm_kernel_irq_routing_entry *ei; - /* - * Do not allow GSI to be mapped to the same irqchip more than once. 
- * Allow only one to one mapping between GSI and MSI. - */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || - ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; - - e->gsi = ue->gsi; - e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; @@ -440,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, goto out; } - hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; } - -int kvm_set_irq_routing(struct kvm *kvm, - const struct kvm_irq_routing_entry *ue, - unsigned nr, - unsigned flags) -{ - struct kvm_irq_routing_table *new, *old; - u32 i, j, nr_rt_entries = 0; - int r; - - for (i = 0; i < nr; ++i) { - if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) - return -EINVAL; - nr_rt_entries = max(nr_rt_entries, ue[i].gsi); - } - - nr_rt_entries += 1; - - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) - + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), - GFP_KERNEL); - - if (!new) - return -ENOMEM; - - new->rt_entries = (void *)&new->map[nr_rt_entries]; - - new->nr_rt_entries = nr_rt_entries; - for (i = 0; i < 3; i++) - for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) - new->chip[i][j] = -1; - - for (i = 0; i < nr; ++i) { - r = -EINVAL; - if (ue->flags) - goto out; - r = setup_routing_entry(new, &new->rt_entries[i], ue); - if (r) - goto out; - ++ue; - } - - mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); - mutex_unlock(&kvm->irq_lock); - - synchronize_rcu(); - - new = old; - r = 0; - -out: - kfree(new); - return r; -} - #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c new file mode 100644 index 00000000000..20dc9e4a8f6 --- /dev/null +++ b/virt/kvm/irqchip.c @@ -0,0 +1,237 @@ +/* + * irqchip.c: Common API for in kernel interrupt controllers + * Copyright (c) 2007, Intel Corporation. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (c) 2013, Alexander Graf <agraf@suse.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * This file is derived from virt/kvm/irq_comm.c. 
+ * + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * Alexander Graf <agraf@suse.de> + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <trace/events/kvm.h> +#include "irq.h" + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + rcu_read_unlock(); + return true; + } + + rcu_read_unlock(); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + trace_kvm_ack_irq(irqchip, pin); + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + rcu_read_unlock(); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (!irqchip_in_kernel(kvm) || msi->flags != 0) + return -EINVAL; + + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); +} + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i = 0; + struct kvm_irq_routing_table *irq_rt; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* Not possible to detect if the guest uses the PIC or the + * IOAPIC. So set the bit in both. The guest will ignore + * writes to the unused one. + */ + rcu_read_lock(); + irq_rt = rcu_dereference(kvm->irq_routing); + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, &irq_rt->map[irq], link) + irq_set[i++] = *e; + rcu_read_unlock(); + + while(i--) { + int r; + r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, + line_status); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + + return ret; +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + /* Called only during vm destruction. Nobody can use the pointer + at this stage */ + kfree(kvm->irq_routing); +} + +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + struct kvm_kernel_irq_routing_entry *ei; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. 
+ * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; + + e->gsi = ue->gsi; + e->type = ue->type; + r = kvm_set_routing_entry(rt, e, ue); + if (r) + goto out; + + hlist_add_head(&e->link, &rt->map[e->gsi]); + r = 0; +out: + return r; +} + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct kvm_irq_routing_table *new, *old; + u32 i, j, nr_rt_entries = 0; + int r; + + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < KVM_NR_IRQCHIPS; i++) + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) + new->chip[i][j] = -1; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->flags) + goto out; + r = setup_routing_entry(new, &new->rt_entries[i], ue); + if (r) + goto out; + ++ue; + } + + mutex_lock(&kvm->irq_lock); + old = kvm->irq_routing; + kvm_irq_routing_update(kvm, new); + mutex_unlock(&kvm->irq_lock); + + synchronize_rcu(); + + new = old; + r = 0; + +out: + kfree(new); + return r; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f18013f09e6..45f09362ee7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } -void kvm_make_update_eoibitmap_request(struct kvm *kvm) +void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); + make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); r = kvm_init_mmu_notifier(kvm); if (r) @@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) kfree(kvm->memslots); } +static void kvm_destroy_devices(struct kvm *kvm) +{ + struct list_head *node, *tmp; + + list_for_each_safe(node, tmp, &kvm->devices) { + struct kvm_device *dev = + list_entry(node, struct kvm_device, vm_node); + + list_del(node); + dev->ops->destroy(dev); + } +} + static void kvm_destroy_vm(struct kvm *kvm) { int i; @@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); + kvm_destroy_devices(kvm); kvm_free_physmem(kvm); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory 
space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - -/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -745,8 +743,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; @@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); @@ -932,26 +929,23 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len) return __copy_from_user_inatomic(data, hva, len); } -int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, +static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; @@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) @@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; + if (!ACCESS_ONCE(vcpu->preempted)) + continue; if (vcpu == me) continue; if (waitqueue_active(&vcpu->wq)) @@ -2204,6 +2201,119 @@ out: } #endif +static int kvm_device_ioctl_attr(struct kvm_device *dev, + int (*accessor)(struct kvm_device *dev, + struct kvm_device_attr *attr), + unsigned long arg) +{ + struct kvm_device_attr attr; + + if (!accessor) + return -EPERM; + + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + return -EFAULT; + + return 
accessor(dev, &attr); +} + +static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_device *dev = filp->private_data; + + switch (ioctl) { + case KVM_SET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); + case KVM_GET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); + case KVM_HAS_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + default: + if (dev->ops->ioctl) + return dev->ops->ioctl(dev, ioctl, arg); + + return -ENOTTY; + } +} + +static int kvm_device_release(struct inode *inode, struct file *filp) +{ + struct kvm_device *dev = filp->private_data; + struct kvm *kvm = dev->kvm; + + kvm_put_kvm(kvm); + return 0; +} + +static const struct file_operations kvm_device_fops = { + .unlocked_ioctl = kvm_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = kvm_device_ioctl, +#endif + .release = kvm_device_release, +}; + +struct kvm_device *kvm_device_from_filp(struct file *filp) +{ + if (filp->f_op != &kvm_device_fops) + return NULL; + + return filp->private_data; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif +#ifdef CONFIG_KVM_XICS + case KVM_DEV_TYPE_XICS: + ops = &kvm_xics_ops; + break; +#endif + default: + return -ENODEV; + } + + if (test) + return 0; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->ops = ops; + dev->kvm = kvm; + + ret = ops->create(dev, cd->type); + if (ret < 0) { + kfree(dev); + return ret; + } + + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + if (ret < 0) { + ops->destroy(dev); + return ret; + } + + list_add(&dev->vm_node, &kvm->devices); + kvm_get_kvm(kvm); + cd->fd = ret; + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { @@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + r = kvm_vm_ioctl_irq_line(kvm, &irq_event, + ioctl == KVM_IRQ_LINE_STATUS); if (r) goto out; @@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ + case 
KVM_CREATE_DEVICE: { + struct kvm_create_device cd; + + r = -EFAULT; + if (copy_from_user(&cd, argp, sizeof(cd))) + goto out; + + r = kvm_ioctl_create_device(kvm, &cd); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &cd, sizeof(cd))) + goto out; + + r = 0; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif return 1; -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif @@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } - -asmlinkage void kvm_spurious_fault(void) -{ - /* Fault while not rebooting. We want the trace. */ - BUG(); -} -EXPORT_SYMBOL_GPL(kvm_spurious_fault); - static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { const struct kvm_io_range *r1 = p1; const struct kvm_io_range *r2 = p2; @@ -2670,7 +2824,7 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) return 0; } -int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, +static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { bus->range[bus->dev_count++] = (struct kvm_io_range) { @@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, return 0; } -int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, +static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, gpa_t addr, int len) { struct kvm_io_range *range, key; @@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } @@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -3027,6 +3188,8 @@ out_free_0a: out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -3043,6 +3206,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit);
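KVM_CREATE_DEVICE, handled in the kvm_vm_ioctl() hunk above, doubles as a capability probe: with KVM_CREATE_DEVICE_TEST set in flags, kvm_ioctl_create_device() validates the device type and returns before allocating anything, and only a real call fills in cd.fd. A sketch of that probe-then-create flow from userspace — the function name is hypothetical, vm_fd is assumed to be an open VM descriptor, and error handling is trimmed:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int create_in_kernel_mpic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type	= KVM_DEV_TYPE_FSL_MPIC_20,
			.flags	= KVM_CREATE_DEVICE_TEST,	/* probe only */
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;	/* device type not supported here */

		cd.flags = 0;		/* now create the device for real */
		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;

		return cd.fd;		/* for KVM_{SET,GET,HAS}_DEVICE_ATTR */
	}

The returned fd is what the new kvm_device_ioctl() attribute calls operate on; per kvm_device_release() above, closing it drops the VM reference taken when the device was created.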