diff options
430 files changed, 6723 insertions, 3104 deletions
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt new file mode 100644 index 00000000000..b2830b43589 --- /dev/null +++ b/Documentation/block/null_blk.txt @@ -0,0 +1,72 @@ +Null block device driver +================================================================================ + +I. Overview + +The null block device (/dev/nullb*) is used for benchmarking the various +block-layer implementations. It emulates a block device of X gigabytes in size. +The following instances are possible: + + Single-queue block-layer + - Request-based. + - Single submission queue per device. + - Implements IO scheduling algorithms (CFQ, Deadline, noop). + Multi-queue block-layer + - Request-based. + - Configurable submission queues per device. + No block-layer (Known as bio-based) + - Bio-based. IO requests are submitted directly to the device driver. + - Directly accepts bio data structure and returns them. + +All of them have a completion queue for each core in the system. + +II. Module parameters applicable for all instances: + +queue_mode=[0-2]: Default: 2-Multi-queue + Selects which block-layer the module should instantiate with. + + 0: Bio-based. + 1: Single-queue. + 2: Multi-queue. + +home_node=[0--nr_nodes]: Default: NUMA_NO_NODE + Selects what CPU node the data structures are allocated from. + +gb=[Size in GB]: Default: 250GB + The size of the device reported to the system. + +bs=[Block size (in bytes)]: Default: 512 bytes + The block size reported to the system. + +nr_devices=[Number of devices]: Default: 2 + Number of block devices instantiated. They are instantiated as /dev/nullb0, + etc. + +irq_mode=[0-2]: Default: 1-Soft-irq + The completion mode used for completing IOs to the block-layer. + + 0: None. + 1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead + when IOs are issued from another CPU node than the home the device is + connected to. + 2: Timer: Waits a specific period (completion_nsec) for each IO before + completion. + +completion_nsec=[ns]: Default: 10.000ns + Combined with irq_mode=2 (timer). The time each completion event must wait. + +submit_queues=[0..nr_cpus]: + The number of submission queues attached to the device driver. If unset, it + defaults to 1 on single-queue and bio-based instances. For multi-queue, + it is ignored when use_per_node_hctx module parameter is 1. + +hw_queue_depth=[0..qdepth]: Default: 64 + The hardware queue depth of the device. + +III: Multi-queue specific parameters + +use_per_node_hctx=[0/1]: Default: 0 + 0: The number of submit queues are set to the value of the submit_queues + parameter. + 1: The multi-queue block layer is instantiated with a hardware dispatch + queue for each CPU node in the system. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 50680a59a2f..b9e9bd85429 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1529,6 +1529,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. * atapi_dmadir: Enable ATAPI DMADIR bridge support + * disable: Disable this device. + If there are multiple matching configurations changing the same attribute, the last one is used. diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt new file mode 100644 index 00000000000..2b40e04d3c4 --- /dev/null +++ b/Documentation/module-signing.txt @@ -0,0 +1,240 @@ + ============================== + KERNEL MODULE SIGNING FACILITY + ============================== + +CONTENTS + + - Overview. + - Configuring module signing. + - Generating signing keys. + - Public keys in the kernel. + - Manually signing modules. + - Signed modules and stripping. + - Loading signed modules. + - Non-valid signatures and unsigned modules. + - Administering/protecting the private key. + + +======== +OVERVIEW +======== + +The kernel module signing facility cryptographically signs modules during +installation and then checks the signature upon loading the module. This +allows increased kernel security by disallowing the loading of unsigned modules +or modules signed with an invalid key. Module signing increases security by +making it harder to load a malicious module into the kernel. The module +signature checking is done by the kernel so that it is not necessary to have +trusted userspace bits. + +This facility uses X.509 ITU-T standard certificates to encode the public keys +involved. The signatures are not themselves encoded in any industrial standard +type. The facility currently only supports the RSA public key encryption +standard (though it is pluggable and permits others to be used). The possible +hash algorithms that can be used are SHA-1, SHA-224, SHA-256, SHA-384, and +SHA-512 (the algorithm is selected by data in the signature). + + +========================== +CONFIGURING MODULE SIGNING +========================== + +The module signing facility is enabled by going to the "Enable Loadable Module +Support" section of the kernel configuration and turning on + + CONFIG_MODULE_SIG "Module signature verification" + +This has a number of options available: + + (1) "Require modules to be validly signed" (CONFIG_MODULE_SIG_FORCE) + + This specifies how the kernel should deal with a module that has a + signature for which the key is not known or a module that is unsigned. + + If this is off (ie. "permissive"), then modules for which the key is not + available and modules that are unsigned are permitted, but the kernel will + be marked as being tainted. + + If this is on (ie. "restrictive"), only modules that have a valid + signature that can be verified by a public key in the kernel's possession + will be loaded. All other modules will generate an error. + + Irrespective of the setting here, if the module has a signature block that + cannot be parsed, it will be rejected out of hand. + + + (2) "Automatically sign all modules" (CONFIG_MODULE_SIG_ALL) + + If this is on then modules will be automatically signed during the + modules_install phase of a build. If this is off, then the modules must + be signed manually using: + + scripts/sign-file + + + (3) "Which hash algorithm should modules be signed with?" + + This presents a choice of which hash algorithm the installation phase will + sign the modules with: + + CONFIG_SIG_SHA1 "Sign modules with SHA-1" + CONFIG_SIG_SHA224 "Sign modules with SHA-224" + CONFIG_SIG_SHA256 "Sign modules with SHA-256" + CONFIG_SIG_SHA384 "Sign modules with SHA-384" + CONFIG_SIG_SHA512 "Sign modules with SHA-512" + + The algorithm selected here will also be built into the kernel (rather + than being a module) so that modules signed with that algorithm can have + their signatures checked without causing a dependency loop. + + +======================= +GENERATING SIGNING KEYS +======================= + +Cryptographic keypairs are required to generate and check signatures. A +private key is used to generate a signature and the corresponding public key is +used to check it. The private key is only needed during the build, after which +it can be deleted or stored securely. The public key gets built into the +kernel so that it can be used to check the signatures as the modules are +loaded. + +Under normal conditions, the kernel build will automatically generate a new +keypair using openssl if one does not exist in the files: + + signing_key.priv + signing_key.x509 + +during the building of vmlinux (the public part of the key needs to be built +into vmlinux) using parameters in the: + + x509.genkey + +file (which is also generated if it does not already exist). + +It is strongly recommended that you provide your own x509.genkey file. + +Most notably, in the x509.genkey file, the req_distinguished_name section +should be altered from the default: + + [ req_distinguished_name ] + O = Magrathea + CN = Glacier signing key + emailAddress = slartibartfast@magrathea.h2g2 + +The generated RSA key size can also be set with: + + [ req ] + default_bits = 4096 + + +It is also possible to manually generate the key private/public files using the +x509.genkey key generation configuration file in the root node of the Linux +kernel sources tree and the openssl command. The following is an example to +generate the public/private key files: + + openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 \ + -config x509.genkey -outform DER -out signing_key.x509 \ + -keyout signing_key.priv + + +========================= +PUBLIC KEYS IN THE KERNEL +========================= + +The kernel contains a ring of public keys that can be viewed by root. They're +in a keyring called ".system_keyring" that can be seen by: + + [root@deneb ~]# cat /proc/keys + ... + 223c7853 I------ 1 perm 1f030000 0 0 keyring .system_keyring: 1 + 302d2d52 I------ 1 perm 1f010000 0 0 asymmetri Fedora kernel signing key: d69a84e6bce3d216b979e9505b3e3ef9a7118079: X509.RSA a7118079 [] + ... + +Beyond the public key generated specifically for module signing, any file +placed in the kernel source root directory or the kernel build root directory +whose name is suffixed with ".x509" will be assumed to be an X.509 public key +and will be added to the keyring. + +Further, the architecture code may take public keys from a hardware store and +add those in also (e.g. from the UEFI key database). + +Finally, it is possible to add additional public keys by doing: + + keyctl padd asymmetric "" [.system_keyring-ID] <[key-file] + +e.g.: + + keyctl padd asymmetric "" 0x223c7853 <my_public_key.x509 + +Note, however, that the kernel will only permit keys to be added to +.system_keyring _if_ the new key's X.509 wrapper is validly signed by a key +that is already resident in the .system_keyring at the time the key was added. + + +========================= +MANUALLY SIGNING MODULES +========================= + +To manually sign a module, use the scripts/sign-file tool available in +the Linux kernel source tree. The script requires 4 arguments: + + 1. The hash algorithm (e.g., sha256) + 2. The private key filename + 3. The public key filename + 4. The kernel module to be signed + +The following is an example to sign a kernel module: + + scripts/sign-file sha512 kernel-signkey.priv \ + kernel-signkey.x509 module.ko + +The hash algorithm used does not have to match the one configured, but if it +doesn't, you should make sure that hash algorithm is either built into the +kernel or can be loaded without requiring itself. + + +============================ +SIGNED MODULES AND STRIPPING +============================ + +A signed module has a digital signature simply appended at the end. The string +"~Module signature appended~." at the end of the module's file confirms that a +signature is present but it does not confirm that the signature is valid! + +Signed modules are BRITTLE as the signature is outside of the defined ELF +container. Thus they MAY NOT be stripped once the signature is computed and +attached. Note the entire module is the signed payload, including any and all +debug information present at the time of signing. + + +====================== +LOADING SIGNED MODULES +====================== + +Modules are loaded with insmod, modprobe, init_module() or finit_module(), +exactly as for unsigned modules as no processing is done in userspace. The +signature checking is all done within the kernel. + + +========================================= +NON-VALID SIGNATURES AND UNSIGNED MODULES +========================================= + +If CONFIG_MODULE_SIG_FORCE is enabled or enforcemodulesig=1 is supplied on +the kernel command line, the kernel will only load validly signed modules +for which it has a public key. Otherwise, it will also load modules that are +unsigned. Any module for which the kernel has a key, but which proves to have +a signature mismatch will not be permitted to load. + +Any module that has an unparseable signature will be rejected. + + +========================================= +ADMINISTERING/PROTECTING THE PRIVATE KEY +========================================= + +Since the private key is used to sign modules, viruses and malware could use +the private key to sign modules and compromise the operating system. The +private key must be either destroyed or moved to a secure location and not kept +in the root node of the kernel source tree. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d71afa8bd82..7373115407e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1094,6 +1094,13 @@ bindv6only - BOOLEAN Default: FALSE (as specified in RFC3493) +anycast_src_echo_reply - BOOLEAN + Controls the use of anycast addresses as source addresses for ICMPv6 + echo reply + TRUE: enabled + FALSE: disabled + Default: FALSE + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER diff --git a/MAINTAINERS b/MAINTAINERS index 23bd3c2ee6e..e11d4952bb2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -783,7 +783,7 @@ F: arch/arm/boot/dts/sama*.dts F: arch/arm/boot/dts/sama*.dtsi ARM/CALXEDA HIGHBANK ARCHITECTURE -M: Rob Herring <rob.herring@calxeda.com> +M: Rob Herring <robh@kernel.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-highbank/ @@ -1008,6 +1008,8 @@ M: Santosh Shilimkar <santosh.shilimkar@ti.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-keystone/ +F: drivers/clk/keystone/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git ARM/LOGICPD PXA270 MACHINE SUPPORT M: Lennert Buytenhek <kernel@wantstofly.org> @@ -3823,6 +3825,12 @@ T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/usb/gspca/ +GUID PARTITION TABLE (GPT) +M: Davidlohr Bueso <davidlohr@hp.com> +L: linux-efi@vger.kernel.org +S: Maintained +F: block/partitions/efi.* + STK1160 USB VIDEO CAPTURE DRIVER M: Ezequiel Garcia <elezegarcia@gmail.com> L: linux-media@vger.kernel.org @@ -6240,7 +6248,7 @@ F: drivers/i2c/busses/i2c-ocores.c OPEN FIRMWARE AND FLATTENED DEVICE TREE M: Grant Likely <grant.likely@linaro.org> -M: Rob Herring <rob.herring@calxeda.com> +M: Rob Herring <robh+dt@kernel.org> L: devicetree@vger.kernel.org W: http://fdt.secretlab.ca T: git git://git.secretlab.ca/git/linux-2.6.git @@ -6252,7 +6260,7 @@ K: of_get_property K: of_match_table OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS -M: Rob Herring <rob.herring@calxeda.com> +M: Rob Herring <robh+dt@kernel.org> M: Pawel Moll <pawel.moll@arm.com> M: Mark Rutland <mark.rutland@arm.com> M: Ian Campbell <ijc+devicetree@hellion.org.uk> @@ -9581,7 +9589,7 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM P: Silicon Graphics Inc -M: Dave Chinner <dchinner@fromorbit.com> +M: Dave Chinner <david@fromorbit.com> M: Ben Myers <bpm@sgi.com> M: xfs@oss.sgi.com L: xfs@oss.sgi.com @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc6 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* @@ -732,19 +732,15 @@ export mod_strip_cmd # Select initial ramdisk compression format, default is gzip(1). # This shall be used by the dracut(8) tool while creating an initramfs image. # -INITRD_COMPRESS=gzip -ifeq ($(CONFIG_RD_BZIP2), y) - INITRD_COMPRESS=bzip2 -else ifeq ($(CONFIG_RD_LZMA), y) - INITRD_COMPRESS=lzma -else ifeq ($(CONFIG_RD_XZ), y) - INITRD_COMPRESS=xz -else ifeq ($(CONFIG_RD_LZO), y) - INITRD_COMPRESS=lzo -else ifeq ($(CONFIG_RD_LZ4), y) - INITRD_COMPRESS=lz4 -endif -export INITRD_COMPRESS +INITRD_COMPRESS-y := gzip +INITRD_COMPRESS-$(CONFIG_RD_BZIP2) := bzip2 +INITRD_COMPRESS-$(CONFIG_RD_LZMA) := lzma +INITRD_COMPRESS-$(CONFIG_RD_XZ) := xz +INITRD_COMPRESS-$(CONFIG_RD_LZO) := lzo +INITRD_COMPRESS-$(CONFIG_RD_LZ4) := lz4 +# do not export INITRD_COMPRESS, since we didn't actually +# choose a sane default compression above. +# export INITRD_COMPRESS := $(INITRD_COMPRESS-y) ifdef CONFIG_MODULE_SIG_ALL MODSECKEY = ./signing_key.priv diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 68125dd766c..39e58d1cdf9 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -8,7 +8,11 @@ /******** no-legacy-syscalls-ABI *******/ -#ifndef _UAPI_ASM_ARC_UNISTD_H +/* + * Non-typical guard macro to enable inclusion twice in ARCH sys.c + * That is how the Generic syscall wrapper generator works + */ +#if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL) #define _UAPI_ASM_ARC_UNISTD_H #define __ARCH_WANT_SYS_EXECVE @@ -36,4 +40,6 @@ __SYSCALL(__NR_arc_gettls, sys_arc_gettls) #define __NR_sysfs (__NR_arch_specific_syscall + 3) __SYSCALL(__NR_sysfs, sys_sysfs) +#undef __SYSCALL + #endif diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index ee845fad939..9987dd0e9c5 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -87,9 +87,9 @@ interrupts = <1 9 0xf04>; }; - gpio0: gpio@ffc40000 { + gpio0: gpio@e6050000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc40000 0 0x2c>; + reg = <0 0xe6050000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 4 0x4>; #gpio-cells = <2>; @@ -99,9 +99,9 @@ interrupt-controller; }; - gpio1: gpio@ffc41000 { + gpio1: gpio@e6051000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc41000 0 0x2c>; + reg = <0 0xe6051000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 5 0x4>; #gpio-cells = <2>; @@ -111,9 +111,9 @@ interrupt-controller; }; - gpio2: gpio@ffc42000 { + gpio2: gpio@e6052000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc42000 0 0x2c>; + reg = <0 0xe6052000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 6 0x4>; #gpio-cells = <2>; @@ -123,9 +123,9 @@ interrupt-controller; }; - gpio3: gpio@ffc43000 { + gpio3: gpio@e6053000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc43000 0 0x2c>; + reg = <0 0xe6053000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 7 0x4>; #gpio-cells = <2>; @@ -135,9 +135,9 @@ interrupt-controller; }; - gpio4: gpio@ffc44000 { + gpio4: gpio@e6054000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc44000 0 0x2c>; + reg = <0 0xe6054000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 8 0x4>; #gpio-cells = <2>; @@ -147,9 +147,9 @@ interrupt-controller; }; - gpio5: gpio@ffc45000 { + gpio5: gpio@e6055000 { compatible = "renesas,gpio-r8a7790", "renesas,gpio-rcar"; - reg = <0 0xffc45000 0 0x2c>; + reg = <0 0xe6055000 0 0x50>; interrupt-parent = <&gic>; interrupts = <0 9 0x4>; #gpio-cells = <2>; @@ -241,7 +241,7 @@ sdhi0: sdhi@ee100000 { compatible = "renesas,sdhi-r8a7790"; - reg = <0 0xee100000 0 0x100>; + reg = <0 0xee100000 0 0x200>; interrupt-parent = <&gic>; interrupts = <0 165 4>; cap-sd-highspeed; @@ -250,7 +250,7 @@ sdhi1: sdhi@ee120000 { compatible = "renesas,sdhi-r8a7790"; - reg = <0 0xee120000 0 0x100>; + reg = <0 0xee120000 0 0x200>; interrupt-parent = <&gic>; interrupts = <0 166 4>; cap-sd-highspeed; diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c index 4ec8d82b049..44a59c3abfb 100644 --- a/arch/arm/mach-omap2/board-ldp.c +++ b/arch/arm/mach-omap2/board-ldp.c @@ -242,12 +242,18 @@ static void __init ldp_display_init(void) static int ldp_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio) { + int res; + /* LCD enable GPIO */ ldp_lcd_pdata.enable_gpio = gpio + 7; /* Backlight enable GPIO */ ldp_lcd_pdata.backlight_gpio = gpio + 15; + res = platform_device_register(&ldp_lcd_device); + if (res) + pr_err("Unable to register LCD: %d\n", res); + return 0; } @@ -346,7 +352,6 @@ static struct omap2_hsmmc_info mmc[] __initdata = { static struct platform_device *ldp_devices[] __initdata = { &ldp_gpio_keys_device, - &ldp_lcd_device, }; #ifdef CONFIG_OMAP_MUX diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 58347bb874a..4cf165502b3 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -101,13 +101,51 @@ static const struct omap_dss_hwmod_data omap4_dss_hwmod_data[] __initconst = { { "dss_hdmi", "omapdss_hdmi", -1 }, }; +static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes) +{ + u32 enable_mask, enable_shift; + u32 pipd_mask, pipd_shift; + u32 reg; + + if (dsi_id == 0) { + enable_mask = OMAP4_DSI1_LANEENABLE_MASK; + enable_shift = OMAP4_DSI1_LANEENABLE_SHIFT; + pipd_mask = OMAP4_DSI1_PIPD_MASK; + pipd_shift = OMAP4_DSI1_PIPD_SHIFT; + } else if (dsi_id == 1) { + enable_mask = OMAP4_DSI2_LANEENABLE_MASK; + enable_shift = OMAP4_DSI2_LANEENABLE_SHIFT; + pipd_mask = OMAP4_DSI2_PIPD_MASK; + pipd_shift = OMAP4_DSI2_PIPD_SHIFT; + } else { + return -ENODEV; + } + + reg = omap4_ctrl_pad_readl(OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_DSIPHY); + + reg &= ~enable_mask; + reg &= ~pipd_mask; + + reg |= (lanes << enable_shift) & enable_mask; + reg |= (lanes << pipd_shift) & pipd_mask; + + omap4_ctrl_pad_writel(reg, OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_DSIPHY); + + return 0; +} + static int omap_dsi_enable_pads(int dsi_id, unsigned lane_mask) { + if (cpu_is_omap44xx()) + return omap4_dsi_mux_pads(dsi_id, lane_mask); + return 0; } static void omap_dsi_disable_pads(int dsi_id, unsigned lane_mask) { + if (cpu_is_omap44xx()) + omap4_dsi_mux_pads(dsi_id, 0); } static int omap_dss_set_min_bus_tput(struct device *dev, unsigned long tput) diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c index 56cebb05509..d23c77fadb3 100644 --- a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c @@ -796,7 +796,7 @@ struct omap_hwmod omap2xxx_counter_32k_hwmod = { /* gpmc */ static struct omap_hwmod_irq_info omap2xxx_gpmc_irqs[] = { - { .irq = 20 }, + { .irq = 20 + OMAP_INTC_START, }, { .irq = -1 } }; @@ -841,7 +841,7 @@ static struct omap_hwmod_class omap2_rng_hwmod_class = { }; static struct omap_hwmod_irq_info omap2_rng_mpu_irqs[] = { - { .irq = 52 }, + { .irq = 52 + OMAP_INTC_START, }, { .irq = -1 } }; diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index d33742908f9..4c3b1e6df50 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -2165,7 +2165,7 @@ static struct omap_hwmod_class omap3xxx_gpmc_hwmod_class = { }; static struct omap_hwmod_irq_info omap3xxx_gpmc_irqs[] = { - { .irq = 20 }, + { .irq = 20 + OMAP_INTC_START, }, { .irq = -1 } }; @@ -2999,7 +2999,7 @@ static struct omap_mmu_dev_attr mmu_isp_dev_attr = { static struct omap_hwmod omap3xxx_mmu_isp_hwmod; static struct omap_hwmod_irq_info omap3xxx_mmu_isp_irqs[] = { - { .irq = 24 }, + { .irq = 24 + OMAP_INTC_START, }, { .irq = -1 } }; @@ -3041,7 +3041,7 @@ static struct omap_mmu_dev_attr mmu_iva_dev_attr = { static struct omap_hwmod omap3xxx_mmu_iva_hwmod; static struct omap_hwmod_irq_info omap3xxx_mmu_iva_irqs[] = { - { .irq = 28 }, + { .irq = 28 + OMAP_INTC_START, }, { .irq = -1 } }; diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index db32d5380b1..18f333c440d 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -1637,7 +1637,7 @@ static struct omap_hwmod dra7xx_uart1_hwmod = { .class = &dra7xx_uart_hwmod_class, .clkdm_name = "l4per_clkdm", .main_clk = "uart1_gfclk_mux", - .flags = HWMOD_SWSUP_SIDLE_ACT, + .flags = HWMOD_SWSUP_SIDLE_ACT | DEBUG_OMAP2UART1_FLAGS, .prcm = { .omap4 = { .clkctrl_offs = DRA7XX_CM_L4PER_UART1_CLKCTRL_OFFSET, diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h index 2a086e8373e..958cd6af938 100644 --- a/arch/arm/mach-pxa/include/mach/lubbock.h +++ b/arch/arm/mach-pxa/include/mach/lubbock.h @@ -10,6 +10,8 @@ * published by the Free Software Foundation. */ +#include <mach/irqs.h> + #define LUBBOCK_ETH_PHYS PXA_CS3_PHYS #define LUBBOCK_FPGA_PHYS PXA_CS2_PHYS diff --git a/arch/arm/mach-s3c64xx/mach-s3c64xx-dt.c b/arch/arm/mach-s3c64xx/mach-s3c64xx-dt.c index 7eb9a10fc1a..2fddf38192d 100644 --- a/arch/arm/mach-s3c64xx/mach-s3c64xx-dt.c +++ b/arch/arm/mach-s3c64xx/mach-s3c64xx-dt.c @@ -8,8 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/clk-provider.h> -#include <linux/irqchip.h> #include <linux/of_platform.h> #include <asm/mach/arch.h> @@ -48,15 +46,9 @@ static void __init s3c64xx_dt_map_io(void) panic("SoC is not S3C64xx!"); } -static void __init s3c64xx_dt_init_irq(void) -{ - of_clk_init(NULL); - samsung_wdt_reset_of_init(); - irqchip_init(); -}; - static void __init s3c64xx_dt_init_machine(void) { + samsung_wdt_reset_of_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } @@ -79,7 +71,6 @@ DT_MACHINE_START(S3C6400_DT, "Samsung S3C64xx (Flattened Device Tree)") /* Maintainer: Tomasz Figa <tomasz.figa@gmail.com> */ .dt_compat = s3c64xx_dt_compat, .map_io = s3c64xx_dt_map_io, - .init_irq = s3c64xx_dt_init_irq, .init_machine = s3c64xx_dt_init_machine, .restart = s3c64xx_dt_restart, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 958e3cbf0ac..c1868912302 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -614,6 +614,11 @@ static struct regulator_consumer_supply fixed3v3_power_consumers[] = { REGULATOR_SUPPLY("vqmmc", "sh_mmcif"), }; +/* Fixed 3.3V regulator used by LCD backlight */ +static struct regulator_consumer_supply fixed5v0_power_consumers[] = { + REGULATOR_SUPPLY("power", "pwm-backlight.0"), +}; + /* Fixed 3.3V regulator to be used by SDHI0 */ static struct regulator_consumer_supply vcc_sdhi0_consumers[] = { REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.0"), @@ -1196,6 +1201,8 @@ static void __init eva_init(void) regulator_register_always_on(0, "fixed-3.3V", fixed3v3_power_consumers, ARRAY_SIZE(fixed3v3_power_consumers), 3300000); + regulator_register_always_on(3, "fixed-5.0V", fixed5v0_power_consumers, + ARRAY_SIZE(fixed5v0_power_consumers), 5000000); pinctrl_register_mappings(eva_pinctrl_map, ARRAY_SIZE(eva_pinctrl_map)); pwm_add_table(pwm_lookup, ARRAY_SIZE(pwm_lookup)); diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c index 38611526fe9..3c4995aebd2 100644 --- a/arch/arm/mach-shmobile/board-bockw.c +++ b/arch/arm/mach-shmobile/board-bockw.c @@ -679,7 +679,7 @@ static void __init bockw_init(void) .id = i, .data = &rsnd_card_info[i], .size_data = sizeof(struct asoc_simple_card_info), - .dma_mask = ~0, + .dma_mask = DMA_BIT_MASK(32), }; platform_device_register_full(&cardinfo); diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index a8d3ce646fb..e0406fd3739 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -245,7 +245,9 @@ static void __init lager_init(void) { lager_add_standard_devices(); - phy_register_fixup_for_id("r8a7790-ether-ff:01", lager_ksz8041_fixup); + if (IS_ENABLED(CONFIG_PHYLIB)) + phy_register_fixup_for_id("r8a7790-ether-ff:01", + lager_ksz8041_fixup); } static const char * const lager_boards_compat_dt[] __initconst = { diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 83e4f959ee4..85501238b42 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -96,7 +96,7 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, struct remap_data *info = data; struct page *page = info->pages[info->index++]; unsigned long pfn = page_to_pfn(page); - pte_t pte = pfn_pte(pfn, info->prot); + pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot)); if (map_foreign_page(pfn, info->fgmfn, info->domid)) return -EFAULT; @@ -224,10 +224,10 @@ static int __init xen_guest_init(void) } if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res)) return 0; - xen_hvm_resume_frames = res.start >> PAGE_SHIFT; + xen_hvm_resume_frames = res.start; xen_events_irq = irq_of_parse_and_map(node, 0); pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n", - version, xen_events_irq, xen_hvm_resume_frames); + version, xen_events_irq, (xen_hvm_resume_frames >> PAGE_SHIFT)); xen_domain_type = XEN_HVM_DOMAIN; xen_setup_features(); diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index 2820f1a6eeb..dde3fc9c49f 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -23,25 +23,21 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); } static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); } static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); } static inline void xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); } #endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6777a2192b8..6a8928bba03 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -214,31 +214,29 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, { int err, len, type, disabled = !ctrl.enabled; - if (disabled) { - len = 0; - type = HW_BREAKPOINT_EMPTY; - } else { - err = arch_bp_generic_fields(ctrl, &len, &type); - if (err) - return err; - - switch (note_type) { - case NT_ARM_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_ARM_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: + attr->disabled = disabled; + if (disabled) + return 0; + + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) return -EINVAL; - } + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; } attr->bp_len = len; attr->bp_type = type; - attr->disabled = disabled; return 0; } diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts index 4177b62240c..a618dfc13e4 100644 --- a/arch/powerpc/boot/dts/mpc5125twr.dts +++ b/arch/powerpc/boot/dts/mpc5125twr.dts @@ -58,7 +58,6 @@ compatible = "fsl,mpc5121-immr"; #address-cells = <1>; #size-cells = <1>; - #interrupt-cells = <2>; ranges = <0x0 0x80000000 0x400000>; reg = <0x80000000 0x400000>; bus-frequency = <66000000>; // 66 MHz ips bus @@ -189,6 +188,10 @@ reg = <0xA000 0x1000>; }; + // disable USB1 port + // TODO: + // correct pinmux config and fix USB3320 ulpi dependency + // before re-enabling it usb@3000 { compatible = "fsl,mpc5121-usb2-dr"; reg = <0x3000 0x400>; @@ -197,6 +200,7 @@ interrupts = <43 0x8>; dr_mode = "host"; phy_type = "ulpi"; + status = "disabled"; }; // 5125 PSCs are not 52xx or 5121 PSC compatible diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 894662a5d4d..243ce69ad68 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -284,7 +284,7 @@ do_kvm_##n: \ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ beq- 1f; \ ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ -1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ +1: cmpdi cr1,r1,-INT_FRAME_SIZE; /* check if r1 is in userspace */ \ blt+ cr1,3f; /* abort if it is */ \ li r1,(n); /* will be reloaded later */ \ sth r1,PACA_TRAP_SAVE(r13); \ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4a594b76674..bc23b1ba798 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -192,6 +192,10 @@ extern void kvmppc_load_up_vsx(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, + struct kvm_vcpu *vcpu); +extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, + struct kvmppc_book3s_shadow_vcpu *svcpu); static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 0bd9348a4db..192917d2239 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -79,6 +79,7 @@ struct kvmppc_host_state { ulong vmhandler; ulong scratch0; ulong scratch1; + ulong scratch2; u8 in_guest; u8 restore_hid5; u8 napping; @@ -106,6 +107,7 @@ struct kvmppc_host_state { }; struct kvmppc_book3s_shadow_vcpu { + bool in_use; ulong gpr[14]; u32 cr; u32 xer; diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9ee12610af0..aace9054761 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -35,7 +35,7 @@ extern void giveup_vsx(struct task_struct *); extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); -extern void switch_booke_debug_regs(struct thread_struct *new_thread); +extern void switch_booke_debug_regs(struct debug_reg *new_debug); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h index 5f1b1e3c213..8296381ae43 100644 --- a/arch/powerpc/include/asm/unaligned.h +++ b/arch/powerpc/include/asm/unaligned.h @@ -4,13 +4,18 @@ #ifdef __KERNEL__ /* - * The PowerPC can do unaligned accesses itself in big endian mode. + * The PowerPC can do unaligned accesses itself based on its endian mode. */ #include <linux/unaligned/access_ok.h> #include <linux/unaligned/generic.h> +#ifdef __LITTLE_ENDIAN__ +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le +#else #define get_unaligned __get_unaligned_be #define put_unaligned __put_unaligned_be +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UNALIGNED_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2ea5cc033ec..d3de01066f7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -576,6 +576,7 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_SCRATCH2, scratch2); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2ae41aba405..4f0946de2d5 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -80,6 +80,7 @@ END_FTR_SECTION(0, 1) * of the function that the cpu should jump to to continue * initialization. */ + .balign 8 .globl __secondary_hold_spinloop __secondary_hold_spinloop: .llong 0x0 @@ -470,6 +471,7 @@ _STATIC(__after_prom_start) mtctr r8 bctr +.balign 8 p_end: .llong _end - _stext 4: /* Now copy the rest of the kernel up to _end */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb..4a96556fd2d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -339,7 +339,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread) #endif } -static void prime_debug_regs(struct thread_struct *thread) +static void prime_debug_regs(struct debug_reg *debug) { /* * We could have inherited MSR_DE from userspace, since @@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread) */ mtmsr(mfmsr() & ~MSR_DE); - mtspr(SPRN_IAC1, thread->debug.iac1); - mtspr(SPRN_IAC2, thread->debug.iac2); + mtspr(SPRN_IAC1, debug->iac1); + mtspr(SPRN_IAC2, debug->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS > 2 - mtspr(SPRN_IAC3, thread->debug.iac3); - mtspr(SPRN_IAC4, thread->debug.iac4); + mtspr(SPRN_IAC3, debug->iac3); + mtspr(SPRN_IAC4, debug->iac4); #endif - mtspr(SPRN_DAC1, thread->debug.dac1); - mtspr(SPRN_DAC2, thread->debug.dac2); + mtspr(SPRN_DAC1, debug->dac1); + mtspr(SPRN_DAC2, debug->dac2); #if CONFIG_PPC_ADV_DEBUG_DVCS > 0 - mtspr(SPRN_DVC1, thread->debug.dvc1); - mtspr(SPRN_DVC2, thread->debug.dvc2); + mtspr(SPRN_DVC1, debug->dvc1); + mtspr(SPRN_DVC2, debug->dvc2); #endif - mtspr(SPRN_DBCR0, thread->debug.dbcr0); - mtspr(SPRN_DBCR1, thread->debug.dbcr1); + mtspr(SPRN_DBCR0, debug->dbcr0); + mtspr(SPRN_DBCR1, debug->dbcr1); #ifdef CONFIG_BOOKE - mtspr(SPRN_DBCR2, thread->debug.dbcr2); + mtspr(SPRN_DBCR2, debug->dbcr2); #endif } /* @@ -371,11 +371,11 @@ static void prime_debug_regs(struct thread_struct *thread) * debug registers, set the debug registers from the values * stored in the new thread. */ -void switch_booke_debug_regs(struct thread_struct *new_thread) +void switch_booke_debug_regs(struct debug_reg *new_debug) { if ((current->thread.debug.dbcr0 & DBCR0_IDM) - || (new_thread->debug.dbcr0 & DBCR0_IDM)) - prime_debug_regs(new_thread); + || (new_debug->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_debug); } EXPORT_SYMBOL_GPL(switch_booke_debug_regs); #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ @@ -683,7 +683,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS - switch_booke_debug_regs(&new->thread); + switch_booke_debug_regs(&new->thread.debug); #else /* * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3ff587a8b7..c5d148434c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -469,11 +469,14 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, slb_v = vcpu->kvm->arch.vrma_slb_v; } + preempt_disable(); /* Find the HPTE in the hash table */ index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, HPTE_V_VALID | HPTE_V_ABSENT); - if (index < 0) + if (index < 0) { + preempt_enable(); return -ENOENT; + } hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); v = hptep[0] & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; @@ -481,6 +484,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); hptep[0] = v; + preempt_enable(); gpte->eaddr = eaddr; gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); @@ -665,6 +669,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return -EFAULT; } else { page = pages[0]; + pfn = page_to_pfn(page); if (PageHuge(page)) { page = compound_head(page); pte_size <<= compound_order(page); @@ -689,7 +694,6 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } rcu_read_unlock_sched(); } - pfn = page_to_pfn(page); } ret = -EFAULT; @@ -707,8 +711,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; } - /* Set the HPTE to point to pfn */ - r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); + /* + * Set the HPTE to point to pfn. + * Since the pfn is at PAGE_SIZE granularity, make sure we + * don't mask out lower-order bits if psize < PAGE_SIZE. + */ + if (psize < PAGE_SIZE) + psize = PAGE_SIZE; + r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); if (hpte_is_writable(r) && !write_ok) r = hpte_make_readonly(r); ret = RESUME_GUEST; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 072287f1c3b..b51d5db7806 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -131,8 +131,9 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + unsigned long flags; - spin_lock(&vcpu->arch.tbacct_lock); + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && vc->preempt_tb != TB_NIL) { vc->stolen_tb += mftb() - vc->preempt_tb; @@ -143,19 +144,20 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; vcpu->arch.busy_preempt = TB_NIL; } - spin_unlock(&vcpu->arch.tbacct_lock); + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + unsigned long flags; - spin_lock(&vcpu->arch.tbacct_lock); + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) vc->preempt_tb = mftb(); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); - spin_unlock(&vcpu->arch.tbacct_lock); + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) @@ -486,11 +488,11 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) */ if (vc->vcore_state != VCORE_INACTIVE && vc->runner->arch.run_task != current) { - spin_lock(&vc->runner->arch.tbacct_lock); + spin_lock_irq(&vc->runner->arch.tbacct_lock); p = vc->stolen_tb; if (vc->preempt_tb != TB_NIL) p += now - vc->preempt_tb; - spin_unlock(&vc->runner->arch.tbacct_lock); + spin_unlock_irq(&vc->runner->arch.tbacct_lock); } else { p = vc->stolen_tb; } @@ -512,10 +514,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, core_stolen = vcore_stolen_time(vc, now); stolen = core_stolen - vcpu->arch.stolen_logged; vcpu->arch.stolen_logged = core_stolen; - spin_lock(&vcpu->arch.tbacct_lock); + spin_lock_irq(&vcpu->arch.tbacct_lock); stolen += vcpu->arch.busy_stolen; vcpu->arch.busy_stolen = 0; - spin_unlock(&vcpu->arch.tbacct_lock); + spin_unlock_irq(&vcpu->arch.tbacct_lock); if (!dt || !vpa) return; memset(dt, 0, sizeof(struct dtl_entry)); @@ -589,7 +591,9 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (list_empty(&vcpu->kvm->arch.rtas_tokens)) return RESUME_HOST; + idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (rc == -ENOENT) return RESUME_HOST; @@ -1115,13 +1119,13 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; - spin_lock(&vcpu->arch.tbacct_lock); + spin_lock_irq(&vcpu->arch.tbacct_lock); now = mftb(); vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - vcpu->arch.stolen_logged; vcpu->arch.busy_preempt = now; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; - spin_unlock(&vcpu->arch.tbacct_lock); + spin_unlock_irq(&vcpu->arch.tbacct_lock); --vc->n_runnable; list_del(&vcpu->arch.run_list); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9c515440ad1..8689e2e3085 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -225,6 +225,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, is_io = pa & (HPTE_R_I | HPTE_R_W); pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); pa &= PAGE_MASK; + pa |= gpa & ~PAGE_MASK; } else { /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); @@ -238,13 +239,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, ptel = hpte_make_readonly(ptel); is_io = hpte_cache_bits(pte_val(pte)); pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (pte_size - 1); + pa |= gpa & ~PAGE_MASK; } } if (pte_size < psize) return H_PARAMETER; - if (pa && pte_size > psize) - pa |= gpa & (pte_size - 1); ptel &= ~(HPTE_R_PP0 - psize); ptel |= pa; @@ -749,6 +750,10 @@ static int slb_base_page_shift[4] = { 20, /* 1M, unsupported */ }; +/* When called from virtmode, this func should be protected by + * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK + * can trigger deadlock issue. + */ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long valid) { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bc8de75b192..be4fa04a37c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -153,7 +153,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 13: b machine_check_fwnmi - /* * We come in here when wakened from nap mode on a secondary hw thread. * Relocation is off and most register values are lost. @@ -224,6 +223,11 @@ kvm_start_guest: /* Clear our vcpu pointer so we don't come back in early */ li r0, 0 std r0, HSTATE_KVM_VCPU(r13) + /* + * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing + * the nap_count, because once the increment to nap_count is + * visible we could be given another vcpu. + */ lwsync /* Clear any pending IPI - we're an offline thread */ ld r5, HSTATE_XICS_PHYS(r13) @@ -241,7 +245,6 @@ kvm_start_guest: /* increment the nap count and then go to nap mode */ ld r4, HSTATE_KVM_VCORE(r13) addi r4, r4, VCORE_NAP_COUNT - lwsync /* make previous updates visible */ 51: lwarx r3, 0, r4 addi r3, r3, 1 stwcx. r3, 0, r4 @@ -751,15 +754,14 @@ kvmppc_interrupt_hv: * guest CR, R12 saved in shadow VCPU SCRATCH1/0 * guest R13 saved in SPRN_SCRATCH0 */ - /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ - std r9, HSTATE_HOST_R2(r13) + std r9, HSTATE_SCRATCH2(r13) lbz r9, HSTATE_IN_GUEST(r13) cmpwi r9, KVM_GUEST_MODE_HOST_HV beq kvmppc_bad_host_intr #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE cmpwi r9, KVM_GUEST_MODE_GUEST - ld r9, HSTATE_HOST_R2(r13) + ld r9, HSTATE_SCRATCH2(r13) beq kvmppc_interrupt_pr #endif /* We're now back in the host but in guest MMU context */ @@ -779,7 +781,7 @@ kvmppc_interrupt_hv: std r6, VCPU_GPR(R6)(r9) std r7, VCPU_GPR(R7)(r9) std r8, VCPU_GPR(R8)(r9) - ld r0, HSTATE_HOST_R2(r13) + ld r0, HSTATE_SCRATCH2(r13) std r0, VCPU_GPR(R9)(r9) std r10, VCPU_GPR(R10)(r9) std r11, VCPU_GPR(R11)(r9) @@ -990,14 +992,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) */ /* Increment the threads-exiting-guest count in the 0xff00 bits of vcore->entry_exit_count */ - lwsync ld r5,HSTATE_KVM_VCORE(r13) addi r6,r5,VCORE_ENTRY_EXIT 41: lwarx r3,0,r6 addi r0,r3,0x100 stwcx. r0,0,r6 bne 41b - lwsync + isync /* order stwcx. vs. reading napping_threads */ /* * At this point we have an interrupt that we have to pass @@ -1030,6 +1031,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) sld r0,r0,r4 andc. r3,r3,r0 /* no sense IPI'ing ourselves */ beq 43f + /* Order entry/exit update vs. IPIs */ + sync mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ subf r6,r4,r13 42: andi. r0,r3,1 @@ -1638,10 +1641,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) bge kvm_cede_exit stwcx. r4,0,r6 bne 31b + /* order napping_threads update vs testing entry_exit_count */ + isync li r0,1 stb r0,HSTATE_NAPPING(r13) - /* order napping_threads update vs testing entry_exit_count */ - lwsync mr r4,r3 lwz r7,VCORE_ENTRY_EXIT(r5) cmpwi r7,0x100 diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f4dd041c14e..f779450cb07 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -129,29 +129,32 @@ kvm_start_lightweight: * R12 = exit handler id * R13 = PACA * SVCPU.* = guest * + * MSR.EE = 1 * */ + PPC_LL r3, GPR4(r1) /* vcpu pointer */ + + /* + * kvmppc_copy_from_svcpu can clobber volatile registers, save + * the exit handler id to the vcpu and restore it from there later. + */ + stw r12, VCPU_TRAP(r3) + /* Transfer reg values from shadow vcpu back to vcpu struct */ /* On 64-bit, interrupts are still off at this point */ - PPC_LL r3, GPR4(r1) /* vcpu pointer */ + GET_SHADOW_VCPU(r4) bl FUNC(kvmppc_copy_from_svcpu) nop #ifdef CONFIG_PPC_BOOK3S_64 - /* Re-enable interrupts */ - ld r3, HSTATE_HOST_MSR(r13) - ori r3, r3, MSR_EE - MTMSR_EERI(r3) - /* * Reload kernel SPRG3 value. * No need to save guest value as usermode can't modify SPRG3. */ ld r3, PACA_SPRG3(r13) mtspr SPRN_SPRG3, r3 - #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ @@ -177,7 +180,7 @@ kvm_start_lightweight: PPC_STL r31, VCPU_GPR(R31)(r7) /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ - mr r5, r12 + lwz r5, VCPU_TRAP(r7) /* Restore r3 (kvm_run) and r4 (vcpu) */ REST_2GPRS(3, r1) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index fe14ca3dd17..5b9e9063cfa 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -66,6 +66,7 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; + svcpu->in_use = 0; svcpu_put(svcpu); #endif vcpu->cpu = smp_processor_id(); @@ -78,6 +79,9 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PPC_BOOK3S_64 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); + if (svcpu->in_use) { + kvmppc_copy_from_svcpu(vcpu, svcpu); + } memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; svcpu_put(svcpu); @@ -110,12 +114,26 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, svcpu->ctr = vcpu->arch.ctr; svcpu->lr = vcpu->arch.lr; svcpu->pc = vcpu->arch.pc; + svcpu->in_use = true; } /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, struct kvmppc_book3s_shadow_vcpu *svcpu) { + /* + * vcpu_put would just call us again because in_use hasn't + * been updated yet. + */ + preempt_disable(); + + /* + * Maybe we were already preempted and synced the svcpu from + * our preempt notifiers. Don't bother touching this svcpu then. + */ + if (!svcpu->in_use) + goto out; + vcpu->arch.gpr[0] = svcpu->gpr[0]; vcpu->arch.gpr[1] = svcpu->gpr[1]; vcpu->arch.gpr[2] = svcpu->gpr[2]; @@ -139,6 +157,10 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, vcpu->arch.fault_dar = svcpu->fault_dar; vcpu->arch.fault_dsisr = svcpu->fault_dsisr; vcpu->arch.last_inst = svcpu->last_inst; + svcpu->in_use = false; + +out: + preempt_enable(); } static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index a38c4c9edab..c3c5231adad 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -153,15 +153,11 @@ _GLOBAL(kvmppc_entry_trampoline) li r6, MSR_IR | MSR_DR andc r6, r5, r6 /* Clear DR and IR in MSR value */ -#ifdef CONFIG_PPC_BOOK3S_32 /* * Set EE in HOST_MSR so that it's enabled when we get into our - * C exit handler function. On 64-bit we delay enabling - * interrupts until we have finished transferring stuff - * to or from the PACA. + * C exit handler function. */ ori r5, r5, MSR_EE -#endif mtsrr0 r7 mtsrr1 r6 RFI diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 53e65a210b9..0591e05db74 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -681,7 +681,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret, s; - struct thread_struct thread; + struct debug_reg debug; #ifdef CONFIG_PPC_FPU struct thread_fp_state fp; int fpexc_mode; @@ -723,9 +723,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif /* Switch to guest debug context */ - thread.debug = vcpu->arch.shadow_dbg_reg; - switch_booke_debug_regs(&thread); - thread.debug = current->thread.debug; + debug = vcpu->arch.shadow_dbg_reg; + switch_booke_debug_regs(&debug); + debug = current->thread.debug; current->thread.debug = vcpu->arch.shadow_dbg_reg; kvmppc_fix_ee_before_entry(); @@ -736,8 +736,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) We also get here with interrupts enabled. */ /* Switch back to user space debug context */ - switch_booke_debug_regs(&thread); - current->thread.debug = thread.debug; + switch_booke_debug_regs(&debug); + current->thread.debug = debug; #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a5901490..596a285c075 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,6 +9,14 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> +#ifdef __BIG_ENDIAN__ +#define sLd sld /* Shift towards low-numbered address. */ +#define sHd srd /* Shift towards high-numbered address. */ +#else +#define sLd srd /* Shift towards low-numbered address. */ +#define sHd sld /* Shift towards high-numbered address. */ +#endif + .align 7 _GLOBAL(__copy_tofrom_user) BEGIN_FTR_SECTION @@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ 25: ld r0,8(r4) - sld r6,r9,r10 + sLd r6,r9,r10 26: ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f 27: ld r0,8(r4) @@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ 29: ldu r9,8(r4) - sld r8,r0,r10 + sLd r8,r0,r10 addi r3,r3,-8 blt cr6,5f 30: ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 + sHd r12,r9,r11 + sLd r6,r9,r10 31: ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addi r3,r3,16 beq cr6,78f 1: or r7,r7,r6 32: ld r0,8(r4) 76: std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 +2: sHd r12,r9,r11 + sLd r6,r9,r10 33: ldu r9,16(r4) or r12,r8,r12 77: stdu r7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz 1b 78: std r12,8(r3) or r7,r7,r6 79: std r7,16(r3) -5: srd r12,r9,r11 +5: sHd r12,r9,r11 or r12,r8,r12 80: std r12,24(r3) bne 6f @@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr 6: cmpwi cr1,r5,8 addi r3,r3,32 - sld r9,r9,r10 + sLd r9,r9,r10 ble cr1,7f 34: ld r0,8(r4) - srd r7,r0,r11 + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 +#endif 94: stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addi r3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 +#endif 95: sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addi r3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 +#endif 96: stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 02245cee781..d7ddcee7feb 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -36,7 +36,6 @@ #include "powernv.h" #include "pci.h" -static char *hub_diag = NULL; static int ioda_eeh_nb_init = 0; static int ioda_eeh_event(struct notifier_block *nb, @@ -140,15 +139,6 @@ static int ioda_eeh_post_init(struct pci_controller *hose) ioda_eeh_nb_init = 1; } - /* We needn't HUB diag-data on PHB3 */ - if (phb->type == PNV_PHB_IODA1 && !hub_diag) { - hub_diag = (char *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - if (!hub_diag) { - pr_err("%s: Out of memory !\n", __func__); - return -ENOMEM; - } - } - #ifdef CONFIG_DEBUG_FS if (phb->dbgfs) { debugfs_create_file("err_injct_outbound", 0600, @@ -633,11 +623,10 @@ static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) static void ioda_eeh_hub_diag(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; - struct OpalIoP7IOCErrorData *data; + struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; long rc; - data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; - rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); if (rc != OPAL_SUCCESS) { pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", __func__, phb->hub_id, rc); @@ -820,14 +809,15 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose) struct OpalIoPhbErrorCommon *common; long rc; - common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; - rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); if (rc != OPAL_SUCCESS) { pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", __func__, hose->global_number, rc); return; } + common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; switch (common->ioType) { case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: ioda_eeh_p7ioc_phb_diag(hose, common); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 911c24ef033..1ed8d5f40f5 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -172,11 +172,13 @@ struct pnv_phb { } ioda; }; - /* PHB status structure */ + /* PHB and hub status structure */ union { unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; struct OpalIoP7IOCPhbErrorData p7ioc; + struct OpalIoP7IOCErrorData hub_diag; } diag; + }; extern struct pci_ops pnv_pci_ops; diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile index 7b95f29e317..3baff31e58c 100644 --- a/arch/sh/lib/Makefile +++ b/arch/sh/lib/Makefile @@ -6,7 +6,7 @@ lib-y = delay.o memmove.o memchr.o \ checksum.o strlen.o div64.o div64-generic.o # Extracted from libgcc -lib-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ +obj-y += movmem.o ashldi3.o ashrdi3.o lshrdi3.o \ ashlsi3.o ashrsi3.o ashiftrt.o lshrsi3.o \ udiv_qrnnd.o diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 8358dc14495..0f9e94537ee 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -619,7 +619,7 @@ static inline unsigned long pte_present(pte_t pte) } #define pte_accessible pte_accessible -static inline unsigned long pte_accessible(pte_t a) +static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) { return pte_val(a) & _PAGE_VALID; } @@ -847,7 +847,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U * and SUN4V pte layout, so this inline test is fine. */ - if (likely(mm != &init_mm) && pte_accessible(orig)) + if (likely(mm != &init_mm) && pte_accessible(mm, orig)) tlb_batch_add(mm, addr, ptep, orig, fullmm); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3d199945870..bbc8b12fa44 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -452,9 +452,16 @@ static inline int pte_present(pte_t a) } #define pte_accessible pte_accessible -static inline int pte_accessible(pte_t a) +static inline bool pte_accessible(struct mm_struct *mm, pte_t a) { - return pte_flags(a) & _PAGE_PRESENT; + if (pte_flags(a) & _PAGE_PRESENT) + return true; + + if ((pte_flags(a) & (_PAGE_PROTNONE | _PAGE_NUMA)) && + mm_tlb_flush_pending(mm)) + return true; + + return false; } static inline int pte_hidden(pte_t pte) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc1ec0dff93..ea04b342c02 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -387,7 +387,8 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + if (c->x86 == 6 && cpu_has_clflush && + (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); #ifdef CONFIG_X86_64 diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dd74e46828c..0596e8e0cc1 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -83,6 +83,12 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_t pte = gup_get_pte(ptep); struct page *page; + /* Similar to the PMD case, NUMA hinting must take slow path */ + if (pte_numa(pte)) { + pte_unmap(ptep); + return 0; + } + if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) { pte_unmap(ptep); return 0; @@ -167,6 +173,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (unlikely(pmd_large(pmd))) { + /* + * NUMA hinting faults need to be handled in the GUP + * slowpath for accounting purposes and so that they + * can be serialised against THP migration. + */ + if (pmd_numa(pmd)) + return 0; if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) return 0; } else { diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ba6cf8e9aa0..b91ce75bd35 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = { void blk_mq_unregister_disk(struct gendisk *disk) { struct request_queue *q = disk->queue; + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + int i, j; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx_for_each_ctx(hctx, ctx, j) { + kobject_del(&ctx->kobj); + kobject_put(&ctx->kobj); + } + kobject_del(&hctx->kobj); + kobject_put(&hctx->kobj); + } kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); + kobject_put(&q->mq_kobj); kobject_put(&disk_to_dev(disk)->kobj); } diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 5d9248526d7..4770de5707b 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -348,7 +348,6 @@ source "drivers/acpi/apei/Kconfig" config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC - select EFI select UEFI_CPER default n help diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 6745fe137b9..e6039059737 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -162,6 +162,7 @@ static const struct acpi_device_id acpi_lpss_device_ids[] = { { "80860F14", (unsigned long)&byt_sdio_dev_desc }, { "80860F41", (unsigned long)&byt_i2c_dev_desc }, { "INT33B2", }, + { "INT33FC", }, { "INT3430", (unsigned long)&lpt_dev_desc }, { "INT3431", (unsigned long)&lpt_dev_desc }, diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 786294bb682..3650b218322 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -2,7 +2,6 @@ config ACPI_APEI bool "ACPI Platform Error Interface (APEI)" select MISC_FILESYSTEMS select PSTORE - select EFI select UEFI_CPER depends on X86 help diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 26311f23c82..cb1d557fc22 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -942,6 +942,7 @@ static int erst_clearer(enum pstore_type_id type, u64 id, int count, static struct pstore_info erst_info = { .owner = THIS_MODULE, .name = "erst", + .flags = PSTORE_FLAGS_FRAGILE, .open = erst_open_pstore, .close = erst_close_pstore, .read = erst_reader, diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 14f1e950633..c0ed4f273cf 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1238,15 +1238,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; - /* AHCI controllers often implement SFF compatible interface. - * Grab all PCI BARs just in case. - */ - rc = pcim_iomap_regions_request_all(pdev, 1 << ahci_pci_bar, DRV_NAME); - if (rc == -EBUSY) - pcim_pin_device(pdev); - if (rc) - return rc; - if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == 0x2652 || pdev->device == 0x2653)) { u8 map; @@ -1263,6 +1254,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } } + /* AHCI controllers often implement SFF compatible interface. + * Grab all PCI BARs just in case. + */ + rc = pcim_iomap_regions_request_all(pdev, 1 << ahci_pci_bar, DRV_NAME); + if (rc == -EBUSY) + pcim_pin_device(pdev); + if (rc) + return rc; + hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL); if (!hpriv) return -ENOMEM; diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index ae2d73fe321..3e23e9941da 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -113,7 +113,7 @@ static int imx6q_sata_init(struct device *dev, void __iomem *mmio) /* * set PHY Paremeters, two steps to configure the GPR13, * one write for rest of parameters, mask of first write - * is 0x07fffffd, and the other one write for setting + * is 0x07ffffff, and the other one write for setting * the mpll_clk_en. */ regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK @@ -124,6 +124,7 @@ static int imx6q_sata_init(struct device *dev, void __iomem *mmio) | IMX6Q_GPR13_SATA_TX_ATTEN_MASK | IMX6Q_GPR13_SATA_TX_BOOST_MASK | IMX6Q_GPR13_SATA_TX_LVL_MASK + | IMX6Q_GPR13_SATA_MPLL_CLK_EN | IMX6Q_GPR13_SATA_TX_EDGE_RATE , IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 75b93678bbc..1393a5890ed 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2149,9 +2149,16 @@ static int ata_dev_config_ncq(struct ata_device *dev, "failed to get NCQ Send/Recv Log Emask 0x%x\n", err_mask); } else { + u8 *cmds = dev->ncq_send_recv_cmds; + dev->flags |= ATA_DFLAG_NCQ_SEND_RECV; - memcpy(dev->ncq_send_recv_cmds, ap->sector_buf, - ATA_LOG_NCQ_SEND_RECV_SIZE); + memcpy(cmds, ap->sector_buf, ATA_LOG_NCQ_SEND_RECV_SIZE); + + if (dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) { + ata_dev_dbg(dev, "disabling queued TRIM support\n"); + cmds[ATA_LOG_NCQ_SEND_RECV_DSM_OFFSET] &= + ~ATA_LOG_NCQ_SEND_RECV_DSM_TRIM; + } } } @@ -4156,6 +4163,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST3320[68]13AS", "SD1[5-9]", ATA_HORKAGE_NONCQ | ATA_HORKAGE_FIRMWARE_WARN }, + /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ + { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ, }, @@ -4202,6 +4212,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-212D", NULL, ATA_HORKAGE_NOSETXFER }, { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, + /* devices that don't properly handle queued TRIM commands */ + { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + /* End Marker */ { } }; @@ -6519,6 +6533,7 @@ static int __init ata_parse_force_one(char **cur, { "norst", .lflags = ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST }, { "rstonce", .lflags = ATA_LFLAG_RST_ONCE }, { "atapi_dmadir", .horkage_on = ATA_HORKAGE_ATAPI_DMADIR }, + { "disable", .horkage_on = ATA_HORKAGE_DISABLE }, }; char *start = *cur, *p = *cur; char *id, *val, *endp; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index ab58556d347..377eb889f55 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3872,6 +3872,27 @@ void ata_scsi_hotplug(struct work_struct *work) return; } + /* + * XXX - UGLY HACK + * + * The block layer suspend/resume path is fundamentally broken due + * to freezable kthreads and workqueue and may deadlock if a block + * device gets removed while resume is in progress. I don't know + * what the solution is short of removing freezable kthreads and + * workqueues altogether. + * + * The following is an ugly hack to avoid kicking off device + * removal while freezer is active. This is a joke but does avoid + * this particular deadlock scenario. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=62801 + * http://marc.info/?l=linux-kernel&m=138695698516487 + */ +#ifdef CONFIG_FREEZER + while (pm_freezing) + msleep(10); +#endif + DPRINTK("ENTER\n"); mutex_lock(&ap->scsi_scan_mutex); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f370fc13aea..a2e69d26266 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1,4 +1,5 @@ #include <linux/module.h> + #include <linux/moduleparam.h> #include <linux/sched.h> #include <linux/fs.h> @@ -65,7 +66,7 @@ enum { NULL_Q_MQ = 2, }; -static int submit_queues = 1; +static int submit_queues; module_param(submit_queues, int, S_IRUGO); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); @@ -101,9 +102,9 @@ static int hw_queue_depth = 64; module_param(hw_queue_depth, int, S_IRUGO); MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); -static bool use_per_node_hctx = true; +static bool use_per_node_hctx = false; module_param(use_per_node_hctx, bool, S_IRUGO); -MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true"); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false"); static void put_tag(struct nullb_queue *nq, unsigned int tag) { @@ -346,8 +347,37 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) { - return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, - hctx_index); + int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes); + int tip = (reg->nr_hw_queues % nr_online_nodes); + int node = 0, i, n; + + /* + * Split submit queues evenly wrt to the number of nodes. If uneven, + * fill the first buckets with one extra, until the rest is filled with + * no extra. + */ + for (i = 0, n = 1; i < hctx_index; i++, n++) { + if (n % b_size == 0) { + n = 0; + node++; + + tip--; + if (!tip) + b_size = reg->nr_hw_queues / nr_online_nodes; + } + } + + /* + * A node might not be online, therefore map the relative node id to the + * real node id. + */ + for_each_online_node(n) { + if (!node) + break; + node--; + } + + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n); } static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) @@ -355,16 +385,24 @@ static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) kfree(hctx); } +static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) +{ + BUG_ON(!nullb); + BUG_ON(!nq); + + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; +} + static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int index) { struct nullb *nullb = data; struct nullb_queue *nq = &nullb->queues[index]; - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; - nullb->nr_queues++; hctx->driver_data = nq; + null_init_queue(nullb, nq); + nullb->nr_queues++; return 0; } @@ -417,13 +455,13 @@ static int setup_commands(struct nullb_queue *nq) nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); if (!nq->cmds) - return 1; + return -ENOMEM; tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); if (!nq->tag_map) { kfree(nq->cmds); - return 1; + return -ENOMEM; } for (i = 0; i < nq->queue_depth; i++) { @@ -454,33 +492,37 @@ static void cleanup_queues(struct nullb *nullb) static int setup_queues(struct nullb *nullb) { - struct nullb_queue *nq; - int i; - - nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL); + nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), + GFP_KERNEL); if (!nullb->queues) - return 1; + return -ENOMEM; nullb->nr_queues = 0; nullb->queue_depth = hw_queue_depth; - if (queue_mode == NULL_Q_MQ) - return 0; + return 0; +} + +static int init_driver_queues(struct nullb *nullb) +{ + struct nullb_queue *nq; + int i, ret = 0; for (i = 0; i < submit_queues; i++) { nq = &nullb->queues[i]; - init_waitqueue_head(&nq->wait); - nq->queue_depth = hw_queue_depth; - if (setup_commands(nq)) - break; + + null_init_queue(nullb, nq); + + ret = setup_commands(nq); + if (ret) + goto err_queue; nullb->nr_queues++; } - if (i == submit_queues) - return 0; - + return 0; +err_queue: cleanup_queues(nullb); - return 1; + return ret; } static int null_add_dev(void) @@ -518,11 +560,13 @@ static int null_add_dev(void) } else if (queue_mode == NULL_Q_BIO) { nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); blk_queue_make_request(nullb->q, null_queue_bio); + init_driver_queues(nullb); } else { nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); blk_queue_prep_rq(nullb->q, null_rq_prep_fn); if (nullb->q) blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + init_driver_queues(nullb); } if (!nullb->q) @@ -579,7 +623,13 @@ static int __init null_init(void) } #endif - if (submit_queues > nr_cpu_ids) + if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { + if (submit_queues < nr_online_nodes) { + pr_warn("null_blk: submit_queues param is set to %u.", + nr_online_nodes); + submit_queues = nr_online_nodes; + } + } else if (submit_queues > nr_cpu_ids) submit_queues = nr_cpu_ids; else if (!submit_queues) submit_queues = 1; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 9199c93be92..eb6e1e0e8db 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -5269,7 +5269,7 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state) } } -const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state) +static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state) { switch (state) { case SKD_MSG_STATE_IDLE: @@ -5281,7 +5281,7 @@ const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state) } } -const char *skd_skreq_state_to_str(enum skd_req_state state) +static const char *skd_skreq_state_to_str(enum skd_req_state state) { switch (state) { case SKD_REQ_STATE_IDLE: diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index d3fdc32b579..106d1d8e16a 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -88,6 +88,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0489, 0xe057) }, { USB_DEVICE(0x13d3, 0x3393) }, { USB_DEVICE(0x0489, 0xe04e) }, @@ -132,6 +133,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index bfbcc5a772a..9f7e539de51 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -155,6 +155,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 02d534da22d..16d7b4ac94b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -828,6 +828,12 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) int ret = 0; memcpy(&new_policy, policy, sizeof(*policy)); + + /* Use the default policy if its valid. */ + if (cpufreq_driver->setpolicy) + cpufreq_parse_governor(policy->governor->name, + &new_policy.policy, NULL); + /* assure that the starting sequence is run in cpufreq_set_policy */ policy->governor = NULL; @@ -845,8 +851,7 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) #ifdef CONFIG_HOTPLUG_CPU static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, - unsigned int cpu, struct device *dev, - bool frozen) + unsigned int cpu, struct device *dev) { int ret = 0; unsigned long flags; @@ -877,11 +882,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, } } - /* Don't touch sysfs links during light-weight init */ - if (!frozen) - ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); - - return ret; + return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); } #endif @@ -926,6 +927,27 @@ err_free_policy: return NULL; } +static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) +{ + struct kobject *kobj; + struct completion *cmp; + + down_read(&policy->rwsem); + kobj = &policy->kobj; + cmp = &policy->kobj_unregister; + up_read(&policy->rwsem); + kobject_put(kobj); + + /* + * We need to make sure that the underlying kobj is + * actually not referenced anymore by anybody before we + * proceed with unloading. + */ + pr_debug("waiting for dropping of refcount\n"); + wait_for_completion(cmp); + pr_debug("wait complete\n"); +} + static void cpufreq_policy_free(struct cpufreq_policy *policy) { free_cpumask_var(policy->related_cpus); @@ -986,7 +1008,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) { if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) { read_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev, frozen); + ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev); up_read(&cpufreq_rwsem); return ret; } @@ -1096,7 +1118,10 @@ err_get_freq: if (cpufreq_driver->exit) cpufreq_driver->exit(policy); err_set_policy_cpu: + if (frozen) + cpufreq_policy_put_kobj(policy); cpufreq_policy_free(policy); + nomem_out: up_read(&cpufreq_rwsem); @@ -1118,7 +1143,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) } static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, - unsigned int old_cpu, bool frozen) + unsigned int old_cpu) { struct device *cpu_dev; int ret; @@ -1126,10 +1151,6 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, /* first sibling now owns the new sysfs dir */ cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu)); - /* Don't touch sysfs files during light-weight tear-down */ - if (frozen) - return cpu_dev->id; - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); ret = kobject_move(&policy->kobj, &cpu_dev->kobj); if (ret) { @@ -1196,7 +1217,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (!frozen) sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { - new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen); + new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu); if (new_cpu >= 0) { update_policy_cpu(policy, new_cpu); @@ -1218,8 +1239,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, int ret; unsigned long flags; struct cpufreq_policy *policy; - struct kobject *kobj; - struct completion *cmp; read_lock_irqsave(&cpufreq_driver_lock, flags); policy = per_cpu(cpufreq_cpu_data, cpu); @@ -1249,22 +1268,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev, } } - if (!frozen) { - down_read(&policy->rwsem); - kobj = &policy->kobj; - cmp = &policy->kobj_unregister; - up_read(&policy->rwsem); - kobject_put(kobj); - - /* - * We need to make sure that the underlying kobj is - * actually not referenced anymore by anybody before we - * proceed with unloading. - */ - pr_debug("waiting for dropping of refcount\n"); - wait_for_completion(cmp); - pr_debug("wait complete\n"); - } + if (!frozen) + cpufreq_policy_put_kobj(policy); /* * Perform the ->exit() even during light-weight tear-down, diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 446687cc233..c823daaf904 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -62,6 +62,7 @@ config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE + select DMA_ENGINE_RAID select DCA help Enable support for the Intel(R) I/OAT DMA engine present @@ -112,6 +113,7 @@ config MV_XOR bool "Marvell XOR engine support" depends on PLAT_ORION select DMA_ENGINE + select DMA_ENGINE_RAID select ASYNC_TX_ENABLE_CHANNEL_SWITCH ---help--- Enable support for the Marvell XOR engine. @@ -187,6 +189,7 @@ config AMCC_PPC440SPE_ADMA tristate "AMCC PPC440SPe ADMA support" depends on 440SPe || 440SP select DMA_ENGINE + select DMA_ENGINE_RAID select ARCH_HAS_ASYNC_TX_FIND_CHANNEL select ASYNC_TX_ENABLE_CHANNEL_SWITCH help @@ -352,6 +355,7 @@ config NET_DMA bool "Network: TCP receive copy offload" depends on DMA_ENGINE && NET default (INTEL_IOATDMA || FSL_DMA) + depends on BROKEN help This enables the use of DMA engines in the network stack to offload receive copy-to-user operations, freeing CPU cycles. @@ -377,4 +381,7 @@ config DMATEST Simple DMA test client. Say N unless you're debugging a DMA Device driver. +config DMA_ENGINE_RAID + bool + endif diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index f31d647acdf..2787aba60c6 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -347,10 +347,6 @@ static struct device *chan2dev(struct dma_chan *chan) { return &chan->dev->device; } -static struct device *chan2parent(struct dma_chan *chan) -{ - return chan->dev->device.parent; -} #if defined(VERBOSE_DEBUG) static void vdbg_dump_regs(struct at_dma_chan *atchan) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index ea806bdc12e..ef63b9058f3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -912,7 +912,7 @@ struct dmaengine_unmap_pool { #define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) } static struct dmaengine_unmap_pool unmap_pool[] = { __UNMAP_POOL(2), - #if IS_ENABLED(CONFIG_ASYNC_TX_DMA) + #if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) __UNMAP_POOL(16), __UNMAP_POOL(128), __UNMAP_POOL(256), @@ -1054,7 +1054,7 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, dma_cookie_t cookie; unsigned long flags; - unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO); + unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOWAIT); if (!unmap) return -ENOMEM; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 20f9a3aaf92..9dfcaf5c128 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -539,9 +539,9 @@ static int dmatest_func(void *data) um->len = params->buf_size; for (i = 0; i < src_cnt; i++) { - unsigned long buf = (unsigned long) thread->srcs[i]; + void *buf = thread->srcs[i]; struct page *pg = virt_to_page(buf); - unsigned pg_off = buf & ~PAGE_MASK; + unsigned pg_off = (unsigned long) buf & ~PAGE_MASK; um->addr[i] = dma_map_page(dev->dev, pg, pg_off, um->len, DMA_TO_DEVICE); @@ -559,9 +559,9 @@ static int dmatest_func(void *data) /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */ dsts = &um->addr[src_cnt]; for (i = 0; i < dst_cnt; i++) { - unsigned long buf = (unsigned long) thread->dsts[i]; + void *buf = thread->dsts[i]; struct page *pg = virt_to_page(buf); - unsigned pg_off = buf & ~PAGE_MASK; + unsigned pg_off = (unsigned long) buf & ~PAGE_MASK; dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len, DMA_BIDIRECTIONAL); diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 7086a16a55f..f157c6f76b3 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -86,11 +86,6 @@ static void set_desc_cnt(struct fsldma_chan *chan, hw->count = CPU_TO_DMA(chan, count, 32); } -static u32 get_desc_cnt(struct fsldma_chan *chan, struct fsl_desc_sw *desc) -{ - return DMA_TO_CPU(chan, desc->hw.count, 32); -} - static void set_desc_src(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t src) { @@ -101,16 +96,6 @@ static void set_desc_src(struct fsldma_chan *chan, hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64); } -static dma_addr_t get_desc_src(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) -{ - u64 snoop_bits; - - snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) - ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; - return DMA_TO_CPU(chan, desc->hw.src_addr, 64) & ~snoop_bits; -} - static void set_desc_dst(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t dst) { @@ -121,16 +106,6 @@ static void set_desc_dst(struct fsldma_chan *chan, hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64); } -static dma_addr_t get_desc_dst(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) -{ - u64 snoop_bits; - - snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) - ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; - return DMA_TO_CPU(chan, desc->hw.dst_addr, 64) & ~snoop_bits; -} - static void set_desc_next(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t next) { @@ -408,7 +383,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); struct fsl_desc_sw *child; unsigned long flags; - dma_cookie_t cookie; + dma_cookie_t cookie = -EINVAL; spin_lock_irqsave(&chan->desc_lock, flags); @@ -854,10 +829,6 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, struct fsl_desc_sw *desc) { struct dma_async_tx_descriptor *txd = &desc->async_tx; - struct device *dev = chan->common.device->dev; - dma_addr_t src = get_desc_src(chan, desc); - dma_addr_t dst = get_desc_dst(chan, desc); - u32 len = get_desc_cnt(chan, desc); /* Run the link descriptor callback function */ if (txd->callback) { diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 7807f0ef4e2..53fb0c8365b 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -54,12 +54,6 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags) hw_desc->desc_command = (1 << 31); } -static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc) -{ - struct mv_xor_desc *hw_desc = desc->hw_desc; - return hw_desc->phy_dest_addr; -} - static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc, u32 byte_count) { @@ -787,7 +781,6 @@ static void mv_xor_issue_pending(struct dma_chan *chan) /* * Perform a transaction to verify the HW works. */ -#define MV_XOR_TEST_SIZE 2000 static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) { @@ -797,20 +790,21 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) struct dma_chan *dma_chan; dma_cookie_t cookie; struct dma_async_tx_descriptor *tx; + struct dmaengine_unmap_data *unmap; int err = 0; - src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + src = kmalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL); if (!src) return -ENOMEM; - dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + dest = kzalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL); if (!dest) { kfree(src); return -ENOMEM; } /* Fill in src buffer */ - for (i = 0; i < MV_XOR_TEST_SIZE; i++) + for (i = 0; i < PAGE_SIZE; i++) ((u8 *) src)[i] = (u8)i; dma_chan = &mv_chan->dmachan; @@ -819,14 +813,26 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) goto out; } - dest_dma = dma_map_single(dma_chan->device->dev, dest, - MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); + unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL); + if (!unmap) { + err = -ENOMEM; + goto free_resources; + } + + src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0, + PAGE_SIZE, DMA_TO_DEVICE); + unmap->to_cnt = 1; + unmap->addr[0] = src_dma; - src_dma = dma_map_single(dma_chan->device->dev, src, - MV_XOR_TEST_SIZE, DMA_TO_DEVICE); + dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0, + PAGE_SIZE, DMA_FROM_DEVICE); + unmap->from_cnt = 1; + unmap->addr[1] = dest_dma; + + unmap->len = PAGE_SIZE; tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma, - MV_XOR_TEST_SIZE, 0); + PAGE_SIZE, 0); cookie = mv_xor_tx_submit(tx); mv_xor_issue_pending(dma_chan); async_tx_ack(tx); @@ -841,8 +847,8 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) } dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma, - MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); - if (memcmp(src, dest, MV_XOR_TEST_SIZE)) { + PAGE_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, PAGE_SIZE)) { dev_err(dma_chan->device->dev, "Self-test copy failed compare, disabling\n"); err = -ENODEV; @@ -850,6 +856,7 @@ static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) } free_resources: + dmaengine_unmap_put(unmap); mv_xor_free_chan_resources(dma_chan); out: kfree(src); @@ -867,13 +874,15 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST]; dma_addr_t dest_dma; struct dma_async_tx_descriptor *tx; + struct dmaengine_unmap_data *unmap; struct dma_chan *dma_chan; dma_cookie_t cookie; u8 cmp_byte = 0; u32 cmp_word; int err = 0; + int src_count = MV_XOR_NUM_SRC_TEST; - for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + for (src_idx = 0; src_idx < src_count; src_idx++) { xor_srcs[src_idx] = alloc_page(GFP_KERNEL); if (!xor_srcs[src_idx]) { while (src_idx--) @@ -890,13 +899,13 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) } /* Fill in src buffers */ - for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + for (src_idx = 0; src_idx < src_count; src_idx++) { u8 *ptr = page_address(xor_srcs[src_idx]); for (i = 0; i < PAGE_SIZE; i++) ptr[i] = (1 << src_idx); } - for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) + for (src_idx = 0; src_idx < src_count; src_idx++) cmp_byte ^= (u8) (1 << src_idx); cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | @@ -910,16 +919,29 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) goto out; } + unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1, + GFP_KERNEL); + if (!unmap) { + err = -ENOMEM; + goto free_resources; + } + /* test xor */ - dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE, - DMA_FROM_DEVICE); + for (i = 0; i < src_count; i++) { + unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + dma_srcs[i] = unmap->addr[i]; + unmap->to_cnt++; + } - for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++) - dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], - 0, PAGE_SIZE, DMA_TO_DEVICE); + unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + dest_dma = unmap->addr[src_count]; + unmap->from_cnt = 1; + unmap->len = PAGE_SIZE; tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0); + src_count, PAGE_SIZE, 0); cookie = mv_xor_tx_submit(tx); mv_xor_issue_pending(dma_chan); @@ -948,9 +970,10 @@ mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) } free_resources: + dmaengine_unmap_put(unmap); mv_xor_free_chan_resources(dma_chan); out: - src_idx = MV_XOR_NUM_SRC_TEST; + src_idx = src_count; while (src_idx--) __free_page(xor_srcs[src_idx]); __free_page(dest); @@ -1176,6 +1199,7 @@ static int mv_xor_probe(struct platform_device *pdev) int i = 0; for_each_child_of_node(pdev->dev.of_node, np) { + struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; @@ -1193,21 +1217,21 @@ static int mv_xor_probe(struct platform_device *pdev) goto err_channel_add; } - xordev->channels[i] = - mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); - if (IS_ERR(xordev->channels[i])) { - ret = PTR_ERR(xordev->channels[i]); - xordev->channels[i] = NULL; + chan = mv_xor_channel_add(xordev, pdev, i, + cap_mask, irq); + if (IS_ERR(chan)) { + ret = PTR_ERR(chan); irq_dispose_mapping(irq); goto err_channel_add; } + xordev->channels[i] = chan; i++; } } else if (pdata && pdata->channels) { for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { struct mv_xor_channel_data *cd; + struct mv_xor_chan *chan; int irq; cd = &pdata->channels[i]; @@ -1222,13 +1246,14 @@ static int mv_xor_probe(struct platform_device *pdev) goto err_channel_add; } - xordev->channels[i] = - mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); - if (IS_ERR(xordev->channels[i])) { - ret = PTR_ERR(xordev->channels[i]); + chan = mv_xor_channel_add(xordev, pdev, i, + cd->cap_mask, irq); + if (IS_ERR(chan)) { + ret = PTR_ERR(chan); goto err_channel_add; } + + xordev->channels[i] = chan; } } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index cdf0483b8f2..536632f6479 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2492,12 +2492,9 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) static inline void _init_desc(struct dma_pl330_desc *desc) { - desc->pchan = NULL; desc->req.x = &desc->px; desc->req.token = desc; desc->rqcfg.swap = SWAP_NO; - desc->rqcfg.privileged = 0; - desc->rqcfg.insnaccess = 0; desc->rqcfg.scctl = SCCTRL0; desc->rqcfg.dcctl = DCCTRL0; desc->req.cfg = &desc->rqcfg; @@ -2517,7 +2514,7 @@ static int add_desc(struct dma_pl330_dmac *pdmac, gfp_t flg, int count) if (!pdmac) return 0; - desc = kmalloc(count * sizeof(*desc), flg); + desc = kcalloc(count, sizeof(*desc), flg); if (!desc) return 0; diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 8da48c6b2a3..8bba298535b 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -533,29 +533,6 @@ static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc, } /** - * ppc440spe_desc_init_memset - initialize the descriptor for MEMSET operation - */ -static void ppc440spe_desc_init_memset(struct ppc440spe_adma_desc_slot *desc, - int value, unsigned long flags) -{ - struct dma_cdb *hw_desc = desc->hw_desc; - - memset(desc->hw_desc, 0, sizeof(struct dma_cdb)); - desc->hw_next = NULL; - desc->src_cnt = 1; - desc->dst_cnt = 1; - - if (flags & DMA_PREP_INTERRUPT) - set_bit(PPC440SPE_DESC_INT, &desc->flags); - else - clear_bit(PPC440SPE_DESC_INT, &desc->flags); - - hw_desc->sg1u = hw_desc->sg1l = cpu_to_le32((u32)value); - hw_desc->sg3u = hw_desc->sg3l = cpu_to_le32((u32)value); - hw_desc->opc = DMA_CDB_OPC_DFILL128; -} - -/** * ppc440spe_desc_set_src_addr - set source address into the descriptor */ static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc, @@ -1504,8 +1481,6 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions( struct ppc440spe_adma_chan *chan, dma_cookie_t cookie) { - int i; - BUG_ON(desc->async_tx.cookie < 0); if (desc->async_tx.cookie > 0) { cookie = desc->async_tx.cookie; @@ -3898,7 +3873,7 @@ static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev) ppc440spe_adma_prep_dma_interrupt; } pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: " - "( %s%s%s%s%s%s%s)\n", + "( %s%s%s%s%s%s)\n", dev_name(adev->dev), dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "", diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index bae6c29f550..17686caf64d 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -406,7 +406,6 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, dma_async_tx_callback callback; void *param; struct dma_async_tx_descriptor *txd = &desc->txd; - struct txx9dmac_slave *ds = dc->chan.private; dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n", txd->cookie, desc); diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index b0bb056458a..281029daf98 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1623,7 +1623,6 @@ static struct scsi_host_template scsi_driver_template = { .cmd_per_lun = 1, .can_queue = 1, .sdev_attrs = sbp2_scsi_sysfs_attrs, - .no_write_same = 1, }; MODULE_AUTHOR("Kristian Hoegsberg <krh@bitplanet.net>"); diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 299fad6b586..5373dc5b601 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ obj-$(CONFIG_EFI) += efi/ +obj-$(CONFIG_UEFI_CPER) += efi/ diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 3150aa4874e..6aecbc86ec9 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -36,7 +36,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE backend for pstore by default. This setting can be overridden using the efivars module's pstore_disable parameter. -config UEFI_CPER - def_bool n - endmenu + +config UEFI_CPER + bool diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 9ba156d3c77..6c2a41ec21b 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -1,7 +1,7 @@ # # Makefile for linux kernel # -obj-y += efi.o vars.o +obj-$(CONFIG_EFI) += efi.o vars.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o obj-$(CONFIG_UEFI_CPER) += cper.o diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 743fd426f21..4b9dc836dcf 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -356,6 +356,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, static struct pstore_info efi_pstore_info = { .owner = THIS_MODULE, .name = "efi", + .flags = PSTORE_FLAGS_FRAGILE, .open = efi_pstore_open, .close = efi_pstore_close, .read = efi_pstore_read, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 621c7c67a64..76d3d1ab73c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2343,15 +2343,24 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request) kfree(request); } -static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, - struct intel_ring_buffer *ring) +static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) { - u32 completed_seqno; - u32 acthd; + u32 completed_seqno = ring->get_seqno(ring, false); + u32 acthd = intel_ring_get_active_head(ring); + struct drm_i915_gem_request *request; + + list_for_each_entry(request, &ring->request_list, list) { + if (i915_seqno_passed(completed_seqno, request->seqno)) + continue; - acthd = intel_ring_get_active_head(ring); - completed_seqno = ring->get_seqno(ring, false); + i915_set_reset_status(ring, request, acthd); + } +} +static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, + struct intel_ring_buffer *ring) +{ while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; @@ -2359,9 +2368,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, list); - if (request->seqno > completed_seqno) - i915_set_reset_status(ring, request, acthd); - i915_gem_free_request(request); } @@ -2403,8 +2409,16 @@ void i915_gem_reset(struct drm_device *dev) struct intel_ring_buffer *ring; int i; + /* + * Before we free the objects from the requests, we need to inspect + * them for finding the guilty party. As the requests only borrow + * their reference to the objects, the inspection must be done first. + */ + for_each_ring(ring, dev_priv, i) + i915_gem_reset_ring_status(dev_priv, ring); + for_each_ring(ring, dev_priv, i) - i915_gem_reset_ring_lists(dev_priv, ring); + i915_gem_reset_ring_cleanup(dev_priv, ring); i915_gem_cleanup_ringbuffer(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b7e787fb464..a3ba9a8cd68 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -93,7 +93,7 @@ eb_lookup_vmas(struct eb_vmas *eb, { struct drm_i915_gem_object *obj; struct list_head objects; - int i, ret = 0; + int i, ret; INIT_LIST_HEAD(&objects); spin_lock(&file->table_lock); @@ -106,7 +106,7 @@ eb_lookup_vmas(struct eb_vmas *eb, DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); ret = -ENOENT; - goto out; + goto err; } if (!list_empty(&obj->obj_exec_link)) { @@ -114,7 +114,7 @@ eb_lookup_vmas(struct eb_vmas *eb, DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", obj, exec[i].handle, i); ret = -EINVAL; - goto out; + goto err; } drm_gem_object_reference(&obj->base); @@ -123,9 +123,13 @@ eb_lookup_vmas(struct eb_vmas *eb, spin_unlock(&file->table_lock); i = 0; - list_for_each_entry(obj, &objects, obj_exec_link) { + while (!list_empty(&objects)) { struct i915_vma *vma; + obj = list_first_entry(&objects, + struct drm_i915_gem_object, + obj_exec_link); + /* * NOTE: We can leak any vmas created here when something fails * later on. But that's no issue since vma_unbind can deal with @@ -138,10 +142,12 @@ eb_lookup_vmas(struct eb_vmas *eb, if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); - goto out; + goto err; } + /* Transfer ownership from the objects list to the vmas list. */ list_add_tail(&vma->exec_list, &eb->vmas); + list_del_init(&obj->obj_exec_link); vma->exec_entry = &exec[i]; if (eb->and < 0) { @@ -155,16 +161,22 @@ eb_lookup_vmas(struct eb_vmas *eb, ++i; } + return 0; + -out: +err: while (!list_empty(&objects)) { obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); list_del_init(&obj->obj_exec_link); - if (ret) - drm_gem_object_unreference(&obj->base); + drm_gem_object_unreference(&obj->base); } + /* + * Objects already transfered to the vmas list will be unreferenced by + * eb_destroy. + */ + return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8b8bde7dce5..54e82a80cf5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6303,7 +6303,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) uint32_t val; list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) - WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n", + WARN(crtc->active, "CRTC for pipe %c enabled\n", pipe_name(crtc->pipe)); WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n"); @@ -11126,14 +11126,15 @@ void intel_connector_attach_encoder(struct intel_connector *connector, int intel_modeset_vga_set_state(struct drm_device *dev, bool state) { struct drm_i915_private *dev_priv = dev->dev_private; + unsigned reg = INTEL_INFO(dev)->gen >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; u16 gmch_ctrl; - pci_read_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, &gmch_ctrl); + pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl); if (state) gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; else gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; - pci_write_config_word(dev_priv->bridge_dev, INTEL_GMCH_CTRL, gmch_ctrl); + pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl); return 0; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3657ab43c8f..26c29c17322 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5688,6 +5688,8 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable) unsigned long irqflags; uint32_t tmp; + WARN_ON(dev_priv->pc8.enabled); + tmp = I915_READ(HSW_PWR_WELL_DRIVER); is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED; enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST; @@ -5747,16 +5749,24 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable) static void __intel_power_well_get(struct drm_device *dev, struct i915_power_well *power_well) { - if (!power_well->count++) + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!power_well->count++) { + hsw_disable_package_c8(dev_priv); __intel_set_power_well(dev, true); + } } static void __intel_power_well_put(struct drm_device *dev, struct i915_power_well *power_well) { + struct drm_i915_private *dev_priv = dev->dev_private; + WARN_ON(!power_well->count); - if (!--power_well->count && i915_disable_power_well) + if (!--power_well->count && i915_disable_power_well) { __intel_set_power_well(dev, false); + hsw_enable_package_c8(dev_priv); + } } void intel_display_power_get(struct drm_device *dev, diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig index 037d324bf58..66ac0ff95f5 100644 --- a/drivers/gpu/drm/qxl/Kconfig +++ b/drivers/gpu/drm/qxl/Kconfig @@ -8,5 +8,6 @@ config DRM_QXL select DRM_KMS_HELPER select DRM_KMS_FB_HELPER select DRM_TTM + select CRC32 help QXL virtual GPU for Spice virtualization desktop integration. Do not enable this driver unless your distro ships a corresponding X.org QXL driver that can handle kernel modesetting. diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 5e827c29d19..d70aafb8330 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -24,7 +24,7 @@ */ -#include "linux/crc32.h" +#include <linux/crc32.h> #include "qxl_drv.h" #include "qxl_object.h" diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index de86493cbc4..713a5d35990 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -174,7 +174,7 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder) } sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); return; } @@ -235,7 +235,7 @@ void dce6_afmt_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } @@ -308,7 +308,9 @@ int dce6_audio_init(struct radeon_device *rdev) rdev->audio.enabled = true; if (ASIC_IS_DCE8(rdev)) - rdev->audio.num_pins = 7; + rdev->audio.num_pins = 6; + else if (ASIC_IS_DCE61(rdev)) + rdev->audio.num_pins = 4; else rdev->audio.num_pins = 6; diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index aa695c4feb3..0c6d5cef4cf 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -118,7 +118,7 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder) } sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); return; } @@ -173,7 +173,7 @@ static void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 11aab2ab54c..f59a9e9fccf 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -895,6 +895,10 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x999C)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; + rdev->config.cayman.max_hw_contexts = 8; + rdev->config.cayman.sx_max_export_size = 256; + rdev->config.cayman.sx_max_export_pos_size = 64; + rdev->config.cayman.sx_max_export_smx_size = 192; } else if ((rdev->pdev->device == 0x9903) || (rdev->pdev->device == 0x9904) || (rdev->pdev->device == 0x990A) || @@ -905,6 +909,10 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x999D)) { rdev->config.cayman.max_simds_per_se = 4; rdev->config.cayman.max_backends_per_se = 2; + rdev->config.cayman.max_hw_contexts = 8; + rdev->config.cayman.sx_max_export_size = 256; + rdev->config.cayman.sx_max_export_pos_size = 64; + rdev->config.cayman.sx_max_export_smx_size = 192; } else if ((rdev->pdev->device == 0x9919) || (rdev->pdev->device == 0x9990) || (rdev->pdev->device == 0x9991) || @@ -915,9 +923,17 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; + rdev->config.cayman.max_hw_contexts = 4; + rdev->config.cayman.sx_max_export_size = 128; + rdev->config.cayman.sx_max_export_pos_size = 32; + rdev->config.cayman.sx_max_export_smx_size = 96; } else { rdev->config.cayman.max_simds_per_se = 2; rdev->config.cayman.max_backends_per_se = 1; + rdev->config.cayman.max_hw_contexts = 4; + rdev->config.cayman.sx_max_export_size = 128; + rdev->config.cayman.sx_max_export_pos_size = 32; + rdev->config.cayman.sx_max_export_smx_size = 96; } rdev->config.cayman.max_texture_channel_caches = 2; rdev->config.cayman.max_gprs = 256; @@ -925,10 +941,6 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.max_gs_threads = 32; rdev->config.cayman.max_stack_entries = 512; rdev->config.cayman.sx_num_of_sets = 8; - rdev->config.cayman.sx_max_export_size = 256; - rdev->config.cayman.sx_max_export_pos_size = 64; - rdev->config.cayman.sx_max_export_smx_size = 192; - rdev->config.cayman.max_hw_contexts = 8; rdev->config.cayman.sq_num_cf_insts = 2; rdev->config.cayman.sc_prim_fifo_size = 0x40; diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index 913b025ae9b..374499db20c 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2328,6 +2328,12 @@ void rv770_get_engine_memory_ss(struct radeon_device *rdev) pi->mclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss, ASIC_INTERNAL_MEMORY_SS, 0); + /* disable ss, causes hangs on some cayman boards */ + if (rdev->family == CHIP_CAYMAN) { + pi->sclk_ss = false; + pi->mclk_ss = false; + } + if (pi->sclk_ss || pi->mclk_ss) pi->dynamic_ss = true; else diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 15b86a94949..40615215231 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -353,7 +353,8 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, * Don't move nonexistent data. Clear destination instead. */ if (old_iomap == NULL && - (ttm == NULL || ttm->state == tt_unpopulated)) { + (ttm == NULL || (ttm->state == tt_unpopulated && + !(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) { memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE); goto out2; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 92d1206482a..f80b700f821 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -377,6 +377,9 @@ static int intel_idle(struct cpuidle_device *dev, if (!current_set_polling_and_test()) { + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c index acb7f90359a..749a6cadab8 100644 --- a/drivers/iio/adc/ad7887.c +++ b/drivers/iio/adc/ad7887.c @@ -200,7 +200,13 @@ static const struct ad7887_chip_info ad7887_chip_info_tbl[] = { .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = 1, .scan_index = 1, - .scan_type = IIO_ST('u', 12, 16, 0), + .scan_type = { + .sign = 'u', + .realbits = 12, + .storagebits = 16, + .shift = 0, + .endianness = IIO_BE, + }, }, .channel[1] = { .type = IIO_VOLTAGE, @@ -210,7 +216,13 @@ static const struct ad7887_chip_info ad7887_chip_info_tbl[] = { .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = 0, .scan_index = 0, - .scan_type = IIO_ST('u', 12, 16, 0), + .scan_type = { + .sign = 'u', + .realbits = 12, + .storagebits = 16, + .shift = 0, + .endianness = IIO_BE, + }, }, .channel[2] = IIO_CHAN_SOFT_TIMESTAMP(2), .int_vref_mv = 2500, diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c index 3fb7757a102..368660dfe13 100644 --- a/drivers/iio/imu/adis16400_core.c +++ b/drivers/iio/imu/adis16400_core.c @@ -651,7 +651,12 @@ static const struct iio_chan_spec adis16448_channels[] = { .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = ADIS16448_BARO_OUT, .scan_index = ADIS16400_SCAN_BARO, - .scan_type = IIO_ST('s', 16, 16, 0), + .scan_type = { + .sign = 's', + .realbits = 16, + .storagebits = 16, + .endianness = IIO_BE, + }, }, ADIS16400_TEMP_CHAN(ADIS16448_TEMP_OUT, 12), IIO_CHAN_SOFT_TIMESTAMP(11) diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c index 21df5713001..0922e39b0ea 100644 --- a/drivers/iio/light/cm36651.c +++ b/drivers/iio/light/cm36651.c @@ -387,7 +387,7 @@ static int cm36651_read_int_time(struct cm36651_data *cm36651, return -EINVAL; } - return IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT; } static int cm36651_write_int_time(struct cm36651_data *cm36651, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index c47c2034ca7..0717940ec3b 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -181,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id) static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + int cb_destroy; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - if (iwcm_deref_id(cm_id_priv) && - test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + + /* + * Test bit before deref in case the cm_id gets freed on another + * thread. + */ + cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + if (iwcm_deref_id(cm_id_priv) && cb_destroy) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index bdc842e9fae..a283274a5a0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -49,12 +49,20 @@ #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ - (udata)->inbuf = (void __user *) (ibuf); \ + (udata)->inbuf = (const void __user *) (ibuf); \ (udata)->outbuf = (void __user *) (obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) +#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ + (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + /* * Our lifetime rules for these structs are the following: * diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 65f6e7dc380..f1cc83855af 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2593,6 +2593,9 @@ out_put: static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { + if (kern_spec->reserved) + return -EINVAL; + ib_spec->type = kern_spec->type; switch (ib_spec->type) { @@ -2646,6 +2649,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, void *ib_spec; int i; + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + if (ucore->outlen < sizeof(resp)) return -ENOSPC; @@ -2671,6 +2677,10 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) return -EINVAL; + if (cmd.flow_attr.reserved[0] || + cmd.flow_attr.reserved[1]) + return -EINVAL; + if (cmd.flow_attr.num_of_specs) { kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, GFP_KERNEL); @@ -2731,6 +2741,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", i, cmd.flow_attr.size); + err = -EINVAL; goto err_free; } flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); @@ -2791,10 +2802,16 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_uobject *uobj; int ret; + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); if (ret) return ret; + if (cmd.comp_mask) + return -EINVAL; + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, file->ucontext); if (!uobj) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 34386943ebc..08219fb3338 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -668,25 +668,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) return -EINVAL; + if (ex_hdr.cmd_hdr_reserved) + return -EINVAL; + if (ex_hdr.response) { if (!hdr.out_words && !ex_hdr.provider_out_words) return -EINVAL; + + if (!access_ok(VERIFY_WRITE, + (void __user *) (unsigned long) ex_hdr.response, + (hdr.out_words + ex_hdr.provider_out_words) * 8)) + return -EFAULT; } else { if (hdr.out_words || ex_hdr.provider_out_words) return -EINVAL; } - INIT_UDATA(&ucore, - (hdr.in_words) ? buf : 0, - (unsigned long)ex_hdr.response, - hdr.in_words * 8, - hdr.out_words * 8); - - INIT_UDATA(&uhw, - (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0, - (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0, - ex_hdr.provider_in_words * 8, - ex_hdr.provider_out_words * 8); + INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, + hdr.in_words * 8, hdr.out_words * 8); + + INIT_UDATA_BUF_OR_NULL(&uhw, + buf + ucore.inlen, + (unsigned long) ex_hdr.response + ucore.outlen, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); err = uverbs_ex_cmd_table[command](file, &ucore, diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 12fef76c791..45126879ad2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -524,50 +524,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -#define VLAN_NONE 0xfff -#define FILTER_SEL_VLAN_NONE 0xffff -#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ -#define FILTER_SEL_WIDTH_VIN_P_FC \ - (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ -#define FILTER_SEL_WIDTH_TAG_P_FC \ - (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ -#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) - -static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, - struct l2t_entry *l2t) -{ - unsigned int ntuple = 0; - u32 viid; - - switch (dev->rdev.lldi.filt_mode) { - - /* default filter mode */ - case HW_TPL_FR_MT_PR_IV_P_FC: - if (l2t->vlan == VLAN_NONE) - ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; - else { - ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; - ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC; - } - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - case HW_TPL_FR_MT_PR_OV_P_FC: { - viid = cxgb4_port_viid(l2t->neigh->dev); - - ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; - ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; - ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - } - default: - break; - } - return ntuple; -} - static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -641,8 +597,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = la->sin_addr.s_addr; req->peer_ip = ra->sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = cpu_to_be32(select_ntuple(ep->com.dev, - ep->dst, ep->l2t)); + req->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); req->opt2 = cpu_to_be32(opt2); } else { req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); @@ -662,9 +619,9 @@ static int send_connect(struct c4iw_ep *ep) req6->peer_ip_lo = *((__be64 *) (ra6->sin6_addr.s6_addr + 8)); req6->opt0 = cpu_to_be64(opt0); - req6->params = cpu_to_be32( - select_ntuple(ep->com.dev, ep->dst, - ep->l2t)); + req6->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); req6->opt2 = cpu_to_be32(opt2); } } else { @@ -681,8 +638,9 @@ static int send_connect(struct c4iw_ep *ep) t5_req->peer_ip = ra->sin_addr.s_addr; t5_req->opt0 = cpu_to_be64(opt0); t5_req->params = cpu_to_be64(V_FILTER_TUPLE( - select_ntuple(ep->com.dev, - ep->dst, ep->l2t))); + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); t5_req->opt2 = cpu_to_be32(opt2); } else { t5_req6 = (struct cpl_t5_act_open_req6 *) @@ -703,7 +661,9 @@ static int send_connect(struct c4iw_ep *ep) (ra6->sin6_addr.s6_addr + 8)); t5_req6->opt0 = cpu_to_be64(opt0); t5_req6->params = (__force __be64)cpu_to_be32( - select_ntuple(ep->com.dev, ep->dst, ep->l2t)); + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); t5_req6->opt2 = cpu_to_be32(opt2); } } @@ -1630,7 +1590,8 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) memset(req, 0, sizeof(*req)); req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); - req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + req->le.filter = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], ep->l2t)); sin = (struct sockaddr_in *)&ep->com.local_addr; req->le.lport = sin->sin_port; @@ -2938,7 +2899,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. */ - if (dev->rdev.lldi.enable_fw_ofld_conn) + if (dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, cm_id->local_addr.ss_family, ep); else @@ -3323,9 +3285,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) /* * Calculate the server tid from filter hit index from cpl_rx_pkt. */ - stid = (__force int) cpu_to_be32((__force u32) rss->hash_val) - - dev->rdev.lldi.tids->sftid_base - + dev->rdev.lldi.tids->nstids; + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); if (!lep) { @@ -3397,7 +3357,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) window = (__force u16) htons((__force u16)tcph->window); /* Calcuate filter portion for LE region. */ - filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e)); + filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( + dev->rdev.lldi.ports[0], + e)); /* * Synthesize the cpl_pass_accept_req. We have everything except the diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 4cb8eb24497..84e45006451 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -173,7 +173,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } -int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) { u32 remain = len; u32 dmalen; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index c29b5c83883..cdc7df4fdb8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -31,6 +31,7 @@ */ #include <linux/netdevice.h> +#include <linux/if_arp.h> /* For ARPHRD_xxx */ #include <linux/module.h> #include <net/rtnetlink.h> #include "ipoib.h" @@ -103,7 +104,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return -EINVAL; pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); - if (!pdev) + if (!pdev || pdev->type != ARPHRD_INFINIBAND) return -ENODEV; ppriv = netdev_priv(pdev); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6be57c38638..9804fca6bf0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -207,7 +207,9 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) isert_conn->conn_rx_descs = NULL; } +static void isert_cq_tx_work(struct work_struct *); static void isert_cq_tx_callback(struct ib_cq *, void *); +static void isert_cq_rx_work(struct work_struct *); static void isert_cq_rx_callback(struct ib_cq *, void *); static int @@ -259,26 +261,36 @@ isert_create_device_ib_res(struct isert_device *device) cq_desc[i].device = device; cq_desc[i].cq_index = i; + INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work); device->dev_rx_cq[i] = ib_create_cq(device->ib_device, isert_cq_rx_callback, isert_cq_event_callback, (void *)&cq_desc[i], ISER_MAX_RX_CQ_LEN, i); - if (IS_ERR(device->dev_rx_cq[i])) + if (IS_ERR(device->dev_rx_cq[i])) { + ret = PTR_ERR(device->dev_rx_cq[i]); + device->dev_rx_cq[i] = NULL; goto out_cq; + } + INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work); device->dev_tx_cq[i] = ib_create_cq(device->ib_device, isert_cq_tx_callback, isert_cq_event_callback, (void *)&cq_desc[i], ISER_MAX_TX_CQ_LEN, i); - if (IS_ERR(device->dev_tx_cq[i])) + if (IS_ERR(device->dev_tx_cq[i])) { + ret = PTR_ERR(device->dev_tx_cq[i]); + device->dev_tx_cq[i] = NULL; goto out_cq; + } - if (ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP)) + ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP); + if (ret) goto out_cq; - if (ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP)) + ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP); + if (ret) goto out_cq; } @@ -1724,7 +1736,6 @@ isert_cq_tx_callback(struct ib_cq *cq, void *context) { struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; - INIT_WORK(&cq_desc->cq_tx_work, isert_cq_tx_work); queue_work(isert_comp_wq, &cq_desc->cq_tx_work); } @@ -1768,7 +1779,6 @@ isert_cq_rx_callback(struct ib_cq *cq, void *context) { struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; - INIT_WORK(&cq_desc->cq_rx_work, isert_cq_rx_work); queue_work(isert_rx_wq, &cq_desc->cq_rx_work); } diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 82cec63a901..3ee78f02e5d 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -149,8 +149,9 @@ static void intc_irqpin_read_modify_write(struct intc_irqpin_priv *p, static void intc_irqpin_mask_unmask_prio(struct intc_irqpin_priv *p, int irq, int do_mask) { - int bitfield_width = 4; /* PRIO assumed to have fixed bitfield width */ - int shift = (7 - irq) * bitfield_width; /* PRIO assumed to be 32-bit */ + /* The PRIO register is assumed to be 32-bit with fixed 4-bit fields. */ + int bitfield_width = 4; + int shift = 32 - (irq + 1) * bitfield_width; intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_PRIO, shift, bitfield_width, @@ -159,8 +160,9 @@ static void intc_irqpin_mask_unmask_prio(struct intc_irqpin_priv *p, static int intc_irqpin_set_sense(struct intc_irqpin_priv *p, int irq, int value) { + /* The SENSE register is assumed to be 32-bit. */ int bitfield_width = p->config.sense_bitfield_width; - int shift = (7 - irq) * bitfield_width; /* SENSE assumed to be 32-bit */ + int shift = 32 - (irq + 1) * bitfield_width; dev_dbg(&p->pdev->dev, "sense irq = %d, mode = %d\n", irq, value); diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 497bd026c23..4a482552818 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -1643,10 +1643,6 @@ setup_hfcpci(struct IsdnCard *card) int i; struct pci_dev *tmp_hfcpci = NULL; -#ifdef __BIG_ENDIAN -#error "not running on big endian machines now" -#endif - strcpy(tmp, hfcpci_revision); printk(KERN_INFO "HiSax: HFC-PCI driver Rev. %s\n", HiSax_getrev(tmp)); diff --git a/drivers/isdn/hisax/telespci.c b/drivers/isdn/hisax/telespci.c index f6ab63aa699..33eeb4602c7 100644 --- a/drivers/isdn/hisax/telespci.c +++ b/drivers/isdn/hisax/telespci.c @@ -290,10 +290,6 @@ int setup_telespci(struct IsdnCard *card) struct IsdnCardState *cs = card->cs; char tmp[64]; -#ifdef __BIG_ENDIAN -#error "not running on big endian machines now" -#endif - strcpy(tmp, telespci_revision); printk(KERN_INFO "HiSax: Teles/PCI driver Rev. %s\n", HiSax_getrev(tmp)); if (cs->typ != ISDN_CTYPE_TELESPCI) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 2b46bf1d7e4..4c9852d92b0 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -421,9 +421,11 @@ out: if (watermark <= WATERMARK_METADATA) { SET_GC_MARK(b, GC_MARK_METADATA); + SET_GC_MOVE(b, 0); b->prio = BTREE_PRIO; } else { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_MOVE(b, 0); b->prio = INITIAL_PRIO; } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 4beb55a0ff3..754f4317748 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -197,7 +197,7 @@ struct bucket { uint8_t disk_gen; uint8_t last_gc; /* Most out of date gen in the btree */ uint8_t gc_gen; - uint16_t gc_mark; + uint16_t gc_mark; /* Bitfield used by GC. See below for field */ }; /* @@ -209,7 +209,8 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); #define GC_MARK_RECLAIMABLE 0 #define GC_MARK_DIRTY 1 #define GC_MARK_METADATA 2 -BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); +BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13); +BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); #include "journal.h" #include "stats.h" @@ -372,14 +373,14 @@ struct cached_dev { unsigned char writeback_percent; unsigned writeback_delay; - int writeback_rate_change; - int64_t writeback_rate_derivative; uint64_t writeback_rate_target; + int64_t writeback_rate_proportional; + int64_t writeback_rate_derivative; + int64_t writeback_rate_change; unsigned writeback_rate_update_seconds; unsigned writeback_rate_d_term; unsigned writeback_rate_p_term_inverse; - unsigned writeback_rate_d_smooth; }; enum alloc_watermarks { @@ -445,7 +446,6 @@ struct cache { * call prio_write() to keep gens from wrapping. */ uint8_t need_save_prio; - unsigned gc_move_threshold; /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5e2765aadce..31bb53fcc67 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1561,6 +1561,28 @@ size_t bch_btree_gc_finish(struct cache_set *c) SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i), GC_MARK_METADATA); + /* don't reclaim buckets to which writeback keys point */ + rcu_read_lock(); + for (i = 0; i < c->nr_uuids; i++) { + struct bcache_device *d = c->devices[i]; + struct cached_dev *dc; + struct keybuf_key *w, *n; + unsigned j; + + if (!d || UUID_FLASH_ONLY(&c->uuids[i])) + continue; + dc = container_of(d, struct cached_dev, disk); + + spin_lock(&dc->writeback_keys.lock); + rbtree_postorder_for_each_entry_safe(w, n, + &dc->writeback_keys.keys, node) + for (j = 0; j < KEY_PTRS(&w->key); j++) + SET_GC_MARK(PTR_BUCKET(c, &w->key, j), + GC_MARK_DIRTY); + spin_unlock(&dc->writeback_keys.lock); + } + rcu_read_unlock(); + for_each_cache(ca, c, i) { uint64_t *i; @@ -1817,7 +1839,8 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, if (KEY_START(k) > KEY_START(insert) + sectors_found) goto check_failed; - if (KEY_PTRS(replace_key) != KEY_PTRS(k)) + if (KEY_PTRS(k) != KEY_PTRS(replace_key) || + KEY_DIRTY(k) != KEY_DIRTY(replace_key)) goto check_failed; /* skip past gen */ @@ -2217,7 +2240,7 @@ struct btree_insert_op { struct bkey *replace_key; }; -int btree_insert_fn(struct btree_op *b_op, struct btree *b) +static int btree_insert_fn(struct btree_op *b_op, struct btree *b) { struct btree_insert_op *op = container_of(b_op, struct btree_insert_op, op); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 7c1275e6602..f2f0998c4a9 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -25,10 +25,9 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) unsigned i; for (i = 0; i < KEY_PTRS(k); i++) { - struct cache *ca = PTR_CACHE(c, k, i); struct bucket *g = PTR_BUCKET(c, k, i); - if (GC_SECTORS_USED(g) < ca->gc_move_threshold) + if (GC_MOVE(g)) return true; } @@ -65,11 +64,16 @@ static void write_moving_finish(struct closure *cl) static void read_moving_endio(struct bio *bio, int error) { + struct bbio *b = container_of(bio, struct bbio, bio); struct moving_io *io = container_of(bio->bi_private, struct moving_io, cl); if (error) io->op.error = error; + else if (!KEY_DIRTY(&b->key) && + ptr_stale(io->op.c, &b->key, 0)) { + io->op.error = -EINTR; + } bch_bbio_endio(io->op.c, bio, error, "reading data to move"); } @@ -141,6 +145,11 @@ static void read_moving(struct cache_set *c) if (!w) break; + if (ptr_stale(c, &w->key, 0)) { + bch_keybuf_del(&c->moving_gc_keys, w); + continue; + } + io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec) * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), GFP_KERNEL); @@ -184,7 +193,8 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r) static unsigned bucket_heap_top(struct cache *ca) { - return GC_SECTORS_USED(heap_peek(&ca->heap)); + struct bucket *b; + return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -226,9 +236,8 @@ void bch_moving_gc(struct cache_set *c) sectors_to_move -= GC_SECTORS_USED(b); } - ca->gc_move_threshold = bucket_heap_top(ca); - - pr_debug("threshold %u", ca->gc_move_threshold); + while (heap_pop(&ca->heap, b, bucket_cmp)) + SET_GC_MOVE(b, 1); } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dec15cd2d79..c57bfa071a5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1676,7 +1676,7 @@ err: static bool can_attach_cache(struct cache *ca, struct cache_set *c) { return ca->sb.block_size == c->sb.block_size && - ca->sb.bucket_size == c->sb.block_size && + ca->sb.bucket_size == c->sb.bucket_size && ca->sb.nr_in_set == c->sb.nr_in_set; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 80d4c2bee18..a1f85612f0b 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -83,7 +83,6 @@ rw_attribute(writeback_rate); rw_attribute(writeback_rate_update_seconds); rw_attribute(writeback_rate_d_term); rw_attribute(writeback_rate_p_term_inverse); -rw_attribute(writeback_rate_d_smooth); read_attribute(writeback_rate_debug); read_attribute(stripe_size); @@ -129,31 +128,41 @@ SHOW(__bch_cached_dev) var_printf(writeback_running, "%i"); var_print(writeback_delay); var_print(writeback_percent); - sysfs_print(writeback_rate, dc->writeback_rate.rate); + sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); var_print(writeback_rate_update_seconds); var_print(writeback_rate_d_term); var_print(writeback_rate_p_term_inverse); - var_print(writeback_rate_d_smooth); if (attr == &sysfs_writeback_rate_debug) { + char rate[20]; char dirty[20]; - char derivative[20]; char target[20]; - bch_hprint(dirty, - bcache_dev_sectors_dirty(&dc->disk) << 9); - bch_hprint(derivative, dc->writeback_rate_derivative << 9); + char proportional[20]; + char derivative[20]; + char change[20]; + s64 next_io; + + bch_hprint(rate, dc->writeback_rate.rate << 9); + bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9); bch_hprint(target, dc->writeback_rate_target << 9); + bch_hprint(proportional,dc->writeback_rate_proportional << 9); + bch_hprint(derivative, dc->writeback_rate_derivative << 9); + bch_hprint(change, dc->writeback_rate_change << 9); + + next_io = div64_s64(dc->writeback_rate.next - local_clock(), + NSEC_PER_MSEC); return sprintf(buf, - "rate:\t\t%u\n" - "change:\t\t%i\n" + "rate:\t\t%s/sec\n" "dirty:\t\t%s\n" + "target:\t\t%s\n" + "proportional:\t%s\n" "derivative:\t%s\n" - "target:\t\t%s\n", - dc->writeback_rate.rate, - dc->writeback_rate_change, - dirty, derivative, target); + "change:\t\t%s/sec\n" + "next io:\t%llims\n", + rate, dirty, target, proportional, + derivative, change, next_io); } sysfs_hprint(dirty_data, @@ -189,6 +198,7 @@ STORE(__cached_dev) struct kobj_uevent_env *env; #define d_strtoul(var) sysfs_strtoul(var, dc->var) +#define d_strtoul_nonzero(var) sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX) #define d_strtoi_h(var) sysfs_hatoi(var, dc->var) sysfs_strtoul(data_csum, dc->disk.data_csum); @@ -197,16 +207,15 @@ STORE(__cached_dev) d_strtoul(writeback_metadata); d_strtoul(writeback_running); d_strtoul(writeback_delay); - sysfs_strtoul_clamp(writeback_rate, - dc->writeback_rate.rate, 1, 1000000); + sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40); - d_strtoul(writeback_rate_update_seconds); + sysfs_strtoul_clamp(writeback_rate, + dc->writeback_rate.rate, 1, INT_MAX); + + d_strtoul_nonzero(writeback_rate_update_seconds); d_strtoul(writeback_rate_d_term); - d_strtoul(writeback_rate_p_term_inverse); - sysfs_strtoul_clamp(writeback_rate_p_term_inverse, - dc->writeback_rate_p_term_inverse, 1, INT_MAX); - d_strtoul(writeback_rate_d_smooth); + d_strtoul_nonzero(writeback_rate_p_term_inverse); d_strtoi_h(sequential_cutoff); d_strtoi_h(readahead); @@ -313,7 +322,6 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_update_seconds, &sysfs_writeback_rate_d_term, &sysfs_writeback_rate_p_term_inverse, - &sysfs_writeback_rate_d_smooth, &sysfs_writeback_rate_debug, &sysfs_dirty_data, &sysfs_stripe_size, diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 462214eeacb..bb37618e766 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -209,7 +209,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { uint64_t now = local_clock(); - d->next += div_u64(done, d->rate); + d->next += div_u64(done * NSEC_PER_SEC, d->rate); + + if (time_before64(now + NSEC_PER_SEC, d->next)) + d->next = now + NSEC_PER_SEC; + + if (time_after64(now - NSEC_PER_SEC * 2, d->next)) + d->next = now - NSEC_PER_SEC * 2; return time_after64(d->next, now) ? div_u64(d->next - now, NSEC_PER_SEC / HZ) diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 362c4b3f8b4..1030c6020e9 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -110,7 +110,7 @@ do { \ _r; \ }) -#define heap_peek(h) ((h)->size ? (h)->data[0] : NULL) +#define heap_peek(h) ((h)->used ? (h)->data[0] : NULL) #define heap_full(h) ((h)->used == (h)->size) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 99053b1251b..6c44fe059c2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -30,38 +30,40 @@ static void __update_writeback_rate(struct cached_dev *dc) /* PD controller */ - int change = 0; - int64_t error; int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); int64_t derivative = dirty - dc->disk.sectors_dirty_last; + int64_t proportional = dirty - target; + int64_t change; dc->disk.sectors_dirty_last = dirty; - derivative *= dc->writeback_rate_d_term; - derivative = clamp(derivative, -dirty, dirty); + /* Scale to sectors per second */ - derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, - dc->writeback_rate_d_smooth, 0); + proportional *= dc->writeback_rate_update_seconds; + proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse); - /* Avoid divide by zero */ - if (!target) - goto out; + derivative = div_s64(derivative, dc->writeback_rate_update_seconds); - error = div64_s64((dirty + derivative - target) << 8, target); + derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative, + (dc->writeback_rate_d_term / + dc->writeback_rate_update_seconds) ?: 1, 0); + + derivative *= dc->writeback_rate_d_term; + derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse); - change = div_s64((dc->writeback_rate.rate * error) >> 8, - dc->writeback_rate_p_term_inverse); + change = proportional + derivative; /* Don't increase writeback rate if the device isn't keeping up */ if (change > 0 && time_after64(local_clock(), - dc->writeback_rate.next + 10 * NSEC_PER_MSEC)) + dc->writeback_rate.next + NSEC_PER_MSEC)) change = 0; dc->writeback_rate.rate = - clamp_t(int64_t, dc->writeback_rate.rate + change, + clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change, 1, NSEC_PER_MSEC); -out: + + dc->writeback_rate_proportional = proportional; dc->writeback_rate_derivative = derivative; dc->writeback_rate_change = change; dc->writeback_rate_target = target; @@ -87,15 +89,11 @@ static void update_writeback_rate(struct work_struct *work) static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) { - uint64_t ret; - if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) || !dc->writeback_percent) return 0; - ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); - - return min_t(uint64_t, ret, HZ); + return bch_next_delay(&dc->writeback_rate, sectors); } struct dirty_io { @@ -241,7 +239,7 @@ static void read_dirty(struct cached_dev *dc) if (KEY_START(&w->key) != dc->last_read || jiffies_to_msecs(delay) > 50) while (!kthread_should_stop() && delay) - delay = schedule_timeout_interruptible(delay); + delay = schedule_timeout_uninterruptible(delay); dc->last_read = KEY_OFFSET(&w->key); @@ -438,7 +436,7 @@ static int bch_writeback_thread(void *arg) while (delay && !kthread_should_stop() && !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) - delay = schedule_timeout_interruptible(delay); + delay = schedule_timeout_uninterruptible(delay); } } @@ -476,6 +474,8 @@ void bch_sectors_dirty_init(struct cached_dev *dc) bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0), sectors_dirty_init_fn, 0); + + dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); } int bch_cached_dev_writeback_init(struct cached_dev *dc) @@ -490,18 +490,15 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_delay = 30; dc->writeback_rate.rate = 1024; - dc->writeback_rate_update_seconds = 30; - dc->writeback_rate_d_term = 16; - dc->writeback_rate_p_term_inverse = 64; - dc->writeback_rate_d_smooth = 8; + dc->writeback_rate_update_seconds = 5; + dc->writeback_rate_d_term = 30; + dc->writeback_rate_p_term_inverse = 6000; dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) return PTR_ERR(dc->writeback_thread); - set_task_state(dc->writeback_thread, TASK_INTERRUPTIBLE); - INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 81559b2deda..539e24a1c86 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2209,20 +2209,25 @@ void bond_3ad_adapter_speed_changed(struct slave *slave) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("Warning: %s: speed changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } + __get_state_machine_lock(port); + port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= (__get_link_speed(port) << 1); pr_debug("Port %d changed speed\n", port->actor_port_number); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /** @@ -2237,20 +2242,25 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("%s: Warning: duplex changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } + __get_state_machine_lock(port); + port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; port->actor_oper_port_key = port->actor_admin_port_key |= __get_duplex(port); pr_debug("Port %d changed duplex\n", port->actor_port_number); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /** @@ -2266,15 +2276,21 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port = &(SLAVE_AD_INFO(slave).port); - // if slave is null, the whole port is not initialized + /* if slave is null, the whole port is not initialized */ if (!port->slave) { pr_warning("Warning: %s: link status changed for uninitialized port on %s\n", slave->bond->dev->name, slave->dev->name); return; } - // on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) - // on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report + __get_state_machine_lock(port); + /* on link down we are zeroing duplex and speed since + * some of the adaptors(ce1000.lan) report full duplex/speed + * instead of N/A(duplex) / 0(speed). + * + * on link up we are forcing recheck on the duplex and speed since + * some of he adaptors(ce1000.lan) report. + */ if (link == BOND_LINK_UP) { port->is_enabled = true; port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; @@ -2290,10 +2306,15 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port->actor_oper_port_key = (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS); } - //BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); - // there is no need to reselect a new aggregator, just signal the - // state machines to reinitialize + pr_debug("Port %d changed link status to %s", + port->actor_port_number, + (link == BOND_LINK_UP) ? "UP" : "DOWN"); + /* there is no need to reselect a new aggregator, just signal the + * state machines to reinitialize + */ port->sm_vars |= AD_PORT_BEGIN; + + __release_state_machine_lock(port); } /* diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 9a1ea4a171c..011f163c2c6 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -776,7 +776,7 @@ static ssize_t bonding_store_num_peer_notif(struct device *d, int ret; ret = kstrtou8(buf, 10, &new_value); - if (!ret) { + if (ret) { pr_err("%s: invalid value %s specified.\n", bond->dev->name, buf); return ret; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index eedf2a5fc2b..eeecc29cf5b 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -555,6 +555,8 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) /* Make sure pointer to data buffer is set */ wmb(); + skb_tx_timestamp(skb); + *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); /* Increment index to point to the next BD */ @@ -569,8 +571,6 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) arc_reg_set(priv, R_STATUS, TXPL_MASK); - skb_tx_timestamp(skb); - return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index a36a760ada2..29801750f23 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -145,9 +145,11 @@ static void atl1c_reset_pcie(struct atl1c_hw *hw, u32 flag) * Mask some pcie error bits */ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); - pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &data); - data &= ~(PCI_ERR_UNC_DLP | PCI_ERR_UNC_FCP); - pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, data); + if (pos) { + pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &data); + data &= ~(PCI_ERR_UNC_DLP | PCI_ERR_UNC_FCP); + pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, data); + } /* clear error status */ pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_NFED | diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 5d41f414e11..7f968a9b52c 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1481,10 +1481,10 @@ static int bgmac_probe(struct bcma_device *core) /* Northstar has 4 GMAC cores */ for (ns_gmac = 0; ns_gmac < 4; ns_gmac++) { - /* As northstar requirement, we have to reset all GAMCs + /* As Northstar requirement, we have to reset all GMACs * before accessing one. bgmac_chip_reset() call * bcma_core_enable() for this core. Then the other - * three GAMCs didn't reset. We do it here. + * three GMACs didn't reset. We do it here. */ ns_core = bcma_find_core_unit(core->bus, BCMA_CORE_MAC_GBIT, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index dad67905f4e..eb105abcf0e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1250,7 +1250,10 @@ struct bnx2x_slowpath { * Therefore, if they would have been defined in the same union, * data can get corrupted. */ - struct afex_vif_list_ramrod_data func_afex_rdata; + union { + struct afex_vif_list_ramrod_data viflist_data; + struct function_update_data func_update; + } func_afex_rdata; /* used by dmae command executer */ struct dmae_command dmae[MAX_DMAE_C]; @@ -2501,4 +2504,6 @@ void bnx2x_set_local_cmng(struct bnx2x *bp); #define MCPR_SCRATCH_BASE(bp) \ (CHIP_IS_E1x(bp) ? MCP_REG_MCPR_SCRATCH : MCP_A_REG_MCPR_SCRATCH) +#define E1H_MAX_MF_SB_COUNT (HC_SB_MAX_SB_E1X/(E1HVN_MAX * PORT_MAX)) + #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 20dcc02431c..11fc7958549 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -3865,6 +3865,19 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, bnx2x_warpcore_enable_AN_KR2(phy, params, vars); } else { + /* Enable Auto-Detect to support 1G over CL37 as well */ + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, 0x10); + + /* Force cl48 sync_status LOW to avoid getting stuck in CL73 + * parallel-detect loop when CL73 and CL37 are enabled. + */ + CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK, + MDIO_AER_BLOCK_AER_REG, 0); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_RXB_ANA_RX_CONTROL_PCI, 0x0800); + bnx2x_set_aer_mmd(params, phy); + bnx2x_disable_kr2(params, vars, phy); } @@ -8120,17 +8133,20 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy, *edc_mode = EDC_MODE_ACTIVE_DAC; else check_limiting_mode = 1; - } else if (copper_module_type & - SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE) { + } else { + *edc_mode = EDC_MODE_PASSIVE_DAC; + /* Even in case PASSIVE_DAC indication is not set, + * treat it as a passive DAC cable, since some cables + * don't have this indication. + */ + if (copper_module_type & + SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE) { DP(NETIF_MSG_LINK, "Passive Copper cable detected\n"); - *edc_mode = - EDC_MODE_PASSIVE_DAC; - } else { - DP(NETIF_MSG_LINK, - "Unknown copper-cable-type 0x%x !!!\n", - copper_module_type); - return -EINVAL; + } else { + DP(NETIF_MSG_LINK, + "Unknown copper-cable-type\n"); + } } break; } @@ -10825,9 +10841,9 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy, (1<<11)); if (((phy->req_line_speed == SPEED_AUTO_NEG) && - (phy->speed_cap_mask & - PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) || - (phy->req_line_speed == SPEED_1000)) { + (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) || + (phy->req_line_speed == SPEED_1000)) { an_1000_val |= (1<<8); autoneg_val |= (1<<9 | 1<<12); if (phy->req_duplex == DUPLEX_FULL) @@ -10843,30 +10859,32 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy, 0x09, &an_1000_val); - /* Set 100 speed advertisement */ - if (((phy->req_line_speed == SPEED_AUTO_NEG) && - (phy->speed_cap_mask & - (PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL | - PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF)))) { - an_10_100_val |= (1<<7); - /* Enable autoneg and restart autoneg for legacy speeds */ - autoneg_val |= (1<<9 | 1<<12); - - if (phy->req_duplex == DUPLEX_FULL) - an_10_100_val |= (1<<8); - DP(NETIF_MSG_LINK, "Advertising 100M\n"); - } - - /* Set 10 speed advertisement */ - if (((phy->req_line_speed == SPEED_AUTO_NEG) && - (phy->speed_cap_mask & - (PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL | - PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)))) { - an_10_100_val |= (1<<5); - autoneg_val |= (1<<9 | 1<<12); - if (phy->req_duplex == DUPLEX_FULL) + /* Advertise 10/100 link speed */ + if (phy->req_line_speed == SPEED_AUTO_NEG) { + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF) { + an_10_100_val |= (1<<5); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 10M-HD\n"); + } + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL) { an_10_100_val |= (1<<6); - DP(NETIF_MSG_LINK, "Advertising 10M\n"); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 10M-FD\n"); + } + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF) { + an_10_100_val |= (1<<7); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 100M-HD\n"); + } + if (phy->speed_cap_mask & + PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL) { + an_10_100_val |= (1<<8); + autoneg_val |= (1<<9 | 1<<12); + DP(NETIF_MSG_LINK, "Advertising 100M-FD\n"); + } } /* Only 10/100 are allowed to work in FORCE mode */ @@ -13342,6 +13360,10 @@ static u8 bnx2x_analyze_link_error(struct link_params *params, DP(NETIF_MSG_LINK, "Link changed:[%x %x]->%x\n", vars->link_up, old_status, status); + /* Do not touch the link in case physical link down */ + if ((vars->phy_flags & PHY_PHYSICAL_LINK_FLAG) == 0) + return 1; + /* a. Update shmem->link_status accordingly * b. Update link_vars->link_up */ @@ -13550,7 +13572,7 @@ static void bnx2x_check_kr2_wa(struct link_params *params, */ not_kr2_device = (((base_page & 0x8000) == 0) || (((base_page & 0x8000) && - ((next_page & 0xe0) == 0x2)))); + ((next_page & 0xe0) == 0x20)))); /* In case KR2 is already disabled, check if we need to re-enable it */ if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d3748bf3ac7..18498fed520 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -11517,9 +11517,9 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp) } } - /* adjust igu_sb_cnt to MF for E1x */ - if (CHIP_IS_E1x(bp) && IS_MF(bp)) - bp->igu_sb_cnt /= E1HVN_MAX; + /* adjust igu_sb_cnt to MF for E1H */ + if (CHIP_IS_E1H(bp) && IS_MF(bp)) + bp->igu_sb_cnt = min_t(u8, bp->igu_sb_cnt, E1H_MAX_MF_SB_COUNT); /* port info */ bnx2x_get_port_hwinfo(bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 08f8047188e..2beb5430b87 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -7180,6 +7180,7 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_WC_REG_RX1_PCI_CTRL 0x80ca #define MDIO_WC_REG_RX2_PCI_CTRL 0x80da #define MDIO_WC_REG_RX3_PCI_CTRL 0x80ea +#define MDIO_WC_REG_RXB_ANA_RX_CONTROL_PCI 0x80fa #define MDIO_WC_REG_XGXSBLK2_UNICORE_MODE_10G 0x8104 #define MDIO_WC_REG_XGXS_STATUS3 0x8129 #define MDIO_WC_REG_PAR_DET_10G_STATUS 0x8130 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index babf7b954ae..98cccd487fc 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2036,6 +2036,7 @@ static int bnx2x_vlan_mac_del_all(struct bnx2x *bp, struct bnx2x_vlan_mac_ramrod_params p; struct bnx2x_exe_queue_obj *exeq = &o->exe_queue; struct bnx2x_exeq_elem *exeq_pos, *exeq_pos_n; + unsigned long flags; int read_lock; int rc = 0; @@ -2044,8 +2045,9 @@ static int bnx2x_vlan_mac_del_all(struct bnx2x *bp, spin_lock_bh(&exeq->lock); list_for_each_entry_safe(exeq_pos, exeq_pos_n, &exeq->exe_queue, link) { - if (exeq_pos->cmd_data.vlan_mac.vlan_mac_flags == - *vlan_mac_flags) { + flags = exeq_pos->cmd_data.vlan_mac.vlan_mac_flags; + if (BNX2X_VLAN_MAC_CMP_FLAGS(flags) == + BNX2X_VLAN_MAC_CMP_FLAGS(*vlan_mac_flags)) { rc = exeq->remove(bp, exeq->owner, exeq_pos); if (rc) { BNX2X_ERR("Failed to remove command\n"); @@ -2078,7 +2080,9 @@ static int bnx2x_vlan_mac_del_all(struct bnx2x *bp, return read_lock; list_for_each_entry(pos, &o->head, link) { - if (pos->vlan_mac_flags == *vlan_mac_flags) { + flags = pos->vlan_mac_flags; + if (BNX2X_VLAN_MAC_CMP_FLAGS(flags) == + BNX2X_VLAN_MAC_CMP_FLAGS(*vlan_mac_flags)) { p.user_req.vlan_mac_flags = pos->vlan_mac_flags; memcpy(&p.user_req.u, &pos->u, sizeof(pos->u)); rc = bnx2x_config_vlan_mac(bp, &p); @@ -4380,8 +4384,11 @@ int bnx2x_config_rss(struct bnx2x *bp, struct bnx2x_raw_obj *r = &o->raw; /* Do nothing if only driver cleanup was requested */ - if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) + if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) { + DP(BNX2X_MSG_SP, "Not configuring RSS ramrod_flags=%lx\n", + p->ramrod_flags); return 0; + } r->set_pending(r); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 658f4e33abf..6a53c15c85a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -266,6 +266,13 @@ enum { BNX2X_DONT_CONSUME_CAM_CREDIT, BNX2X_DONT_CONSUME_CAM_CREDIT_DEST, }; +/* When looking for matching filters, some flags are not interesting */ +#define BNX2X_VLAN_MAC_CMP_MASK (1 << BNX2X_UC_LIST_MAC | \ + 1 << BNX2X_ETH_MAC | \ + 1 << BNX2X_ISCSI_ETH_MAC | \ + 1 << BNX2X_NETQ_ETH_MAC) +#define BNX2X_VLAN_MAC_CMP_FLAGS(flags) \ + ((flags) & BNX2X_VLAN_MAC_CMP_MASK) struct bnx2x_vlan_mac_ramrod_params { /* Object to run the command from */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6fe52d301df..31ab924600c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1213,6 +1213,11 @@ static void bnx2x_vfop_rxmode(struct bnx2x *bp, struct bnx2x_virtf *vf) /* next state */ vfop->state = BNX2X_VFOP_RXMODE_DONE; + /* record the accept flags in vfdb so hypervisor can modify them + * if necessary + */ + bnx2x_vfq(vf, ramrod->cl_id - vf->igu_base_id, accept_flags) = + ramrod->rx_accept_flags; vfop->rc = bnx2x_config_rx_mode(bp, ramrod); bnx2x_vfop_finalize(vf, vfop->rc, VFOP_DONE); op_err: @@ -1228,39 +1233,43 @@ op_pending: return; } +static void bnx2x_vf_prep_rx_mode(struct bnx2x *bp, u8 qid, + struct bnx2x_rx_mode_ramrod_params *ramrod, + struct bnx2x_virtf *vf, + unsigned long accept_flags) +{ + struct bnx2x_vf_queue *vfq = vfq_get(vf, qid); + + memset(ramrod, 0, sizeof(*ramrod)); + ramrod->cid = vfq->cid; + ramrod->cl_id = vfq_cl_id(vf, vfq); + ramrod->rx_mode_obj = &bp->rx_mode_obj; + ramrod->func_id = FW_VF_HANDLE(vf->abs_vfid); + ramrod->rx_accept_flags = accept_flags; + ramrod->tx_accept_flags = accept_flags; + ramrod->pstate = &vf->filter_state; + ramrod->state = BNX2X_FILTER_RX_MODE_PENDING; + + set_bit(BNX2X_FILTER_RX_MODE_PENDING, &vf->filter_state); + set_bit(RAMROD_RX, &ramrod->ramrod_flags); + set_bit(RAMROD_TX, &ramrod->ramrod_flags); + + ramrod->rdata = bnx2x_vf_sp(bp, vf, rx_mode_rdata.e2); + ramrod->rdata_mapping = bnx2x_vf_sp_map(bp, vf, rx_mode_rdata.e2); +} + int bnx2x_vfop_rxmode_cmd(struct bnx2x *bp, struct bnx2x_virtf *vf, struct bnx2x_vfop_cmd *cmd, int qid, unsigned long accept_flags) { - struct bnx2x_vf_queue *vfq = vfq_get(vf, qid); struct bnx2x_vfop *vfop = bnx2x_vfop_add(bp, vf); if (vfop) { struct bnx2x_rx_mode_ramrod_params *ramrod = &vf->op_params.rx_mode; - memset(ramrod, 0, sizeof(*ramrod)); - - /* Prepare ramrod parameters */ - ramrod->cid = vfq->cid; - ramrod->cl_id = vfq_cl_id(vf, vfq); - ramrod->rx_mode_obj = &bp->rx_mode_obj; - ramrod->func_id = FW_VF_HANDLE(vf->abs_vfid); - - ramrod->rx_accept_flags = accept_flags; - ramrod->tx_accept_flags = accept_flags; - ramrod->pstate = &vf->filter_state; - ramrod->state = BNX2X_FILTER_RX_MODE_PENDING; - - set_bit(BNX2X_FILTER_RX_MODE_PENDING, &vf->filter_state); - set_bit(RAMROD_RX, &ramrod->ramrod_flags); - set_bit(RAMROD_TX, &ramrod->ramrod_flags); - - ramrod->rdata = - bnx2x_vf_sp(bp, vf, rx_mode_rdata.e2); - ramrod->rdata_mapping = - bnx2x_vf_sp_map(bp, vf, rx_mode_rdata.e2); + bnx2x_vf_prep_rx_mode(bp, qid, ramrod, vf, accept_flags); bnx2x_vfop_opset(BNX2X_VFOP_RXMODE_CONFIG, bnx2x_vfop_rxmode, cmd->done); @@ -3213,13 +3222,16 @@ int bnx2x_enable_sriov(struct bnx2x *bp) bnx2x_iov_static_resc(bp, vf); } - /* prepare msix vectors in VF configuration space */ + /* prepare msix vectors in VF configuration space - the value in the + * PCI configuration space should be the index of the last entry, + * namely one less than the actual size of the table + */ for (vf_idx = first_vf; vf_idx < first_vf + req_vfs; vf_idx++) { bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf_idx)); REG_WR(bp, PCICFG_OFFSET + GRC_CONFIG_REG_VF_MSIX_CONTROL, - num_vf_queues); + num_vf_queues - 1); DP(BNX2X_MSG_IOV, "set msix vec num in VF %d cfg space to %d\n", - vf_idx, num_vf_queues); + vf_idx, num_vf_queues - 1); } bnx2x_pretend_func(bp, BP_ABS_FUNC(bp)); @@ -3447,10 +3459,18 @@ out: int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) { + struct bnx2x_queue_state_params q_params = {NULL}; + struct bnx2x_vlan_mac_ramrod_params ramrod_param; + struct bnx2x_queue_update_params *update_params; + struct pf_vf_bulletin_content *bulletin = NULL; + struct bnx2x_rx_mode_ramrod_params rx_ramrod; struct bnx2x *bp = netdev_priv(dev); - int rc, q_logical_state; + struct bnx2x_vlan_mac_obj *vlan_obj; + unsigned long vlan_mac_flags = 0; + unsigned long ramrod_flags = 0; struct bnx2x_virtf *vf = NULL; - struct pf_vf_bulletin_content *bulletin = NULL; + unsigned long accept_flags; + int rc; /* sanity and init */ rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin); @@ -3468,104 +3488,118 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) /* update PF's copy of the VF's bulletin. No point in posting the vlan * to the VF since it doesn't have anything to do with it. But it useful * to store it here in case the VF is not up yet and we can only - * configure the vlan later when it does. + * configure the vlan later when it does. Treat vlan id 0 as remove the + * Host tag. */ - bulletin->valid_bitmap |= 1 << VLAN_VALID; + if (vlan > 0) + bulletin->valid_bitmap |= 1 << VLAN_VALID; + else + bulletin->valid_bitmap &= ~(1 << VLAN_VALID); bulletin->vlan = vlan; /* is vf initialized and queue set up? */ - q_logical_state = - bnx2x_get_q_logical_state(bp, &bnx2x_leading_vfq(vf, sp_obj)); - if (vf->state == VF_ENABLED && - q_logical_state == BNX2X_Q_LOGICAL_STATE_ACTIVE) { - /* configure the vlan in device on this vf's queue */ - unsigned long ramrod_flags = 0; - unsigned long vlan_mac_flags = 0; - struct bnx2x_vlan_mac_obj *vlan_obj = - &bnx2x_leading_vfq(vf, vlan_obj); - struct bnx2x_vlan_mac_ramrod_params ramrod_param; - struct bnx2x_queue_state_params q_params = {NULL}; - struct bnx2x_queue_update_params *update_params; + if (vf->state != VF_ENABLED || + bnx2x_get_q_logical_state(bp, &bnx2x_leading_vfq(vf, sp_obj)) != + BNX2X_Q_LOGICAL_STATE_ACTIVE) + return rc; - rc = validate_vlan_mac(bp, &bnx2x_leading_vfq(vf, mac_obj)); - if (rc) - return rc; - memset(&ramrod_param, 0, sizeof(ramrod_param)); + /* configure the vlan in device on this vf's queue */ + vlan_obj = &bnx2x_leading_vfq(vf, vlan_obj); + rc = validate_vlan_mac(bp, &bnx2x_leading_vfq(vf, mac_obj)); + if (rc) + return rc; - /* must lock vfpf channel to protect against vf flows */ - bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); + /* must lock vfpf channel to protect against vf flows */ + bnx2x_lock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); - /* remove existing vlans */ - __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); - rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_mac_flags, - &ramrod_flags); - if (rc) { - BNX2X_ERR("failed to delete vlans\n"); - rc = -EINVAL; - goto out; - } + /* remove existing vlans */ + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + rc = vlan_obj->delete_all(bp, vlan_obj, &vlan_mac_flags, + &ramrod_flags); + if (rc) { + BNX2X_ERR("failed to delete vlans\n"); + rc = -EINVAL; + goto out; + } + + /* need to remove/add the VF's accept_any_vlan bit */ + accept_flags = bnx2x_leading_vfq(vf, accept_flags); + if (vlan) + clear_bit(BNX2X_ACCEPT_ANY_VLAN, &accept_flags); + else + set_bit(BNX2X_ACCEPT_ANY_VLAN, &accept_flags); + + bnx2x_vf_prep_rx_mode(bp, LEADING_IDX, &rx_ramrod, vf, + accept_flags); + bnx2x_leading_vfq(vf, accept_flags) = accept_flags; + bnx2x_config_rx_mode(bp, &rx_ramrod); + + /* configure the new vlan to device */ + memset(&ramrod_param, 0, sizeof(ramrod_param)); + __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); + ramrod_param.vlan_mac_obj = vlan_obj; + ramrod_param.ramrod_flags = ramrod_flags; + set_bit(BNX2X_DONT_CONSUME_CAM_CREDIT, + &ramrod_param.user_req.vlan_mac_flags); + ramrod_param.user_req.u.vlan.vlan = vlan; + ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD; + rc = bnx2x_config_vlan_mac(bp, &ramrod_param); + if (rc) { + BNX2X_ERR("failed to configure vlan\n"); + rc = -EINVAL; + goto out; + } - /* send queue update ramrod to configure default vlan and silent - * vlan removal + /* send queue update ramrod to configure default vlan and silent + * vlan removal + */ + __set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); + q_params.cmd = BNX2X_Q_CMD_UPDATE; + q_params.q_obj = &bnx2x_leading_vfq(vf, sp_obj); + update_params = &q_params.params.update; + __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN_CHNG, + &update_params->update_flags); + __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM_CHNG, + &update_params->update_flags); + if (vlan == 0) { + /* if vlan is 0 then we want to leave the VF traffic + * untagged, and leave the incoming traffic untouched + * (i.e. do not remove any vlan tags). */ - __set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); - q_params.cmd = BNX2X_Q_CMD_UPDATE; - q_params.q_obj = &bnx2x_leading_vfq(vf, sp_obj); - update_params = &q_params.params.update; - __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN_CHNG, + __clear_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, + &update_params->update_flags); + __clear_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, + &update_params->update_flags); + } else { + /* configure default vlan to vf queue and set silent + * vlan removal (the vf remains unaware of this vlan). + */ + __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, &update_params->update_flags); - __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM_CHNG, + __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, &update_params->update_flags); + update_params->def_vlan = vlan; + update_params->silent_removal_value = + vlan & VLAN_VID_MASK; + update_params->silent_removal_mask = VLAN_VID_MASK; + } - if (vlan == 0) { - /* if vlan is 0 then we want to leave the VF traffic - * untagged, and leave the incoming traffic untouched - * (i.e. do not remove any vlan tags). - */ - __clear_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, - &update_params->update_flags); - __clear_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, - &update_params->update_flags); - } else { - /* configure the new vlan to device */ - __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); - ramrod_param.vlan_mac_obj = vlan_obj; - ramrod_param.ramrod_flags = ramrod_flags; - ramrod_param.user_req.u.vlan.vlan = vlan; - ramrod_param.user_req.cmd = BNX2X_VLAN_MAC_ADD; - rc = bnx2x_config_vlan_mac(bp, &ramrod_param); - if (rc) { - BNX2X_ERR("failed to configure vlan\n"); - rc = -EINVAL; - goto out; - } - - /* configure default vlan to vf queue and set silent - * vlan removal (the vf remains unaware of this vlan). - */ - update_params = &q_params.params.update; - __set_bit(BNX2X_Q_UPDATE_DEF_VLAN_EN, - &update_params->update_flags); - __set_bit(BNX2X_Q_UPDATE_SILENT_VLAN_REM, - &update_params->update_flags); - update_params->def_vlan = vlan; - } + /* Update the Queue state */ + rc = bnx2x_queue_state_change(bp, &q_params); + if (rc) { + BNX2X_ERR("Failed to configure default VLAN\n"); + goto out; + } - /* Update the Queue state */ - rc = bnx2x_queue_state_change(bp, &q_params); - if (rc) { - BNX2X_ERR("Failed to configure default VLAN\n"); - goto out; - } - /* clear the flag indicating that this VF needs its vlan - * (will only be set if the HV configured the Vlan before vf was - * up and we were called because the VF came up later - */ + /* clear the flag indicating that this VF needs its vlan + * (will only be set if the HV configured the Vlan before vf was + * up and we were called because the VF came up later + */ out: - vf->cfg_flags &= ~VF_CFG_VLAN; - bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); - } + vf->cfg_flags &= ~VF_CFG_VLAN; + bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_VLAN); + return rc; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index a5c84a7d454..d72ab7e24de 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -74,6 +74,7 @@ struct bnx2x_vf_queue { /* VLANs object */ struct bnx2x_vlan_mac_obj vlan_obj; atomic_t vlan_count; /* 0 means vlan-0 is set ~ untagged */ + unsigned long accept_flags; /* last accept flags configured */ /* Queue Slow-path State object */ struct bnx2x_queue_sp_obj sp_obj; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index e5f7985a372..1b1ad31b455 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -208,7 +208,7 @@ static int bnx2x_get_vf_id(struct bnx2x *bp, u32 *vf_id) return -EINVAL; } - BNX2X_ERR("valid ME register value: 0x%08x\n", me_reg); + DP(BNX2X_MSG_IOV, "valid ME register value: 0x%08x\n", me_reg); *vf_id = (me_reg & ME_REG_VF_NUM_MASK) >> ME_REG_VF_NUM_SHIFT; @@ -1610,6 +1610,8 @@ static void bnx2x_vfop_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf) if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) { unsigned long accept = 0; + struct pf_vf_bulletin_content *bulletin = + BP_VF_BULLETIN(bp, vf->index); /* covert VF-PF if mask to bnx2x accept flags */ if (msg->rx_mask & VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST) @@ -1629,9 +1631,11 @@ static void bnx2x_vfop_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf) __set_bit(BNX2X_ACCEPT_BROADCAST, &accept); /* A packet arriving the vf's mac should be accepted - * with any vlan + * with any vlan, unless a vlan has already been + * configured. */ - __set_bit(BNX2X_ACCEPT_ANY_VLAN, &accept); + if (!(bulletin->valid_bitmap & (1 << VLAN_VALID))) + __set_bit(BNX2X_ACCEPT_ANY_VLAN, &accept); /* set rx-mode */ rc = bnx2x_vfop_rxmode_cmd(bp, vf, &cmd, @@ -1722,6 +1726,21 @@ static void bnx2x_vf_mbx_set_q_filters(struct bnx2x *bp, goto response; } } + /* if vlan was set by hypervisor we don't allow guest to config vlan */ + if (bulletin->valid_bitmap & 1 << VLAN_VALID) { + int i; + + /* search for vlan filters */ + for (i = 0; i < filters->n_mac_vlan_filters; i++) { + if (filters->filters[i].flags & + VFPF_Q_FILTER_VLAN_TAG_VALID) { + BNX2X_ERR("VF[%d] attempted to configure vlan but one was already set by Hypervisor. Aborting request\n", + vf->abs_vfid); + vf->op_rc = -EPERM; + goto response; + } + } + } /* verify vf_qid */ if (filters->vf_qid > vf_rxq_count(vf)) @@ -1817,6 +1836,9 @@ static void bnx2x_vf_mbx_update_rss(struct bnx2x *bp, struct bnx2x_virtf *vf, vf_op_params->rss_result_mask = rss_tlv->rss_result_mask; /* flags handled individually for backward/forward compatability */ + vf_op_params->rss_flags = 0; + vf_op_params->ramrod_flags = 0; + if (rss_tlv->rss_flags & VFPF_RSS_MODE_DISABLED) __set_bit(BNX2X_RSS_MODE_DISABLED, &vf_op_params->rss_flags); if (rss_tlv->rss_flags & VFPF_RSS_MODE_REGULAR) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d88ef551dfc..c37e9f27ff6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7638,7 +7638,7 @@ static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) { u32 base = (u32) mapping & 0xffffffff; - return (base > 0xffffdcc0) && (base + len + 8 < base); + return base + len + 8 < base; } /* Test for TSO DMA buffers that cross into regions which are within MSS bytes diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index d31524f9d06..5f24a9ffcfa 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3071,11 +3071,11 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) dma_addr = skb_frag_dma_map(&bnad->pcidev->dev, frag, 0, size, DMA_TO_DEVICE); - unmap->vectors[vect_id].dma_len = size; + dma_unmap_len_set(&unmap->vectors[vect_id], dma_len, size); BNA_SET_DMA_ADDR(dma_addr, &txqent->vector[vect_id].host_addr); txqent->vector[vect_id].length = htons(size); dma_unmap_addr_set(&unmap->vectors[vect_id], dma_addr, - dma_addr); + dma_addr); head_unmap->nvecs++; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 17fe50b9152..b97e35c33d1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -228,6 +228,25 @@ struct tp_params { uint32_t dack_re; /* DACK timer resolution */ unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ + + u32 vlan_pri_map; /* cached TP_VLAN_PRI_MAP */ + u32 ingress_config; /* cached TP_INGRESS_CONFIG */ + + /* TP_VLAN_PRI_MAP Compressed Filter Tuple field offsets. This is a + * subset of the set of fields which may be present in the Compressed + * Filter Tuple portion of filters and TCP TCB connections. The + * fields which are present are controlled by the TP_VLAN_PRI_MAP. + * Since a variable number of fields may or may not be present, their + * shifted field positions within the Compressed Filter Tuple may + * vary, or not even be present if the field isn't selected in + * TP_VLAN_PRI_MAP. Since some of these fields are needed in various + * places we store their offsets here, or a -1 if the field isn't + * present. + */ + int vlan_shift; + int vnic_shift; + int port_shift; + int protocol_shift; }; struct vpd_params { @@ -925,6 +944,8 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, const u8 *fw_data, unsigned int fw_size, struct fw_hdr *card_fw, enum dev_state state, int *reset); int t4_prep_adapter(struct adapter *adapter); +int t4_init_tp_params(struct adapter *adap); +int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d6b12e035a7..fff02ed1295 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2986,7 +2986,14 @@ int cxgb4_alloc_stid(struct tid_info *t, int family, void *data) if (stid >= 0) { t->stid_tab[stid].data = data; stid += t->stid_base; - t->stids_in_use++; + /* IPv6 requires max of 520 bits or 16 cells in TCAM + * This is equivalent to 4 TIDs. With CLIP enabled it + * needs 2 TIDs. + */ + if (family == PF_INET) + t->stids_in_use++; + else + t->stids_in_use += 4; } spin_unlock_bh(&t->stid_lock); return stid; @@ -3012,7 +3019,8 @@ int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data) } if (stid >= 0) { t->stid_tab[stid].data = data; - stid += t->stid_base; + stid -= t->nstids; + stid += t->sftid_base; t->stids_in_use++; } spin_unlock_bh(&t->stid_lock); @@ -3024,14 +3032,24 @@ EXPORT_SYMBOL(cxgb4_alloc_sftid); */ void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family) { - stid -= t->stid_base; + /* Is it a server filter TID? */ + if (t->nsftids && (stid >= t->sftid_base)) { + stid -= t->sftid_base; + stid += t->nstids; + } else { + stid -= t->stid_base; + } + spin_lock_bh(&t->stid_lock); if (family == PF_INET) __clear_bit(stid, t->stid_bmap); else bitmap_release_region(t->stid_bmap, stid, 2); t->stid_tab[stid].data = NULL; - t->stids_in_use--; + if (family == PF_INET) + t->stids_in_use--; + else + t->stids_in_use -= 4; spin_unlock_bh(&t->stid_lock); } EXPORT_SYMBOL(cxgb4_free_stid); @@ -3134,6 +3152,7 @@ static int tid_init(struct tid_info *t) size_t size; unsigned int stid_bmap_size; unsigned int natids = t->natids; + struct adapter *adap = container_of(t, struct adapter, tids); stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); size = t->ntids * sizeof(*t->tid_tab) + @@ -3167,6 +3186,11 @@ static int tid_init(struct tid_info *t) t->afree = t->atid_tab; } bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + (is_t4(adap->params.chip) || is_t5(adap->params.chip))) + __set_bit(0, t->stid_bmap); + return 0; } @@ -3731,7 +3755,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET( t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >> (adap->fn * 4)); - lli.filt_mode = adap->filter_mode; + lli.filt_mode = adap->params.tp.vlan_pri_map; /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ for (i = 0; i < NCHAN; i++) lli.tx_modq[i] = i; @@ -4179,7 +4203,7 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, adap = netdev2adap(dev); /* Adjust stid to correct filter index */ - stid -= adap->tids.nstids; + stid -= adap->tids.sftid_base; stid += adap->tids.nftids; /* Check to make sure the filter requested is writable ... @@ -4205,12 +4229,17 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, f->fs.val.lip[i] = val[i]; f->fs.mask.lip[i] = ~0; } - if (adap->filter_mode & F_PORT) { + if (adap->params.tp.vlan_pri_map & F_PORT) { f->fs.val.iport = port; f->fs.mask.iport = mask; } } + if (adap->params.tp.vlan_pri_map & F_PROTOCOL) { + f->fs.val.proto = IPPROTO_TCP; + f->fs.mask.proto = ~0; + } + f->fs.dirsteer = 1; f->fs.iq = queue; /* Mark filter as locked */ @@ -4237,7 +4266,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, adap = netdev2adap(dev); /* Adjust stid to correct filter index */ - stid -= adap->tids.nstids; + stid -= adap->tids.sftid_base; stid += adap->tids.nftids; f = &adap->tids.ftid_tab[stid]; @@ -5092,7 +5121,7 @@ static int adap_init0(struct adapter *adap) enum dev_state state; u32 params[7], val[7]; struct fw_caps_config_cmd caps_cmd; - int reset = 1, j; + int reset = 1; /* * Contact FW, advertising Master capability (and potentially forcing @@ -5434,21 +5463,11 @@ static int adap_init0(struct adapter *adap) /* * These are finalized by FW initialization, load their values now. */ - v = t4_read_reg(adap, TP_TIMER_RESOLUTION); - adap->params.tp.tre = TIMERRESOLUTION_GET(v); - adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v); t4_read_mtu_tbl(adap, adap->params.mtus, NULL); t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, adap->params.b_wnd); - /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */ - for (j = 0; j < NCHAN; j++) - adap->params.tp.tx_modq[j] = j; - - t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, - &adap->filter_mode, 1, - TP_VLAN_PRI_MAP); - + t4_init_tp_params(adap); adap->flags |= FW_OK; return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 6f21f2451c3..4dd0a82533e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -131,7 +131,14 @@ static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) { - stid -= t->stid_base; + /* Is it a server filter TID? */ + if (t->nsftids && (stid >= t->sftid_base)) { + stid -= t->sftid_base; + stid += t->nstids; + } else { + stid -= t->stid_base; + } + return stid < (t->nstids + t->nsftids) ? t->stid_tab[stid].data : NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index 29878098101..cb05be905de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -45,6 +45,7 @@ #include "l2t.h" #include "t4_msg.h" #include "t4fw_api.h" +#include "t4_regs.h" #define VLAN_NONE 0xfff @@ -411,6 +412,40 @@ done: } EXPORT_SYMBOL(cxgb4_l2t_get); +u64 cxgb4_select_ntuple(struct net_device *dev, + const struct l2t_entry *l2t) +{ + struct adapter *adap = netdev2adap(dev); + struct tp_params *tp = &adap->params.tp; + u64 ntuple = 0; + + /* Initialize each of the fields which we care about which are present + * in the Compressed Filter Tuple. + */ + if (tp->vlan_shift >= 0 && l2t->vlan != VLAN_NONE) + ntuple |= (F_FT_VLAN_VLD | l2t->vlan) << tp->vlan_shift; + + if (tp->port_shift >= 0) + ntuple |= (u64)l2t->lport << tp->port_shift; + + if (tp->protocol_shift >= 0) + ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift; + + if (tp->vnic_shift >= 0) { + u32 viid = cxgb4_port_viid(dev); + u32 vf = FW_VIID_VIN_GET(viid); + u32 pf = FW_VIID_PFN_GET(viid); + u32 vld = FW_VIID_VIVLD_GET(viid); + + ntuple |= (u64)(V_FT_VNID_ID_VF(vf) | + V_FT_VNID_ID_PF(pf) | + V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift; + } + + return ntuple; +} +EXPORT_SYMBOL(cxgb4_select_ntuple); + /* * Called when address resolution fails for an L2T entry to handle packets * on the arpq head. If a packet specifies a failure handler it is invoked, diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h index 108c0f1fce1..85eb5c71358 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h @@ -98,7 +98,8 @@ int cxgb4_l2t_send(struct net_device *dev, struct sk_buff *skb, struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, const struct net_device *physdev, unsigned int priority); - +u64 cxgb4_select_ntuple(struct net_device *dev, + const struct l2t_entry *l2t); void t4_l2t_update(struct adapter *adap, struct neighbour *neigh); struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d); int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan, diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 42745438c1e..47ffa64fcf1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2583,7 +2583,7 @@ static int t4_sge_init_soft(struct adapter *adap) #undef READ_FL_BUF if (fl_small_pg != PAGE_SIZE || - (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg || + (fl_large_pg != 0 && (fl_large_pg < fl_small_pg || (fl_large_pg & (fl_large_pg-1)) != 0))) { dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n", fl_small_pg, fl_large_pg); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 9903a66b7ba..a3964753935 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3682,6 +3682,109 @@ int t4_prep_adapter(struct adapter *adapter) return 0; } +/** + * t4_init_tp_params - initialize adap->params.tp + * @adap: the adapter + * + * Initialize various fields of the adapter's TP Parameters structure. + */ +int t4_init_tp_params(struct adapter *adap) +{ + int chan; + u32 v; + + v = t4_read_reg(adap, TP_TIMER_RESOLUTION); + adap->params.tp.tre = TIMERRESOLUTION_GET(v); + adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v); + + /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */ + for (chan = 0; chan < NCHAN; chan++) + adap->params.tp.tx_modq[chan] = chan; + + /* Cache the adapter's Compressed Filter Mode and global Incress + * Configuration. + */ + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP); + t4_read_indirect(adap, TP_PIO_ADDR, TP_PIO_DATA, + &adap->params.tp.ingress_config, 1, + TP_INGRESS_CONFIG); + + /* Now that we have TP_VLAN_PRI_MAP cached, we can calculate the field + * shift positions of several elements of the Compressed Filter Tuple + * for this adapter which we need frequently ... + */ + adap->params.tp.vlan_shift = t4_filter_field_shift(adap, F_VLAN); + adap->params.tp.vnic_shift = t4_filter_field_shift(adap, F_VNIC_ID); + adap->params.tp.port_shift = t4_filter_field_shift(adap, F_PORT); + adap->params.tp.protocol_shift = t4_filter_field_shift(adap, + F_PROTOCOL); + + /* If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID + * represents the presense of an Outer VLAN instead of a VNIC ID. + */ + if ((adap->params.tp.ingress_config & F_VNIC) == 0) + adap->params.tp.vnic_shift = -1; + + return 0; +} + +/** + * t4_filter_field_shift - calculate filter field shift + * @adap: the adapter + * @filter_sel: the desired field (from TP_VLAN_PRI_MAP bits) + * + * Return the shift position of a filter field within the Compressed + * Filter Tuple. The filter field is specified via its selection bit + * within TP_VLAN_PRI_MAL (filter mode). E.g. F_VLAN. + */ +int t4_filter_field_shift(const struct adapter *adap, int filter_sel) +{ + unsigned int filter_mode = adap->params.tp.vlan_pri_map; + unsigned int sel; + int field_shift; + + if ((filter_mode & filter_sel) == 0) + return -1; + + for (sel = 1, field_shift = 0; sel < filter_sel; sel <<= 1) { + switch (filter_mode & sel) { + case F_FCOE: + field_shift += W_FT_FCOE; + break; + case F_PORT: + field_shift += W_FT_PORT; + break; + case F_VNIC_ID: + field_shift += W_FT_VNIC_ID; + break; + case F_VLAN: + field_shift += W_FT_VLAN; + break; + case F_TOS: + field_shift += W_FT_TOS; + break; + case F_PROTOCOL: + field_shift += W_FT_PROTOCOL; + break; + case F_ETHERTYPE: + field_shift += W_FT_ETHERTYPE; + break; + case F_MACMATCH: + field_shift += W_FT_MACMATCH; + break; + case F_MPSHITTYPE: + field_shift += W_FT_MPSHITTYPE; + break; + case F_FRAGMENTATION: + field_shift += W_FT_FRAGMENTATION; + break; + } + } + return field_shift; +} + int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) { u8 addr[6]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 0a8205d69d2..4082522d814 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1171,10 +1171,50 @@ #define A_TP_TX_SCHED_PCMD 0x25 +#define S_VNIC 11 +#define V_VNIC(x) ((x) << S_VNIC) +#define F_VNIC V_VNIC(1U) + +#define S_FRAGMENTATION 9 +#define V_FRAGMENTATION(x) ((x) << S_FRAGMENTATION) +#define F_FRAGMENTATION V_FRAGMENTATION(1U) + +#define S_MPSHITTYPE 8 +#define V_MPSHITTYPE(x) ((x) << S_MPSHITTYPE) +#define F_MPSHITTYPE V_MPSHITTYPE(1U) + +#define S_MACMATCH 7 +#define V_MACMATCH(x) ((x) << S_MACMATCH) +#define F_MACMATCH V_MACMATCH(1U) + +#define S_ETHERTYPE 6 +#define V_ETHERTYPE(x) ((x) << S_ETHERTYPE) +#define F_ETHERTYPE V_ETHERTYPE(1U) + +#define S_PROTOCOL 5 +#define V_PROTOCOL(x) ((x) << S_PROTOCOL) +#define F_PROTOCOL V_PROTOCOL(1U) + +#define S_TOS 4 +#define V_TOS(x) ((x) << S_TOS) +#define F_TOS V_TOS(1U) + +#define S_VLAN 3 +#define V_VLAN(x) ((x) << S_VLAN) +#define F_VLAN V_VLAN(1U) + +#define S_VNIC_ID 2 +#define V_VNIC_ID(x) ((x) << S_VNIC_ID) +#define F_VNIC_ID V_VNIC_ID(1U) + #define S_PORT 1 #define V_PORT(x) ((x) << S_PORT) #define F_PORT V_PORT(1U) +#define S_FCOE 0 +#define V_FCOE(x) ((x) << S_FCOE) +#define F_FCOE V_FCOE(1U) + #define NUM_MPS_CLS_SRAM_L_INSTANCES 336 #define NUM_MPS_T5_CLS_SRAM_L_INSTANCES 512 @@ -1213,4 +1253,37 @@ #define V_CHIPID(x) ((x) << S_CHIPID) #define G_CHIPID(x) (((x) >> S_CHIPID) & M_CHIPID) +/* TP_VLAN_PRI_MAP controls which subset of fields will be present in the + * Compressed Filter Tuple for LE filters. Each bit set in TP_VLAN_PRI_MAP + * selects for a particular field being present. These fields, when present + * in the Compressed Filter Tuple, have the following widths in bits. + */ +#define W_FT_FCOE 1 +#define W_FT_PORT 3 +#define W_FT_VNIC_ID 17 +#define W_FT_VLAN 17 +#define W_FT_TOS 8 +#define W_FT_PROTOCOL 8 +#define W_FT_ETHERTYPE 16 +#define W_FT_MACMATCH 9 +#define W_FT_MPSHITTYPE 3 +#define W_FT_FRAGMENTATION 1 + +/* Some of the Compressed Filter Tuple fields have internal structure. These + * bit shifts/masks describe those structures. All shifts are relative to the + * base position of the fields within the Compressed Filter Tuple + */ +#define S_FT_VLAN_VLD 16 +#define V_FT_VLAN_VLD(x) ((x) << S_FT_VLAN_VLD) +#define F_FT_VLAN_VLD V_FT_VLAN_VLD(1U) + +#define S_FT_VNID_ID_VF 0 +#define V_FT_VNID_ID_VF(x) ((x) << S_FT_VNID_ID_VF) + +#define S_FT_VNID_ID_PF 7 +#define V_FT_VNID_ID_PF(x) ((x) << S_FT_VNID_ID_PF) + +#define S_FT_VNID_ID_VLD 16 +#define V_FT_VNID_ID_VLD(x) ((x) << S_FT_VNID_ID_VLD) + #endif /* __T4_REGS_H */ diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 5878df619b5..4ccaf9af6fc 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -104,6 +104,7 @@ static inline char *nic_name(struct pci_dev *pdev) #define BE3_MAX_RSS_QS 16 #define BE3_MAX_TX_QS 16 #define BE3_MAX_EVT_QS 16 +#define BE3_SRIOV_MAX_EVT_QS 8 #define MAX_RX_QS 32 #define MAX_EVT_QS 32 @@ -480,7 +481,7 @@ struct be_adapter { struct list_head entry; u32 flash_status; - struct completion flash_compl; + struct completion et_cmd_compl; struct be_resources res; /* resources available for the func */ u16 num_vfs; /* Number of VFs provisioned by PF */ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index e0e8bc1ef14..94c35c8d799 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -141,11 +141,17 @@ static int be_mcc_compl_process(struct be_adapter *adapter, subsystem = resp_hdr->subsystem; } + if (opcode == OPCODE_LOWLEVEL_LOOPBACK_TEST && + subsystem == CMD_SUBSYSTEM_LOWLEVEL) { + complete(&adapter->et_cmd_compl); + return 0; + } + if (((opcode == OPCODE_COMMON_WRITE_FLASHROM) || (opcode == OPCODE_COMMON_WRITE_OBJECT)) && (subsystem == CMD_SUBSYSTEM_COMMON)) { adapter->flash_status = compl_status; - complete(&adapter->flash_compl); + complete(&adapter->et_cmd_compl); } if (compl_status == MCC_STATUS_SUCCESS) { @@ -2017,6 +2023,9 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, 0x3ea83c02, 0x4a110304}; int status; + if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) + return 0; + if (mutex_lock_interruptible(&adapter->mbox_lock)) return -1; @@ -2160,7 +2169,7 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd, be_mcc_notify(adapter); spin_unlock_bh(&adapter->mcc_lock); - if (!wait_for_completion_timeout(&adapter->flash_compl, + if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) status = -1; else @@ -2255,8 +2264,8 @@ int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd, be_mcc_notify(adapter); spin_unlock_bh(&adapter->mcc_lock); - if (!wait_for_completion_timeout(&adapter->flash_compl, - msecs_to_jiffies(40000))) + if (!wait_for_completion_timeout(&adapter->et_cmd_compl, + msecs_to_jiffies(40000))) status = -1; else status = adapter->flash_status; @@ -2367,6 +2376,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, { struct be_mcc_wrb *wrb; struct be_cmd_req_loopback_test *req; + struct be_cmd_resp_loopback_test *resp; int status; spin_lock_bh(&adapter->mcc_lock); @@ -2381,8 +2391,8 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_LOWLEVEL, OPCODE_LOWLEVEL_LOOPBACK_TEST, sizeof(*req), wrb, NULL); - req->hdr.timeout = cpu_to_le32(4); + req->hdr.timeout = cpu_to_le32(15); req->pattern = cpu_to_le64(pattern); req->src_port = cpu_to_le32(port_num); req->dest_port = cpu_to_le32(port_num); @@ -2390,12 +2400,15 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, req->num_pkts = cpu_to_le32(num_pkts); req->loopback_type = cpu_to_le32(loopback_type); - status = be_mcc_notify_wait(adapter); - if (!status) { - struct be_cmd_resp_loopback_test *resp = embedded_payload(wrb); - status = le32_to_cpu(resp->status); - } + be_mcc_notify(adapter); + + spin_unlock_bh(&adapter->mcc_lock); + wait_for_completion(&adapter->et_cmd_compl); + resp = embedded_payload(wrb); + status = le32_to_cpu(resp->status); + + return status; err: spin_unlock_bh(&adapter->mcc_lock); return status; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b5c238aa686..3acf137b578 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2744,13 +2744,16 @@ static int be_rx_qs_create(struct be_adapter *adapter) if (!BEx_chip(adapter)) adapter->rss_flags |= RSS_ENABLE_UDP_IPV4 | RSS_ENABLE_UDP_IPV6; + } else { + /* Disable RSS, if only default RX Q is created */ + adapter->rss_flags = RSS_ENABLE_NONE; + } - rc = be_cmd_rss_config(adapter, rsstable, adapter->rss_flags, - 128); - if (rc) { - adapter->rss_flags = 0; - return rc; - } + rc = be_cmd_rss_config(adapter, rsstable, adapter->rss_flags, + 128); + if (rc) { + adapter->rss_flags = RSS_ENABLE_NONE; + return rc; } /* First time posting */ @@ -3124,11 +3127,11 @@ static void BEx_get_resources(struct be_adapter *adapter, { struct pci_dev *pdev = adapter->pdev; bool use_sriov = false; + int max_vfs; - if (BE3_chip(adapter) && sriov_want(adapter)) { - int max_vfs; + max_vfs = pci_sriov_get_totalvfs(pdev); - max_vfs = pci_sriov_get_totalvfs(pdev); + if (BE3_chip(adapter) && sriov_want(adapter)) { res->max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0; use_sriov = res->max_vfs; } @@ -3159,7 +3162,11 @@ static void BEx_get_resources(struct be_adapter *adapter, BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; res->max_rx_qs = res->max_rss_qs + 1; - res->max_evt_qs = be_physfn(adapter) ? BE3_MAX_EVT_QS : 1; + if (be_physfn(adapter)) + res->max_evt_qs = (max_vfs > 0) ? + BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS; + else + res->max_evt_qs = 1; res->if_cap_flags = BE_IF_CAP_FLAGS_WANT; if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS)) @@ -4205,7 +4212,7 @@ static int be_ctrl_init(struct be_adapter *adapter) spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); - init_completion(&adapter->flash_compl); + init_completion(&adapter->et_cmd_compl); pci_save_state(adapter->pdev); return 0; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 05cd81aa981..6530177d53e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -428,6 +428,8 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* If this was the last BD in the ring, start at the beginning again. */ bdp = fec_enet_get_nextdesc(bdp, fep); + skb_tx_timestamp(skb); + fep->cur_tx = bdp; if (fep->cur_tx == fep->dirty_tx) @@ -436,8 +438,6 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) /* Trigger transmission start */ writel(0, fep->hwp + FEC_X_DES_ACTIVE); - skb_tx_timestamp(skb); - return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index 895450e9bb3..ff2d806eaef 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -718,8 +718,11 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) e1000_release_phy_80003es2lan(hw); /* Disable IBIST slave mode (far-end loopback) */ - e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, - &kum_reg_data); + ret_val = + e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, + &kum_reg_data); + if (ret_val) + return ret_val; kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, kum_reg_data); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 051d1583e21..d6570b2d5a6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6184,7 +6184,7 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static int e1000_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -6203,7 +6203,7 @@ static int e1000_resume(struct device *dev) return __e1000_resume(pdev); } -#endif /* CONFIG_PM_SLEEP */ +#endif /* CONFIG_PM */ #ifdef CONFIG_PM_RUNTIME static int e1000_runtime_suspend(struct device *dev) diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index da2be59505c..20e71f4ca42 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1757,19 +1757,23 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, * it across the board. */ ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); - if (ret_val) + if (ret_val) { /* If the first read fails, another entity may have * ownership of the resources, wait and try again to * see if they have relinquished the resources yet. */ - udelay(usec_interval); + if (usec_interval >= 1000) + msleep(usec_interval / 1000); + else + udelay(usec_interval); + } ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); if (ret_val) break; if (phy_status & BMSR_LSTATUS) break; if (usec_interval >= 1000) - mdelay(usec_interval / 1000); + msleep(usec_interval / 1000); else udelay(usec_interval); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index d04ad3b202e..2b320841a10 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -572,16 +572,18 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) if (ret_code != I40E_SUCCESS) goto init_adminq_free_arq; - if (hw->aq.api_maj_ver != I40E_FW_API_VERSION_MAJOR || - hw->aq.api_min_ver != I40E_FW_API_VERSION_MINOR) { - ret_code = I40E_ERR_FIRMWARE_API_VERSION; - goto init_adminq_free_arq; - } + /* get the NVM version info */ i40e_read_nvm_word(hw, I40E_SR_NVM_IMAGE_VERSION, &hw->nvm.version); i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_LO, &eetrack_lo); i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, &eetrack_hi); hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo; + if (hw->aq.api_maj_ver != I40E_FW_API_VERSION_MAJOR || + hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) { + ret_code = I40E_ERR_FIRMWARE_API_VERSION; + goto init_adminq_free_arq; + } + ret_code = i40e_aq_set_hmc_resource_profile(hw, I40E_HMC_PROFILE_DEFAULT, 0, @@ -609,6 +611,9 @@ i40e_status i40e_shutdown_adminq(struct i40e_hw *hw) { i40e_status ret_code = 0; + if (i40e_check_asq_alive(hw)) + i40e_aq_queue_shutdown(hw, true); + i40e_shutdown_asq(hw); i40e_shutdown_arq(hw); @@ -667,7 +672,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) /* AQ designers suggest use of head for better * timing reliability than DD bit */ - return (rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use); + return rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use; } @@ -677,7 +682,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) * @desc: prefilled descriptor describing the command (non DMA mem) * @buff: buffer to use for indirect commands * @buff_size: size of buffer for indirect commands - * @opaque: pointer to info to be used in async cleanup + * @cmd_details: pointer to command details structure * * This is the main send command driver routine for the Admin Queue send * queue. It runs the queue, cleans the queue, etc @@ -928,6 +933,11 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, * size */ bi = &hw->aq.arq.r.arq_bi[ntc]; + memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + + desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF); + if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF) + desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB); desc->datalen = cpu_to_le16((u16)bi->size); desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa)); desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa)); @@ -952,20 +962,14 @@ clean_arq_element_out: static void i40e_resume_aq(struct i40e_hw *hw) { - u32 reg = 0; - /* Registers are reset after PF reset */ hw->aq.asq.next_to_use = 0; hw->aq.asq.next_to_clean = 0; i40e_config_asq_regs(hw); - reg = hw->aq.num_asq_entries | I40E_PF_ATQLEN_ATQENABLE_MASK; - wr32(hw, hw->aq.asq.len, reg); hw->aq.arq.next_to_use = 0; hw->aq.arq.next_to_clean = 0; i40e_config_arq_regs(hw); - reg = hw->aq.num_arq_entries | I40E_PF_ATQLEN_ATQENABLE_MASK; - wr32(hw, hw->aq.arq.len, reg); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 2859377abca..007508772cf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -35,7 +35,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0000 +#define I40E_FW_API_VERSION_MINOR 0x0001 struct i40e_aq_desc { __le16 flags; @@ -137,10 +137,13 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_ns_proxy_entry = 0x0105, /* LAA */ - i40e_aqc_opc_mng_laa = 0x0106, + i40e_aqc_opc_mng_laa = 0x0106, /* AQ obsolete */ i40e_aqc_opc_mac_address_read = 0x0107, i40e_aqc_opc_mac_address_write = 0x0108, + /* PXE */ + i40e_aqc_opc_clear_pxe_mode = 0x0110, + /* internal switch commands */ i40e_aqc_opc_get_switch_config = 0x0200, i40e_aqc_opc_add_statistics = 0x0201, @@ -317,13 +320,15 @@ struct i40e_aqc_get_version { I40E_CHECK_CMD_LENGTH(i40e_aqc_get_version); -/* Send driver version (direct 0x0002) */ +/* Send driver version (indirect 0x0002) */ struct i40e_aqc_driver_version { u8 driver_major_ver; u8 driver_minor_ver; u8 driver_build_ver; u8 driver_subbuild_ver; - u8 reserved[12]; + u8 reserved[4]; + __le32 address_high; + __le32 address_low; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_driver_version); @@ -479,7 +484,7 @@ struct i40e_aqc_mng_laa { u8 reserved2[6]; }; -/* Manage MAC Address Read Command (0x0107) */ +/* Manage MAC Address Read Command (indirect 0x0107) */ struct i40e_aqc_mac_address_read { __le16 command_flags; #define I40E_AQC_LAN_ADDR_VALID 0x10 @@ -517,6 +522,16 @@ struct i40e_aqc_mac_address_write { I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_write); +/* PXE commands (0x011x) */ + +/* Clear PXE Command and response (direct 0x0110) */ +struct i40e_aqc_clear_pxe { + u8 rx_cnt; + u8 reserved[15]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe); + /* Switch configuration commands (0x02xx) */ /* Used by many indirect commands that only pass an seid and a buffer in the @@ -639,13 +654,15 @@ struct i40e_aqc_switch_resource_alloc_element_resp { u8 reserved2[6]; }; -/* Add VSI (indirect 0x210) +/* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) * - * Update VSI (indirect 0x211) Get VSI (indirect 0x0212) - * use the generic i40e_aqc_switch_seid descriptor format - * use the same completion and data structure as Add VSI + * Update VSI (indirect 0x211) + * uses the same data structure as Add VSI + * + * Get VSI (indirect 0x0212) + * uses the same completion and data structure as Add VSI */ struct i40e_aqc_add_get_update_vsi { __le16 uplink_seid; @@ -1185,27 +1202,40 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0 #define I40E_AQC_ADD_CLOUD_FILTER_MASK (0x3F << \ I40E_AQC_ADD_CLOUD_FILTER_SHIFT) +/* 0x0000 reserved */ #define I40E_AQC_ADD_CLOUD_FILTER_OIP 0x0001 -#define I40E_AQC_ADD_CLOUD_FILTER_OIP_GRE 0x0002 +/* 0x0002 reserved */ #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN 0x0003 -#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_GRE 0x0004 +#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID 0x0004 +/* 0x0005 reserved */ #define I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID 0x0006 -#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_VNL 0x0007 +/* 0x0007 reserved */ /* 0x0008 reserved */ #define I40E_AQC_ADD_CLOUD_FILTER_OMAC 0x0009 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A +#define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B +#define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C + #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6 #define I40E_AQC_ADD_CLOUD_VNK_MASK 0x00C0 #define I40E_AQC_ADD_CLOUD_FLAGS_IPV4 0 #define I40E_AQC_ADD_CLOUD_FLAGS_IPV6 0x0100 - __le32 key_low; - __le32 key_high; + +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 + + __le32 tenant_id; + u8 reserved[4]; __le16 queue_number; #define I40E_AQC_ADD_CLOUD_QUEUE_SHIFT 0 #define I40E_AQC_ADD_CLOUD_QUEUE_MASK (0x3F << \ I40E_AQC_ADD_CLOUD_QUEUE_SHIFT) - u8 reserved[14]; + u8 reserved2[14]; /* response section */ u8 allocation_result; #define I40E_AQC_ADD_CLOUD_FILTER_SUCCESS 0x0 @@ -1548,7 +1578,7 @@ struct i40e_aqc_module_desc { struct i40e_aq_get_phy_abilities_resp { __le32 phy_type; /* bitmap using the above enum for offsets */ - u8 link_speed; /* bitmap using the above enum */ + u8 link_speed; /* bitmap using the above enum bit patterns */ u8 abilities; #define I40E_AQ_PHY_FLAG_PAUSE_TX 0x01 #define I40E_AQ_PHY_FLAG_PAUSE_RX 0x02 @@ -1582,6 +1612,10 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */ __le32 phy_type; u8 link_speed; u8 abilities; +/* bits 0-2 use the values from get_phy_abilities_resp */ +#define I40E_AQ_PHY_ENABLE_LINK 0x08 +#define I40E_AQ_PHY_ENABLE_AN 0x10 +#define I40E_AQ_PHY_ENABLE_ATOMIC_LINK 0x20 __le16 eee_capability; __le32 eeer; u8 low_power_ctrl; @@ -1915,22 +1949,39 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start); struct i40e_aqc_add_udp_tunnel { __le16 udp_port; u8 header_len; /* in DWords, 1 to 15 */ - u8 protocol_index; -#define I40E_AQC_TUNNEL_TYPE_MAC 0x0 -#define I40E_AQC_TUNNEL_TYPE_UDP 0x1 -#define I40E_AQC_TUNNEL_TYPE_VXLAN 0x2 - u8 reserved[12]; + u8 protocol_type; +#define I40E_AQC_TUNNEL_TYPE_TEREDO 0x0 +#define I40E_AQC_TUNNEL_TYPE_VXLAN 0x2 +#define I40E_AQC_TUNNEL_TYPE_NGE 0x3 + u8 variable_udp_length; +#define I40E_AQC_TUNNEL_FIXED_UDP_LENGTH 0x0 +#define I40E_AQC_TUNNEL_VARIABLE_UDP_LENGTH 0x1 + u8 udp_key_index; +#define I40E_AQC_TUNNEL_KEY_INDEX_VXLAN 0x0 +#define I40E_AQC_TUNNEL_KEY_INDEX_NGE 0x1 +#define I40E_AQC_TUNNEL_KEY_INDEX_PROPRIETARY_UDP 0x2 + u8 reserved[10]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel); +struct i40e_aqc_add_udp_tunnel_completion { + __le16 udp_port; + u8 filter_entry_index; + u8 multiple_pfs; +#define I40E_AQC_SINGLE_PF 0x0 +#define I40E_AQC_MULTIPLE_PFS 0x1 + u8 total_filters; + u8 reserved[11]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel_completion); + /* remove UDP Tunnel command (0x0B01) */ struct i40e_aqc_remove_udp_tunnel { u8 reserved[2]; u8 index; /* 0 to 15 */ - u8 pf_filters; - u8 total_filters; - u8 reserved2[11]; + u8 reserved2[13]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel); @@ -1938,28 +1989,32 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel); struct i40e_aqc_del_udp_tunnel_completion { __le16 udp_port; u8 index; /* 0 to 15 */ - u8 multiple_entries; - u8 tunnels_used; - u8 reserved; - u8 tunnels_free; - u8 reserved1[9]; + u8 multiple_pfs; + u8 total_filters_used; + u8 reserved1[11]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion); /* tunnel key structure 0x0B10 */ + struct i40e_aqc_tunnel_key_structure { - __le16 key1_off; - __le16 key1_len; - __le16 key2_off; - __le16 key2_len; - __le16 flags; + u8 key1_off; + u8 key2_off; + u8 key1_len; /* 0 to 15 */ + u8 key2_len; /* 0 to 15 */ + u8 flags; #define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDE 0x01 /* response flags */ #define I40E_AQC_TUNNEL_KEY_STRUCT_SUCCESS 0x01 #define I40E_AQC_TUNNEL_KEY_STRUCT_MODIFIED 0x02 #define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDDEN 0x03 - u8 resreved[6]; + u8 network_key_index; +#define I40E_AQC_NETWORK_KEY_INDEX_VXLAN 0x0 +#define I40E_AQC_NETWORK_KEY_INDEX_NGE 0x1 +#define I40E_AQC_NETWORK_KEY_INDEX_FLEX_MAC_IN_UDP 0x2 +#define I40E_AQC_NETWORK_KEY_INDEX_GRE 0x3 + u8 reserved[10]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure); @@ -2053,6 +2108,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_reg); #define I40E_AQ_CLUSTER_ID_DCB 8 #define I40E_AQ_CLUSTER_ID_EMP_MEM 9 #define I40E_AQ_CLUSTER_ID_PKT_BUF 10 +#define I40E_AQ_CLUSTER_ID_ALTRAM 11 struct i40e_aqc_debug_dump_internals { u8 cluster_id; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index e5694b9f4e9..d564910b4f5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -126,6 +126,44 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, } /** + * i40e_check_asq_alive + * @hw: pointer to the hw struct + * + * Returns true if Queue is enabled else false. + **/ +bool i40e_check_asq_alive(struct i40e_hw *hw) +{ + return !!(rd32(hw, hw->aq.asq.len) & I40E_PF_ATQLEN_ATQENABLE_MASK); +} + +/** + * i40e_aq_queue_shutdown + * @hw: pointer to the hw struct + * @unloading: is the driver unloading itself + * + * Tell the Firmware that we're shutting down the AdminQ and whether + * or not the driver is unloading as well. + **/ +i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw, + bool unloading) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_queue_shutdown *cmd = + (struct i40e_aqc_queue_shutdown *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_queue_shutdown); + + if (unloading) + cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING); + status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); + + return status; +} + + +/** * i40e_init_shared_code - Initialize the shared code * @hw: pointer to hardware structure * @@ -142,14 +180,6 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw) i40e_status status = 0; u32 reg; - hw->phy.get_link_info = true; - - /* Determine port number */ - reg = rd32(hw, I40E_PFGEN_PORTNUM); - reg = ((reg & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) >> - I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT); - hw->port = (u8)reg; - i40e_set_mac_type(hw); switch (hw->mac.type) { @@ -160,6 +190,21 @@ i40e_status i40e_init_shared_code(struct i40e_hw *hw) break; } + hw->phy.get_link_info = true; + + /* Determine port number */ + reg = rd32(hw, I40E_PFGEN_PORTNUM); + reg = ((reg & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) >> + I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT); + hw->port = (u8)reg; + + /* Determine the PF number based on the PCI fn */ + reg = rd32(hw, I40E_GLPCI_CAPSUP); + if (reg & I40E_GLPCI_CAPSUP_ARI_EN_MASK) + hw->pf_id = (u8)((hw->bus.device << 3) | hw->bus.func); + else + hw->pf_id = (u8)hw->bus.func; + status = i40e_init_nvm(hw); return status; } @@ -297,6 +342,7 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) i40e_status i40e_pf_reset(struct i40e_hw *hw) { u32 cnt = 0; + u32 cnt1 = 0; u32 reg = 0; u32 grst_del; @@ -317,12 +363,24 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) return I40E_ERR_RESET_FAILED; } - /* Determine the PF number based on the PCI fn */ - reg = rd32(hw, I40E_GLPCI_CAPSUP); - if (reg & I40E_GLPCI_CAPSUP_ARI_EN_MASK) - hw->pf_id = (u8)((hw->bus.device << 3) | hw->bus.func); - else - hw->pf_id = (u8)hw->bus.func; + /* Now Wait for the FW to be ready */ + for (cnt1 = 0; cnt1 < I40E_PF_RESET_WAIT_COUNT; cnt1++) { + reg = rd32(hw, I40E_GLNVM_ULD); + reg &= (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK | + I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK); + if (reg == (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK | + I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK)) { + hw_dbg(hw, "Core and Global modules ready %d\n", cnt1); + break; + } + usleep_range(10000, 20000); + } + if (!(reg & (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK | + I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK))) { + hw_dbg(hw, "wait for FW Reset complete timedout\n"); + hw_dbg(hw, "I40E_GLNVM_ULD = 0x%x\n", reg); + return I40E_ERR_RESET_FAILED; + } /* If there was a Global Reset in progress when we got here, * we don't need to do the PF Reset @@ -348,6 +406,7 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) } i40e_clear_pxe_mode(hw); + return 0; } @@ -478,31 +537,6 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink) } /* Admin command wrappers */ -/** - * i40e_aq_queue_shutdown - * @hw: pointer to the hw struct - * @unloading: is the driver unloading itself - * - * Tell the Firmware that we're shutting down the AdminQ and whether - * or not the driver is unloading as well. - **/ -i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw, - bool unloading) -{ - struct i40e_aq_desc desc; - struct i40e_aqc_queue_shutdown *cmd = - (struct i40e_aqc_queue_shutdown *)&desc.params.raw; - i40e_status status; - - i40e_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_queue_shutdown); - - if (unloading) - cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING); - status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); - - return status; -} /** * i40e_aq_set_link_restart_an @@ -748,8 +782,8 @@ i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; - struct i40e_aqc_switch_seid *cmd = - (struct i40e_aqc_switch_seid *)&desc.params.raw; + struct i40e_aqc_add_get_update_vsi *cmd = + (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw; struct i40e_aqc_add_get_update_vsi_completion *resp = (struct i40e_aqc_add_get_update_vsi_completion *) &desc.params.raw; @@ -758,7 +792,7 @@ i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_vsi_parameters); - cmd->seid = cpu_to_le16(vsi_ctx->seid); + cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid); desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF) @@ -792,13 +826,13 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; - struct i40e_aqc_switch_seid *cmd = - (struct i40e_aqc_switch_seid *)&desc.params.raw; + struct i40e_aqc_add_get_update_vsi *cmd = + (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw; i40e_status status; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_update_vsi_parameters); - cmd->seid = cpu_to_le16(vsi_ctx->seid); + cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid); desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF) @@ -1705,7 +1739,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw, cmd->udp_port = cpu_to_le16(udp_port); cmd->header_len = header_len; - cmd->protocol_index = protocol_index; + cmd->protocol_type = protocol_index; status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 22fcf94687b..125f758a03a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -533,6 +533,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); if (!tx_ring) continue; + dev_info(&pf->pdev->dev, " tx_rings[%i]: desc = %p\n", i, tx_ring->desc); @@ -1522,6 +1523,118 @@ static ssize_t i40e_dbg_command_write(struct file *filp, } else { dev_info(&pf->pdev->dev, "clear_stats vsi [seid] or clear_stats pf\n"); } + } else if (strncmp(cmd_buf, "send aq_cmd", 11) == 0) { + struct i40e_aq_desc *desc; + i40e_status ret; + + desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL); + if (!desc) + goto command_write_done; + cnt = sscanf(&cmd_buf[11], + "%hx %hx %hx %hx %x %x %x %x %x %x", + &desc->flags, + &desc->opcode, &desc->datalen, &desc->retval, + &desc->cookie_high, &desc->cookie_low, + &desc->params.internal.param0, + &desc->params.internal.param1, + &desc->params.internal.param2, + &desc->params.internal.param3); + if (cnt != 10) { + dev_info(&pf->pdev->dev, + "send aq_cmd: bad command string, cnt=%d\n", + cnt); + kfree(desc); + desc = NULL; + goto command_write_done; + } + ret = i40e_asq_send_command(&pf->hw, desc, NULL, 0, NULL); + if (!ret) { + dev_info(&pf->pdev->dev, "AQ command sent Status : Success\n"); + } else if (ret == I40E_ERR_ADMIN_QUEUE_ERROR) { + dev_info(&pf->pdev->dev, + "AQ command send failed Opcode %x AQ Error: %d\n", + desc->opcode, pf->hw.aq.asq_last_status); + } else { + dev_info(&pf->pdev->dev, + "AQ command send failed Opcode %x Status: %d\n", + desc->opcode, ret); + } + dev_info(&pf->pdev->dev, + "AQ desc WB 0x%04x 0x%04x 0x%04x 0x%04x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + desc->flags, desc->opcode, desc->datalen, desc->retval, + desc->cookie_high, desc->cookie_low, + desc->params.internal.param0, + desc->params.internal.param1, + desc->params.internal.param2, + desc->params.internal.param3); + kfree(desc); + desc = NULL; + } else if (strncmp(cmd_buf, "send indirect aq_cmd", 20) == 0) { + struct i40e_aq_desc *desc; + i40e_status ret; + u16 buffer_len; + u8 *buff; + + desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL); + if (!desc) + goto command_write_done; + cnt = sscanf(&cmd_buf[20], + "%hx %hx %hx %hx %x %x %x %x %x %x %hd", + &desc->flags, + &desc->opcode, &desc->datalen, &desc->retval, + &desc->cookie_high, &desc->cookie_low, + &desc->params.internal.param0, + &desc->params.internal.param1, + &desc->params.internal.param2, + &desc->params.internal.param3, + &buffer_len); + if (cnt != 11) { + dev_info(&pf->pdev->dev, + "send indirect aq_cmd: bad command string, cnt=%d\n", + cnt); + kfree(desc); + desc = NULL; + goto command_write_done; + } + /* Just stub a buffer big enough in case user messed up */ + if (buffer_len == 0) + buffer_len = 1280; + + buff = kzalloc(buffer_len, GFP_KERNEL); + if (!buff) { + kfree(desc); + desc = NULL; + goto command_write_done; + } + desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); + ret = i40e_asq_send_command(&pf->hw, desc, buff, + buffer_len, NULL); + if (!ret) { + dev_info(&pf->pdev->dev, "AQ command sent Status : Success\n"); + } else if (ret == I40E_ERR_ADMIN_QUEUE_ERROR) { + dev_info(&pf->pdev->dev, + "AQ command send failed Opcode %x AQ Error: %d\n", + desc->opcode, pf->hw.aq.asq_last_status); + } else { + dev_info(&pf->pdev->dev, + "AQ command send failed Opcode %x Status: %d\n", + desc->opcode, ret); + } + dev_info(&pf->pdev->dev, + "AQ desc WB 0x%04x 0x%04x 0x%04x 0x%04x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + desc->flags, desc->opcode, desc->datalen, desc->retval, + desc->cookie_high, desc->cookie_low, + desc->params.internal.param0, + desc->params.internal.param1, + desc->params.internal.param2, + desc->params.internal.param3); + print_hex_dump(KERN_INFO, "AQ buffer WB: ", + DUMP_PREFIX_OFFSET, 16, 1, + buff, buffer_len, true); + kfree(buff); + buff = NULL; + kfree(desc); + desc = NULL; } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) || (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) { struct i40e_fdir_data fd_data; @@ -1773,6 +1886,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " pfr\n"); dev_info(&pf->pdev->dev, " corer\n"); dev_info(&pf->pdev->dev, " globr\n"); + dev_info(&pf->pdev->dev, " send aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3>\n"); + dev_info(&pf->pdev->dev, " send indirect aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3> <buffer_len>\n"); dev_info(&pf->pdev->dev, " add fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n"); dev_info(&pf->pdev->dev, " rem fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n"); dev_info(&pf->pdev->dev, " lldp start\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c index 98c1ef563bf..6a1657e6241 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.c +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c @@ -70,9 +70,9 @@ struct i40e_diag_reg_test_info i40e_reg_list[] = { /* offset mask elements stride */ {I40E_QTX_CTL(0), 0x0000FFBF, 4, I40E_QTX_CTL(1) - I40E_QTX_CTL(0)}, {I40E_PFINT_ITR0(0), 0x00000FFF, 3, I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)}, - {I40E_PFINT_ITRN(0, 0), 0x00000FFF, 64, I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)}, - {I40E_PFINT_ITRN(1, 0), 0x00000FFF, 64, I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)}, - {I40E_PFINT_ITRN(2, 0), 0x00000FFF, 64, I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)}, + {I40E_PFINT_ITRN(0, 0), 0x00000FFF, 8, I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)}, + {I40E_PFINT_ITRN(1, 0), 0x00000FFF, 8, I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)}, + {I40E_PFINT_ITRN(2, 0), 0x00000FFF, 8, I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)}, {I40E_PFINT_STAT_CTL0, 0x0000000C, 1, 0}, {I40E_PFINT_LNKLST0, 0x00001FFF, 1, 0}, {I40E_PFINT_LNKLSTN(0), 0x000007FF, 64, I40E_PFINT_LNKLSTN(1) - I40E_PFINT_LNKLSTN(0)}, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 55fd23d8d5f..f736c447041 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -39,7 +39,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 14 +#define DRV_VERSION_BUILD 25 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -1215,6 +1215,10 @@ static int i40e_set_mac(struct net_device *netdev, void *p) if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) return 0; + if (test_bit(__I40E_DOWN, &vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + return -EADDRNOTAVAIL; + if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; ret = i40e_aq_mac_address_write(&vsi->back->hw, @@ -1505,11 +1509,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cpu_to_le16((u16)(f->vlan == I40E_VLAN_ANY ? 0 : f->vlan)); - /* vlan0 as wild card to allow packets from all vlans */ - if (f->vlan == I40E_VLAN_ANY || - (vsi->netdev && !(vsi->netdev->features & - NETIF_F_HW_VLAN_CTAG_FILTER))) - cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; del_list[num_del].flags = cmd_flags; num_del++; @@ -1575,12 +1574,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) add_list[num_add].queue_number = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; - - /* vlan0 as wild card to allow packets from all vlans */ - if (f->vlan == I40E_VLAN_ANY || (vsi->netdev && - !(vsi->netdev->features & - NETIF_F_HW_VLAN_CTAG_FILTER))) - cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; add_list[num_add].flags = cpu_to_le16(cmd_flags); num_add++; @@ -1646,6 +1639,13 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) dev_info(&pf->pdev->dev, "set uni promisc failed, err %d, aq_err %d\n", aq_ret, pf->hw.aq.asq_last_status); + aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, + vsi->seid, + cur_promisc, NULL); + if (aq_ret) + dev_info(&pf->pdev->dev, + "set brdcast promisc failed, err %d, aq_err %d\n", + aq_ret, pf->hw.aq.asq_last_status); } clear_bit(__I40E_CONFIG_BUSY, &vsi->state); @@ -1779,7 +1779,6 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) { struct i40e_mac_filter *f, *add_f; bool is_netdev, is_vf; - int ret; is_vf = (vsi->type == I40E_VSI_SRIOV); is_netdev = !!(vsi->netdev); @@ -1805,13 +1804,6 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) } } - ret = i40e_sync_vsi_filters(vsi); - if (ret) { - dev_info(&vsi->back->pdev->dev, - "Could not sync filters for vid %d\n", vid); - return ret; - } - /* Now if we add a vlan tag, make sure to check if it is the first * tag (i.e. a "tag" -1 does exist) and if so replace the -1 "tag" * with 0, so we now accept untagged and specified tagged traffic @@ -1848,10 +1840,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) } } } - ret = i40e_sync_vsi_filters(vsi); } - return ret; + if (test_bit(__I40E_DOWN, &vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + return 0; + + return i40e_sync_vsi_filters(vsi); } /** @@ -1867,7 +1862,6 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) struct i40e_mac_filter *f, *add_f; bool is_vf, is_netdev; int filter_count = 0; - int ret; is_vf = (vsi->type == I40E_VSI_SRIOV); is_netdev = !!(netdev); @@ -1878,12 +1872,6 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) list_for_each_entry(f, &vsi->mac_filter_list, list) i40e_del_filter(vsi, f->macaddr, vid, is_vf, is_netdev); - ret = i40e_sync_vsi_filters(vsi); - if (ret) { - dev_info(&vsi->back->pdev->dev, "Could not sync filters\n"); - return ret; - } - /* go through all the filters for this VSI and if there is only * vid == 0 it means there are no other filters, so vid 0 must * be replaced with -1. This signifies that we should from now @@ -1926,6 +1914,10 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) } } + if (test_bit(__I40E_DOWN, &vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + return 0; + return i40e_sync_vsi_filters(vsi); } @@ -2016,8 +2008,9 @@ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi->info.pvid = cpu_to_le16(vid); - vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_INSERT_PVID; - vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_UNTAGGED; + vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_TAGGED | + I40E_AQ_VSI_PVLAN_INSERT_PVID | + I40E_AQ_VSI_PVLAN_EMOD_STR; ctxt.seid = vsi->seid; memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); @@ -2040,8 +2033,9 @@ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid) **/ void i40e_vsi_remove_pvid(struct i40e_vsi *vsi) { + i40e_vlan_stripping_disable(vsi); + vsi->info.pvid = 0; - i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features); } /** @@ -3989,13 +3983,6 @@ static int i40e_open(struct net_device *netdev) if (err) goto err_up_complete; - if ((vsi->type == I40E_VSI_MAIN) || (vsi->type == I40E_VSI_VMDQ2)) { - err = i40e_aq_set_vsi_broadcast(&pf->hw, vsi->seid, true, NULL); - if (err) - netdev_info(netdev, - "couldn't set broadcast err %d aq_err %d\n", - err, pf->hw.aq.asq_last_status); - } #ifdef CONFIG_I40E_VXLAN vxlan_get_rx_port(netdev); #endif @@ -4490,6 +4477,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) if (!test_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state)) return; + event.msg_size = I40E_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL); if (!event.msg_buf) return; @@ -4773,7 +4761,8 @@ static int i40e_prep_for_reset(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "Tearing down internal switch for reset\n"); - i40e_vc_notify_reset(pf); + if (i40e_check_asq_alive(hw)) + i40e_vc_notify_reset(pf); /* quiesce the VSIs and their queues that are not already DOWN */ i40e_pf_quiesce_all_vsi(pf); @@ -6067,6 +6056,7 @@ static const struct net_device_ops i40e_netdev_ops = { **/ static int i40e_config_netdev(struct i40e_vsi *vsi) { + u8 brdcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_netdev_priv *np; @@ -6116,6 +6106,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) random_ether_addr(mac_addr); i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false); } + i40e_add_filter(vsi, brdcast, I40E_VLAN_ANY, false, false); memcpy(netdev->dev_addr, mac_addr, ETH_ALEN); memcpy(netdev->perm_addr, mac_addr, ETH_ALEN); @@ -6901,7 +6892,7 @@ void i40e_veb_release(struct i40e_veb *veb) **/ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { - bool is_default = (vsi->idx == vsi->back->lan_vsi); + bool is_default = false; bool is_cloud = false; int ret; @@ -7851,7 +7842,6 @@ static void i40e_remove(struct pci_dev *pdev) "Failed to destroy the HMC resources: %d\n", ret_code); /* shutdown the adminq */ - i40e_aq_queue_shutdown(&pf->hw, true); ret_code = i40e_shutdown_adminq(&pf->hw); if (ret_code) dev_warn(&pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 97e1bb30ef8..e2da0a2784d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -166,15 +166,15 @@ static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw) } /** - * i40e_read_nvm_srctl - Reads Shadow RAM. + * i40e_read_nvm_word - Reads Shadow RAM * @hw: pointer to the HW structure. * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF). * @data: word read from the Shadow RAM. * * Reads 16 bit word from the Shadow RAM using the GLNVM_SRCTL register. **/ -static i40e_status i40e_read_nvm_srctl(struct i40e_hw *hw, u16 offset, - u16 *data) +i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, + u16 *data) { i40e_status ret_code = I40E_ERR_TIMEOUT; u32 sr_reg; @@ -211,29 +211,6 @@ read_nvm_exit: } /** - * i40e_read_nvm_word - Reads Shadow RAM word. - * @hw: pointer to the HW structure. - * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF). - * @data: word read from the Shadow RAM. - * - * Reads 16 bit word from the Shadow RAM. Each read is preceded - * with the NVM ownership taking and followed by the release. - **/ -i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, - u16 *data) -{ - i40e_status ret_code = 0; - - ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (!ret_code) { - ret_code = i40e_read_nvm_srctl(hw, offset, data); - i40e_release_nvm(hw); - } - - return ret_code; -} - -/** * i40e_read_nvm_buffer - Reads Shadow RAM buffer. * @hw: pointer to the HW structure. * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF). @@ -250,30 +227,18 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, { i40e_status ret_code = 0; u16 index, word; - u32 time; - ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (!ret_code) { - /* Loop thru the selected region. */ - for (word = 0; word < *words; word++) { - index = offset + word; - ret_code = i40e_read_nvm_srctl(hw, index, &data[word]); - if (ret_code) - break; - /* Check if we didn't exceeded the semaphore timeout. */ - time = rd32(hw, I40E_GLVFGEN_TIMER); - if (time >= hw->nvm.hw_semaphore_timeout) { - ret_code = I40E_ERR_TIMEOUT; - hw_dbg(hw, "NVM read error: timeout.\n"); - break; - } - } - /* Update the number of words read from the Shadow RAM. */ - *words = word; - /* Release the NVM ownership. */ - i40e_release_nvm(hw); + /* Loop thru the selected region. */ + for (word = 0; word < *words; word++) { + index = offset + word; + ret_code = i40e_read_nvm_word(hw, index, &data[word]); + if (ret_code) + break; } + /* Update the number of words read from the Shadow RAM. */ + *words = word; + return ret_code; } @@ -297,14 +262,14 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw, u32 i = 0; /* read pointer to VPD area */ - ret_code = i40e_read_nvm_srctl(hw, I40E_SR_VPD_PTR, &vpd_module); + ret_code = i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module); if (ret_code) { ret_code = I40E_ERR_NVM_CHECKSUM; goto i40e_calc_nvm_checksum_exit; } /* read pointer to PCIe Alt Auto-load module */ - ret_code = i40e_read_nvm_srctl(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR, + ret_code = i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR, &pcie_alt_module); if (ret_code) { ret_code = I40E_ERR_NVM_CHECKSUM; @@ -331,7 +296,7 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw, break; } - ret_code = i40e_read_nvm_srctl(hw, (u16)i, &word); + ret_code = i40e_read_nvm_word(hw, (u16)i, &word); if (ret_code) { ret_code = I40E_ERR_NVM_CHECKSUM; goto i40e_calc_nvm_checksum_exit; @@ -371,7 +336,7 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw, /* Do not use i40e_read_nvm_word() because we do not want to take * the synchronization semaphores twice here. */ - i40e_read_nvm_srctl(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr); + i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr); /* Verify read checksum from EEPROM is the same as * calculated checksum diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 800f3c81628..e05d303105a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -59,6 +59,9 @@ void i40e_debug_aq(struct i40e_hw *hw, void *buffer); void i40e_idle_aq(struct i40e_hw *hw); +bool i40e_check_asq_alive(struct i40e_hw *hw); +i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw, + bool unloading); u32 i40e_led_get(struct i40e_hw *hw); void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink); @@ -69,8 +72,6 @@ i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw, u16 *fw_major_version, u16 *fw_minor_version, u16 *api_major_version, u16 *api_minor_version, struct i40e_asq_cmd_details *cmd_details); -i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw, - bool unloading); i40e_status i40e_aq_set_phy_reset(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id, diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 2394c66870f..d188ec03aff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2053,6 +2053,28 @@ #define I40E_GLNVM_SRDATA_WRDATA_MASK (0xFFFF << I40E_GLNVM_SRDATA_WRDATA_SHIFT) #define I40E_GLNVM_SRDATA_RDDATA_SHIFT 16 #define I40E_GLNVM_SRDATA_RDDATA_MASK (0xFFFF << I40E_GLNVM_SRDATA_RDDATA_SHIFT) +#define I40E_GLNVM_ULD 0x000B6008 +#define I40E_GLNVM_ULD_CONF_PCIR_DONE_SHIFT 0 +#define I40E_GLNVM_ULD_CONF_PCIR_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_PCIR_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_PCIRTL_DONE_SHIFT 1 +#define I40E_GLNVM_ULD_CONF_PCIRTL_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_PCIRTL_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_LCB_DONE_SHIFT 2 +#define I40E_GLNVM_ULD_CONF_LCB_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_LCB_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT 3 +#define I40E_GLNVM_ULD_CONF_CORE_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT 4 +#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_POR_DONE_SHIFT 5 +#define I40E_GLNVM_ULD_CONF_POR_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_POR_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_SHIFT 6 +#define I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_SHIFT 7 +#define I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_EMP_DONE_SHIFT 8 +#define I40E_GLNVM_ULD_CONF_EMP_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_EMP_DONE_SHIFT) +#define I40E_GLNVM_ULD_CONF_PCIALT_DONE_SHIFT 9 +#define I40E_GLNVM_ULD_CONF_PCIALT_DONE_MASK (0x1 << I40E_GLNVM_ULD_CONF_PCIALT_DONE_SHIFT) + #define I40E_GLPCI_BYTCTH 0x0009C484 #define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT 0 #define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_MASK (0xFFFFFFFF << I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2584d6b610e..fac4fb37f87 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -77,7 +77,6 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, /* grab the next descriptor */ i = tx_ring->next_to_use; fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); - tx_buf = &tx_ring->tx_bi[i]; tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0; @@ -129,15 +128,23 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, /* Now program a dummy descriptor */ i = tx_ring->next_to_use; tx_desc = I40E_TX_DESC(tx_ring, i); + tx_buf = &tx_ring->tx_bi[i]; tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0; + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buffer_addr = cpu_to_le64(dma); td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY; tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0); + /* set the timestamp */ + tx_buf->time_stamp = jiffies; + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 7bbcc7165d2..bcf03177356 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -53,9 +53,6 @@ (d) == I40E_QSFP_B_DEVICE_ID || \ (d) == I40E_QSFP_C_DEVICE_ID) -#define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0000 - #define I40E_MAX_VSI_QP 16 #define I40E_MAX_VF_VSI 3 #define I40E_MAX_CHAINED_RX_BUFFERS 5 @@ -92,7 +89,7 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); #define I40E_QTX_CTL_VF_QUEUE 0x0 #define I40E_QTX_CTL_PF_QUEUE 0x2 -/* debug masks */ +/* debug masks - set these bits in hw->debug_mask to control output */ enum i40e_debug_mask { I40E_DEBUG_INIT = 0x00000001, I40E_DEBUG_RELEASE = 0x00000002, @@ -106,10 +103,10 @@ enum i40e_debug_mask { I40E_DEBUG_DCB = 0x00000400, I40E_DEBUG_DIAG = 0x00000800, - I40E_DEBUG_AQ_MESSAGE = 0x01000000, /* for i40e_debug() */ + I40E_DEBUG_AQ_MESSAGE = 0x01000000, I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000, I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000, - I40E_DEBUG_AQ_COMMAND = 0x06000000, /* for i40e_debug_aq() */ + I40E_DEBUG_AQ_COMMAND = 0x06000000, I40E_DEBUG_AQ = 0x0F000000, I40E_DEBUG_USER = 0xF0000000, @@ -523,7 +520,7 @@ enum i40e_rx_desc_status_bits { #define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT I40E_RX_DESC_STATUS_TSYNVALID_SHIFT #define I40E_RXD_QW1_STATUS_TSYNVALID_MASK (0x1UL << \ - I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT) + I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT) enum i40e_rx_desc_fltstat_values { I40E_RX_DESC_FLTSTAT_NO_DATA = 0, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 685b2851550..efb9a242d27 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -369,7 +369,6 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) { struct i40e_mac_filter *f = NULL; struct i40e_pf *pf = vf->pf; - struct i40e_hw *hw = &pf->hw; struct i40e_vsi *vsi; int ret = 0; @@ -383,13 +382,30 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) goto error_alloc_vsi_res; } if (type == I40E_VSI_SRIOV) { + u8 brdcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; vf->lan_vsi_index = vsi->idx; vf->lan_vsi_id = vsi->id; dev_info(&pf->pdev->dev, "LAN VSI index %d, VSI id %d\n", vsi->idx, vsi->id); + /* If the port VLAN has been configured and then the + * VF driver was removed then the VSI port VLAN + * configuration was destroyed. Check if there is + * a port VLAN and restore the VSI configuration if + * needed. + */ + if (vf->port_vlan_id) + i40e_vsi_add_pvid(vsi, vf->port_vlan_id); f = i40e_add_filter(vsi, vf->default_lan_addr.addr, - 0, true, false); + vf->port_vlan_id, true, false); + if (!f) + dev_info(&pf->pdev->dev, + "Could not allocate VF MAC addr\n"); + f = i40e_add_filter(vsi, brdcast, vf->port_vlan_id, + true, false); + if (!f) + dev_info(&pf->pdev->dev, + "Could not allocate VF broadcast filter\n"); } if (!f) { @@ -405,15 +421,6 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) goto error_alloc_vsi_res; } - /* accept bcast pkts. by default */ - ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL); - if (ret) { - dev_err(&pf->pdev->dev, - "set vsi bcast failed for vf %d, vsi %d, aq_err %d\n", - vf->vf_id, vsi->idx, pf->hw.aq.asq_last_status); - ret = -EINVAL; - } - error_alloc_vsi_res: return ret; } @@ -592,8 +599,7 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) int vf_abs_id, i; u32 reg; - reg = rd32(hw, I40E_PF_VT_PFALLOC); - vf_abs_id = vf->vf_id + (reg & I40E_PF_VT_PFALLOC_FIRSTVF_MASK); + vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id; wr32(hw, I40E_PF_PCI_CIAA, VF_DEVICE_STATUS | (vf_abs_id << I40E_PF_PCI_CIAA_VF_NUM_SHIFT)); @@ -710,6 +716,76 @@ static bool i40e_vfs_are_assigned(struct i40e_pf *pf) return false; } +#ifdef CONFIG_PCI_IOV + +/** + * i40e_enable_pf_switch_lb + * @pf: pointer to the pf structure + * + * enable switch loop back or die - no point in a return value + **/ +static void i40e_enable_pf_switch_lb(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi_context ctxt; + int aq_ret; + + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + ctxt.vf_num = 0; + aq_ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "%s couldn't get pf vsi config, err %d, aq_err %d\n", + __func__, aq_ret, pf->hw.aq.asq_last_status); + return; + } + ctxt.flags = I40E_AQ_VSI_TYPE_PF; + ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + + aq_ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "%s: update vsi switch failed, aq_err=%d\n", + __func__, vsi->back->hw.aq.asq_last_status); + } +} +#endif + +/** + * i40e_disable_pf_switch_lb + * @pf: pointer to the pf structure + * + * disable switch loop back or die - no point in a return value + **/ +static void i40e_disable_pf_switch_lb(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_vsi_context ctxt; + int aq_ret; + + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + ctxt.vf_num = 0; + aq_ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "%s couldn't get pf vsi config, err %d, aq_err %d\n", + __func__, aq_ret, pf->hw.aq.asq_last_status); + return; + } + ctxt.flags = I40E_AQ_VSI_TYPE_PF; + ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + + aq_ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); + if (aq_ret) { + dev_info(&pf->pdev->dev, + "%s: update vsi switch failed, aq_err=%d\n", + __func__, vsi->back->hw.aq.asq_last_status); + } +} /** * i40e_free_vfs @@ -719,7 +795,9 @@ static bool i40e_vfs_are_assigned(struct i40e_pf *pf) **/ void i40e_free_vfs(struct i40e_pf *pf) { - int i, tmp; + struct i40e_hw *hw = &pf->hw; + u32 reg_idx, bit_idx; + int i, tmp, vf_id; if (!pf->vf) return; @@ -741,11 +819,21 @@ void i40e_free_vfs(struct i40e_pf *pf) kfree(pf->vf); pf->vf = NULL; - if (!i40e_vfs_are_assigned(pf)) + if (!i40e_vfs_are_assigned(pf)) { pci_disable_sriov(pf->pdev); - else + /* Acknowledge VFLR for all VFS. Without this, VFs will fail to + * work correctly when SR-IOV gets re-enabled. + */ + for (vf_id = 0; vf_id < tmp; vf_id++) { + reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), (1 << bit_idx)); + } + i40e_disable_pf_switch_lb(pf); + } else { dev_warn(&pf->pdev->dev, "unable to disable SR-IOV because VFs are assigned.\n"); + } /* Re-enable interrupt 0. */ i40e_irq_dynamic_enable_icr0(pf); @@ -799,6 +887,7 @@ static int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) pf->vf = vfs; pf->num_alloc_vfs = num_alloc_vfs; + i40e_enable_pf_switch_lb(pf); err_alloc: if (ret) i40e_free_vfs(pf); @@ -885,6 +974,7 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; + int true_vf_id = vf->vf_id + hw->func_caps.vf_base_id; i40e_status aq_ret; /* single place to detect unsuccessful return values */ @@ -904,8 +994,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, vf->num_valid_msgs++; } - aq_ret = i40e_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, - msg, msglen, NULL); + aq_ret = i40e_aq_send_msg_to_vf(hw, true_vf_id, v_opcode, v_retval, + msg, msglen, NULL); if (aq_ret) { dev_err(&pf->pdev->dev, "Unable to send the message to VF %d aq_err %d\n", @@ -1303,7 +1393,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } i40e_update_eth_stats(vsi); - memcpy(&stats, &vsi->eth_stats, sizeof(struct i40e_eth_stats)); + stats = vsi->eth_stats; error_param: /* send the response back to the vf */ @@ -1312,6 +1402,40 @@ error_param: } /** + * i40e_check_vf_permission + * @vf: pointer to the vf info + * @macaddr: pointer to the MAC Address being checked + * + * Check if the VF has permission to add or delete unicast MAC address + * filters and return error code -EPERM if not. Then check if the + * address filter requested is broadcast or zero and if so return + * an invalid MAC address error code. + **/ +static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr) +{ + struct i40e_pf *pf = vf->pf; + int ret = 0; + + if (is_broadcast_ether_addr(macaddr) || + is_zero_ether_addr(macaddr)) { + dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr); + ret = I40E_ERR_INVALID_MAC_ADDR; + } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) && + !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) { + /* If the host VMM administrator has set the VF MAC address + * administratively via the ndo_set_vf_mac command then deny + * permission to the VF to add or delete unicast MAC addresses. + * The VF may request to set the MAC address filter already + * assigned to it so do not return an error in that case. + */ + dev_err(&pf->pdev->dev, + "VF attempting to override administratively set MAC address\nPlease reload the VF driver to resume normal operation\n"); + ret = -EPERM; + } + return ret; +} + +/** * i40e_vc_add_mac_addr_msg * @vf: pointer to the vf info * @msg: pointer to the msg buffer @@ -1326,24 +1450,20 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; u16 vsi_id = al->vsi_id; - i40e_status aq_ret = 0; + i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { - aq_ret = I40E_ERR_PARAM; + ret = I40E_ERR_PARAM; goto error_param; } for (i = 0; i < al->num_elements; i++) { - if (is_broadcast_ether_addr(al->list[i].addr) || - is_zero_ether_addr(al->list[i].addr)) { - dev_err(&pf->pdev->dev, "invalid VF MAC addr %pMAC\n", - al->list[i].addr); - aq_ret = I40E_ERR_INVALID_MAC_ADDR; + ret = i40e_check_vf_permission(vf, al->list[i].addr); + if (ret) goto error_param; - } } vsi = pf->vsi[vsi_id]; @@ -1364,7 +1484,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (!f) { dev_err(&pf->pdev->dev, "Unable to add VF MAC filter\n"); - aq_ret = I40E_ERR_PARAM; + ret = I40E_ERR_PARAM; goto error_param; } } @@ -1376,7 +1496,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) error_param: /* send the response to the vf */ return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, - aq_ret); + ret); } /** @@ -1394,15 +1514,21 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; u16 vsi_id = al->vsi_id; - i40e_status aq_ret = 0; + i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { - aq_ret = I40E_ERR_PARAM; + ret = I40E_ERR_PARAM; goto error_param; } + + for (i = 0; i < al->num_elements; i++) { + ret = i40e_check_vf_permission(vf, al->list[i].addr); + if (ret) + goto error_param; + } vsi = pf->vsi[vsi_id]; /* delete addresses from the list */ @@ -1417,7 +1543,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) error_param: /* send the response to the vf */ return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, - aq_ret); + ret); } /** @@ -1646,22 +1772,23 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen) { struct i40e_hw *hw = &pf->hw; + int local_vf_id = vf_id - hw->func_caps.vf_base_id; struct i40e_vf *vf; int ret; pf->vf_aq_requests++; - if (vf_id >= pf->num_alloc_vfs) + if (local_vf_id >= pf->num_alloc_vfs) return -EINVAL; - vf = &(pf->vf[vf_id]); + vf = &(pf->vf[local_vf_id]); /* perform basic checks on the msg */ ret = i40e_vc_validate_vf_msg(vf, v_opcode, v_retval, msg, msglen); if (ret) { dev_err(&pf->pdev->dev, "Invalid message from vf %d, opcode %d, len %d\n", - vf_id, v_opcode, msglen); + local_vf_id, v_opcode, msglen); return ret; } - wr32(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_VFACTIVE); + wr32(hw, I40E_VFGEN_RSTAT1(local_vf_id), I40E_VFR_VFACTIVE); switch (v_opcode) { case I40E_VIRTCHNL_OP_VERSION: ret = i40e_vc_get_version_msg(vf); @@ -1705,8 +1832,8 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode, break; case I40E_VIRTCHNL_OP_UNKNOWN: default: - dev_err(&pf->pdev->dev, - "Unsupported opcode %d from vf %d\n", v_opcode, vf_id); + dev_err(&pf->pdev->dev, "Unsupported opcode %d from vf %d\n", + v_opcode, local_vf_id); ret = i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_NOT_IMPLEMENTED); break; @@ -1898,6 +2025,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; } memcpy(vf->default_lan_addr.addr, mac, ETH_ALEN); + vf->pf_set_mac = true; dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); ret = 0; @@ -1958,7 +2086,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, ret = i40e_vsi_add_pvid(vsi, vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT)); else - i40e_vlan_stripping_disable(vsi); + i40e_vsi_remove_pvid(vsi); if (vlan_id) { dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n", @@ -1978,6 +2106,10 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, dev_err(&pf->pdev->dev, "Unable to update VF vsi context\n"); goto error_pvid; } + /* The Port VLAN needs to be saved across resets the same as the + * default LAN MAC address. + */ + vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); ret = 0; error_pvid: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index d0b712c4bef..8d0f4dd2a25 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -82,6 +82,8 @@ struct i40e_vf { struct i40e_virtchnl_ether_addr default_lan_addr; struct i40e_virtchnl_ether_addr default_fcoe_addr; + u16 port_vlan_id; + bool pf_set_mac; /* The VMM admin set the VF MAC address */ /* VSI indices - actual VSI pointers are maintained in the PF structure * When assigned, these will be non-zero, because VSI 0 is always diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 9ce07f3ef62..359f6e60320 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -291,7 +291,9 @@ static int ixgbe_pci_sriov_disable(struct pci_dev *dev) { struct ixgbe_adapter *adapter = pci_get_drvdata(dev); int err; +#ifdef CONFIG_PCI_IOV u32 current_flags = adapter->flags; +#endif err = ixgbe_disable_sriov(adapter); diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 7354960b583..c4eeb69a5be 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -92,6 +92,12 @@ static int orion_mdio_wait_ready(struct mii_bus *bus) if (time_is_before_jiffies(end)) ++timedout; } else { + /* wait_event_timeout does not guarantee a delay of at + * least one whole jiffie, so timeout must be no less + * than two. + */ + if (timeout < 2) + timeout = 2; wait_event_timeout(dev->smi_busy_wait, orion_mdio_smi_is_done(dev), timeout); diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index eb520ab6401..563495d8975 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -6,6 +6,7 @@ config MLX4_EN tristate "Mellanox Technologies 10Gbit Ethernet support" depends on PCI select MLX4_CORE + select PTP_1588_CLOCK ---help--- This driver supports Mellanox Technologies ConnectX Ethernet devices. diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 60b927eab2b..fad45316200 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -632,8 +632,9 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) if (err) goto steer_err; - if (mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn, - &priv->tunnel_reg_id)) + err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn, + &priv->tunnel_reg_id); + if (err) goto tunnel_err; entry = kmalloc(sizeof(*entry), GFP_KERNEL); diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 3010abb55fb..32058614151 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -1602,13 +1602,13 @@ netxen_process_lro(struct netxen_adapter *adapter, u32 seq_number; u8 vhdr_len = 0; - if (unlikely(ring > adapter->max_rds_rings)) + if (unlikely(ring >= adapter->max_rds_rings)) return NULL; rds_ring = &recv_ctx->rds_rings[ring]; index = netxen_get_lro_sts_refhandle(sts_data0); - if (unlikely(index > rds_ring->num_desc)) + if (unlikely(index >= rds_ring->num_desc)) return NULL; buffer = &rds_ring->rx_buf_arr[index]; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 4afdef0cc17..35d48766d84 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -493,6 +493,7 @@ struct qlcnic_hardware_context { struct qlcnic_mailbox *mailbox; u8 extend_lb_time; u8 phys_port_id[ETH_ALEN]; + u8 lb_mode; }; struct qlcnic_adapter_stats { @@ -584,6 +585,8 @@ struct qlcnic_host_tx_ring { dma_addr_t phys_addr; dma_addr_t hw_cons_phys_addr; struct netdev_queue *txq; + /* Lock to protect Tx descriptors cleanup */ + spinlock_t tx_clean_lock; } ____cacheline_internodealigned_in_smp; /* @@ -815,6 +818,7 @@ struct qlcnic_mac_vlan_list { #define QLCNIC_ILB_MODE 0x1 #define QLCNIC_ELB_MODE 0x2 +#define QLCNIC_LB_MODE_MASK 0x3 #define QLCNIC_LINKEVENT 0x1 #define QLCNIC_LB_RESPONSE 0x2 @@ -1100,7 +1104,6 @@ struct qlcnic_adapter { struct qlcnic_filter_hash rx_fhash; struct list_head vf_mc_list; - spinlock_t tx_clean_lock; spinlock_t mac_learn_lock; /* spinlock for catching rcv filters for eswitch traffic */ spinlock_t rx_mac_learn_lock; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index b3fd1605773..03eb2ad9611 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1685,12 +1685,6 @@ int qlcnic_83xx_loopback_test(struct net_device *netdev, u8 mode) } } while ((adapter->ahw->linkup && ahw->has_link_events) != 1); - /* Make sure carrier is off and queue is stopped during loopback */ - if (netif_running(netdev)) { - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - } - ret = qlcnic_do_lb_test(adapter, mode); qlcnic_83xx_clear_lb_mode(adapter, mode); @@ -2122,6 +2116,7 @@ static void qlcnic_83xx_handle_link_aen(struct qlcnic_adapter *adapter, ahw->link_autoneg = MSB(MSW(data[3])); ahw->module_type = MSB(LSW(data[3])); ahw->has_link_events = 1; + ahw->lb_mode = data[4] & QLCNIC_LB_MODE_MASK; qlcnic_advert_link_change(adapter, link_status); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c index e9c21e5d0ca..c4262c23ed7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c @@ -134,6 +134,8 @@ void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter, struct qlcnic_skb_frag *buffrag; int i, j; + spin_lock(&tx_ring->tx_clean_lock); + cmd_buf = tx_ring->cmd_buf_arr; for (i = 0; i < tx_ring->num_desc; i++) { buffrag = cmd_buf->frag_array; @@ -157,6 +159,8 @@ void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter, } cmd_buf++; } + + spin_unlock(&tx_ring->tx_clean_lock); } void qlcnic_free_sw_resources(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index a215e0f6933..6373f602248 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -689,6 +689,10 @@ void qlcnic_advert_link_change(struct qlcnic_adapter *adapter, int linkup) adapter->ahw->linkup = 0; netif_carrier_off(netdev); } else if (!adapter->ahw->linkup && linkup) { + /* Do not advertise Link up if the port is in loopback mode */ + if (qlcnic_83xx_check(adapter) && adapter->ahw->lb_mode) + return; + netdev_info(netdev, "NIC Link is up\n"); adapter->ahw->linkup = 1; netif_carrier_on(netdev); @@ -778,7 +782,7 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, struct net_device *netdev = adapter->netdev; struct qlcnic_skb_frag *frag; - if (!spin_trylock(&adapter->tx_clean_lock)) + if (!spin_trylock(&tx_ring->tx_clean_lock)) return 1; sw_consumer = tx_ring->sw_consumer; @@ -807,8 +811,9 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, break; } + tx_ring->sw_consumer = sw_consumer; + if (count && netif_running(netdev)) { - tx_ring->sw_consumer = sw_consumer; smp_mb(); if (netif_tx_queue_stopped(tx_ring->txq) && netif_carrier_ok(netdev)) { @@ -834,7 +839,8 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter, */ hw_consumer = le32_to_cpu(*(tx_ring->hw_consumer)); done = (sw_consumer == hw_consumer); - spin_unlock(&adapter->tx_clean_lock); + + spin_unlock(&tx_ring->tx_clean_lock); return done; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index d131ec1321e..eeec83a0e66 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -1757,7 +1757,6 @@ void __qlcnic_down(struct qlcnic_adapter *adapter, struct net_device *netdev) if (qlcnic_sriov_vf_check(adapter)) qlcnic_sriov_cleanup_async_list(&adapter->ahw->sriov->bc); smp_mb(); - spin_lock(&adapter->tx_clean_lock); netif_carrier_off(netdev); adapter->ahw->linkup = 0; netif_tx_disable(netdev); @@ -1778,7 +1777,6 @@ void __qlcnic_down(struct qlcnic_adapter *adapter, struct net_device *netdev) for (ring = 0; ring < adapter->drv_tx_rings; ring++) qlcnic_release_tx_buffers(adapter, &adapter->tx_ring[ring]); - spin_unlock(&adapter->tx_clean_lock); } /* Usage: During suspend and firmware recovery module */ @@ -2173,6 +2171,7 @@ int qlcnic_alloc_tx_rings(struct qlcnic_adapter *adapter, } memset(cmd_buf_arr, 0, TX_BUFF_RINGSIZE(tx_ring)); tx_ring->cmd_buf_arr = cmd_buf_arr; + spin_lock_init(&tx_ring->tx_clean_lock); } if (qlcnic_83xx_check(adapter) || @@ -2300,7 +2299,6 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) rwlock_init(&adapter->ahw->crb_lock); mutex_init(&adapter->ahw->mem_lock); - spin_lock_init(&adapter->tx_clean_lock); INIT_LIST_HEAD(&adapter->mac_list); qlcnic_register_dcb(adapter); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 98b621fb122..d14d9a139ee 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -81,9 +81,12 @@ static int qlcnic_sriov_pf_cal_res_limit(struct qlcnic_adapter *adapter, if (qlcnic_83xx_pf_check(adapter)) num_macs = 1; + info->max_rx_mcast_mac_filters = res->num_rx_mcast_mac_filters; + if (adapter->ahw->pci_func == func) { info->min_tx_bw = 0; info->max_tx_bw = MAX_BW; + temp = res->num_rx_ucast_mac_filters - num_macs * num_vfs; info->max_rx_ucast_mac_filters = temp; temp = res->num_tx_mac_filters - num_macs * num_vfs; @@ -92,6 +95,7 @@ static int qlcnic_sriov_pf_cal_res_limit(struct qlcnic_adapter *adapter, temp = res->num_rx_mcast_mac_filters - temp; info->max_rx_mcast_mac_filters = temp; + info->max_tx_ques = res->num_tx_queues - sriov->num_vfs; } else { id = qlcnic_sriov_func_to_index(adapter, func); if (id < 0) @@ -99,10 +103,13 @@ static int qlcnic_sriov_pf_cal_res_limit(struct qlcnic_adapter *adapter, vp = sriov->vf_info[id].vp; info->min_tx_bw = vp->min_tx_bw; info->max_tx_bw = vp->max_tx_bw; + info->max_rx_ucast_mac_filters = num_macs; info->max_tx_mac_filters = num_macs; temp = num_macs * QLCNIC_SRIOV_VF_MAX_MAC; info->max_rx_mcast_mac_filters = temp; + + info->max_tx_ques = QLCNIC_SINGLE_RING; } info->max_rx_ip_addr = res->num_destip / max; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 21614102812..b8e3a4ce24b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -622,17 +622,15 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; - if (netif_msg_hw(priv)) { - if (priv->dma_cap.time_stamp) { - pr_debug("IEEE 1588-2002 Time Stamp supported\n"); - priv->adv_ts = 0; - } - if (priv->dma_cap.atime_stamp && priv->extend_desc) { - pr_debug - ("IEEE 1588-2008 Advanced Time Stamp supported\n"); - priv->adv_ts = 1; - } - } + priv->adv_ts = 0; + if (priv->dma_cap.atime_stamp && priv->extend_desc) + priv->adv_ts = 1; + + if (netif_msg_hw(priv) && priv->dma_cap.time_stamp) + pr_debug("IEEE 1588-2002 Time Stamp supported\n"); + + if (netif_msg_hw(priv) && priv->adv_ts) + pr_debug("IEEE 1588-2008 Advanced Time Stamp supported\n"); priv->hw->ptp = &stmmac_ptp; priv->hwts_tx_en = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index b8b0eeed0f9..7680581ebe1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -56,7 +56,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) priv->hw->ptp->config_addend(priv->ioaddr, addend); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } @@ -91,7 +91,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irqrestore(&priv->ptp_lock, flags); return 0; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 243fffbe18e..e8bb77d25d9 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -740,6 +740,8 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, /* set speed_in input in case RMII mode is used in 100Mbps */ if (phy->speed == 100) mac_control |= BIT(15); + else if (phy->speed == 10) + mac_control |= BIT(18); /* In Band mode */ *link = true; } else { @@ -2126,7 +2128,7 @@ static int cpsw_probe(struct platform_device *pdev) while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) { for (i = res->start; i <= res->end; i++) { if (devm_request_irq(&pdev->dev, i, cpsw_interrupt, 0, - dev_name(priv->dev), priv)) { + dev_name(&pdev->dev), priv)) { dev_err(priv->dev, "error attaching irq\n"); goto clean_ale_ret; } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 3169252613f..5d78c1d08ab 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -571,6 +571,8 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case HDLCDRVCTL_CALIBRATE: if(!capable(CAP_SYS_RAWIO)) return -EPERM; + if (bi.data.calibrate > INT_MAX / s->par.bitrate) + return -EINVAL; s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; return 0; diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 1971411574d..61dd2447e1b 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -1057,6 +1057,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCYAMGCFG: + memset(&yi, 0, sizeof(yi)); yi.cfg.mask = 0xffffffff; yi.cfg.iobase = yp->iobase; yi.cfg.irq = yp->irq; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f80bd0c90f1..7756118c2f0 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -260,9 +260,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct sk_buff *skb; net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; - if (!net) { - netdev_err(net, "got receive callback but net device" - " not initialized yet\n"); + if (!net || net->reg_state != NETREG_REGISTERED) { packet->status = NVSP_STAT_FAIL; return 0; } @@ -434,19 +432,11 @@ static int netvsc_probe(struct hv_device *dev, SET_ETHTOOL_OPS(net, ðtool_ops); SET_NETDEV_DEV(net, &dev->device); - ret = register_netdev(net); - if (ret != 0) { - pr_err("Unable to register netdev.\n"); - free_netdev(net); - goto out; - } - /* Notify the netvsc driver of the new device */ device_info.ring_size = ring_size; ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - unregister_netdev(net); free_netdev(net); hv_set_drvdata(dev, NULL); return ret; @@ -455,7 +445,13 @@ static int netvsc_probe(struct hv_device *dev, netif_carrier_on(net); -out: + ret = register_netdev(net); + if (ret != 0) { + pr_err("Unable to register netdev.\n"); + rndis_filter_device_remove(dev); + free_netdev(net); + } + return ret; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 94198366de7..09ababe54a5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -689,8 +689,19 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, netdev_features_t features) { struct macvlan_dev *vlan = netdev_priv(dev); + netdev_features_t mask; - return features & (vlan->set_features | ~MACVLAN_FEATURES); + features |= NETIF_F_ALL_FOR_ALL; + features &= (vlan->set_features | ~MACVLAN_FEATURES); + mask = features; + + features = netdev_increment_features(vlan->lowerdev->features, + features, + mask); + if (!vlan->fwd_priv) + features |= NETIF_F_LLTX; + + return features; } static const struct ethtool_ops macvlan_ethtool_ops = { @@ -1009,9 +1020,8 @@ static int macvlan_device_event(struct notifier_block *unused, break; case NETDEV_FEAT_CHANGE: list_for_each_entry(vlan, &port->vlans, list) { - vlan->dev->features = dev->features & MACVLAN_FEATURES; vlan->dev->gso_max_size = dev->gso_max_size; - netdev_features_change(vlan->dev); + netdev_update_features(vlan->dev); } break; case NETDEV_UNREGISTER: diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 19da5ab615b..76e8936ab9e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -533,10 +533,8 @@ phy_err: int phy_start_interrupts(struct phy_device *phydev) { atomic_set(&phydev->irq_disable, 0); - if (request_irq(phydev->irq, phy_interrupt, - IRQF_SHARED, - "phy_interrupt", - phydev) < 0) { + if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", + phydev) < 0) { pr_warn("%s: Can't get IRQ %d (PHY)\n", phydev->bus->name, phydev->irq); phydev->irq = PHY_POLL; diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 85e4a01670f..47b0f732b0b 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -276,12 +276,12 @@ config USB_NET_CDC_MBIM module will be called cdc_mbim. config USB_NET_DM9601 - tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" + tristate "Davicom DM96xx based USB 10/100 ethernet devices" depends on USB_USBNET select CRC32 help - This option adds support for Davicom DM9601 based USB 1.1 - 10/100 Ethernet adapters. + This option adds support for Davicom DM9601/DM9620/DM9621A + based USB 10/100 Ethernet adapters. config USB_NET_SR9700 tristate "CoreChip-sz SR9700 based USB 1.1 10/100 ethernet devices" diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index c6867f926cf..14aa48fa8d7 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -1,5 +1,5 @@ /* - * Davicom DM9601 USB 1.1 10/100Mbps ethernet devices + * Davicom DM96xx USB 10/100Mbps ethernet devices * * Peter Korsgaard <jacmet@sunsite.dk> * @@ -364,7 +364,12 @@ static int dm9601_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &dm9601_ethtool_ops; dev->net->hard_header_len += DM_TX_OVERHEAD; dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; - dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD; + + /* dm9620/21a require room for 4 byte padding, even in dm9601 + * mode, so we need +1 to be able to receive full size + * ethernet frames. + */ + dev->rx_urb_size = dev->net->mtu + ETH_HLEN + DM_RX_OVERHEAD + 1; dev->mii.dev = dev->net; dev->mii.mdio_read = dm9601_mdio_read; @@ -468,7 +473,7 @@ static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { - int len; + int len, pad; /* format: b1: packet length low @@ -476,12 +481,23 @@ static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, b3..n: packet data */ - len = skb->len; + len = skb->len + DM_TX_OVERHEAD; + + /* workaround for dm962x errata with tx fifo getting out of + * sync if a USB bulk transfer retry happens right after a + * packet with odd / maxpacket length by adding up to 3 bytes + * padding. + */ + while ((len & 1) || !(len % dev->maxpacket)) + len++; - if (skb_headroom(skb) < DM_TX_OVERHEAD) { + len -= DM_TX_OVERHEAD; /* hw header doesn't count as part of length */ + pad = len - skb->len; + + if (skb_headroom(skb) < DM_TX_OVERHEAD || skb_tailroom(skb) < pad) { struct sk_buff *skb2; - skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, 0, flags); + skb2 = skb_copy_expand(skb, DM_TX_OVERHEAD, pad, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) @@ -490,10 +506,10 @@ static struct sk_buff *dm9601_tx_fixup(struct usbnet *dev, struct sk_buff *skb, __skb_push(skb, DM_TX_OVERHEAD); - /* usbnet adds padding if length is a multiple of packet size - if so, adjust length value in header */ - if ((skb->len % dev->maxpacket) == 0) - len++; + if (pad) { + memset(skb->data + skb->len, 0, pad); + __skb_put(skb, pad); + } skb->data[0] = len; skb->data[1] = len >> 8; @@ -543,7 +559,7 @@ static int dm9601_link_reset(struct usbnet *dev) } static const struct driver_info dm9601_info = { - .description = "Davicom DM9601 USB Ethernet", + .description = "Davicom DM96xx USB 10/100 Ethernet", .flags = FLAG_ETHER | FLAG_LINK_INTR, .bind = dm9601_bind, .rx_fixup = dm9601_rx_fixup, @@ -594,6 +610,10 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a46, 0x9620), /* DM9620 USB to Fast Ethernet Adapter */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0a46, 0x9621), /* DM9621A USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9601_info, + }, {}, // END }; @@ -612,5 +632,5 @@ static struct usb_driver dm9601_driver = { module_usb_driver(dm9601_driver); MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>"); -MODULE_DESCRIPTION("Davicom DM9601 USB 1.1 ethernet devices"); +MODULE_DESCRIPTION("Davicom DM96xx USB 10/100 ethernet devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 86292e6aaf4..1a482344b3f 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -185,7 +185,6 @@ enum rx_ctrl_state{ #define BM_REQUEST_TYPE (0xa1) #define B_NOTIFICATION (0x20) #define W_VALUE (0x0) -#define W_INDEX (0x2) #define W_LENGTH (0x2) #define B_OVERRUN (0x1<<6) @@ -1487,6 +1486,7 @@ static void tiocmget_intr_callback(struct urb *urb) struct uart_icount *icount; struct hso_serial_state_notification *serial_state_notification; struct usb_device *usb; + int if_num; /* Sanity checks */ if (!serial) @@ -1495,15 +1495,24 @@ static void tiocmget_intr_callback(struct urb *urb) handle_usb_error(status, __func__, serial->parent); return; } + + /* tiocmget is only supported on HSO_PORT_MODEM */ tiocmget = serial->tiocmget; if (!tiocmget) return; + BUG_ON((serial->parent->port_spec & HSO_PORT_MASK) != HSO_PORT_MODEM); + usb = serial->parent->usb; + if_num = serial->parent->interface->altsetting->desc.bInterfaceNumber; + + /* wIndex should be the USB interface number of the port to which the + * notification applies, which should always be the Modem port. + */ serial_state_notification = &tiocmget->serial_state_notification; if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE || serial_state_notification->bNotification != B_NOTIFICATION || le16_to_cpu(serial_state_notification->wValue) != W_VALUE || - le16_to_cpu(serial_state_notification->wIndex) != W_INDEX || + le16_to_cpu(serial_state_notification->wIndex) != if_num || le16_to_cpu(serial_state_notification->wLength) != W_LENGTH) { dev_warn(&usb->dev, "hso received invalid serial state notification\n"); diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index aea68bc3358..36ff0019aa3 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -116,7 +116,6 @@ enum { struct mcs7830_data { u8 multi_filter[8]; u8 config; - u8 link_counter; }; static const char driver_name[] = "MOSCHIP usb-ethernet driver"; @@ -560,26 +559,16 @@ static void mcs7830_status(struct usbnet *dev, struct urb *urb) { u8 *buf = urb->transfer_buffer; bool link, link_changed; - struct mcs7830_data *data = mcs7830_get_data(dev); if (urb->actual_length < 16) return; - link = !(buf[1] & 0x20); + link = !(buf[1] == 0x20); link_changed = netif_carrier_ok(dev->net) != link; if (link_changed) { - data->link_counter++; - /* - track link state 20 times to guard against erroneous - link state changes reported sometimes by the chip - */ - if (data->link_counter > 20) { - data->link_counter = 0; - usbnet_link_change(dev, link, 0); - netdev_dbg(dev->net, "Link Status is: %d\n", link); - } - } else - data->link_counter = 0; + usbnet_link_change(dev, link, 0); + netdev_dbg(dev->net, "Link Status is: %d\n", link); + } } static const struct driver_info moschip_info = { diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 13fabbbaf54..bf7d549ab51 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -584,8 +584,8 @@ int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data) static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, void *data, u16 type) { - u16 limit = 64; - int ret = 0; + u16 limit = 64; + int ret = 0; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; @@ -624,9 +624,9 @@ static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size, static int generic_ocp_write(struct r8152 *tp, u16 index, u16 byteen, u16 size, void *data, u16 type) { - int ret; - u16 byteen_start, byteen_end, byen; - u16 limit = 512; + int ret; + u16 byteen_start, byteen_end, byen; + u16 limit = 512; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return -ENODEV; @@ -966,10 +966,12 @@ static void read_bulk_callback(struct urb *urb) case -ENOENT: return; /* the urb is in unlink state */ case -ETIME: - pr_warn_ratelimited("may be reset is needed?..\n"); + if (net_ratelimit()) + netdev_warn(netdev, "maybe reset is needed?\n"); break; default: - pr_warn_ratelimited("Rx status %d\n", status); + if (net_ratelimit()) + netdev_warn(netdev, "Rx status %d\n", status); break; } @@ -1002,7 +1004,8 @@ static void write_bulk_callback(struct urb *urb) stats = rtl8152_get_stats(tp->netdev); if (status) { - pr_warn_ratelimited("Tx status %d\n", status); + if (net_ratelimit()) + netdev_warn(tp->netdev, "Tx status %d\n", status); stats->tx_errors += agg->skb_num; } else { stats->tx_packets += agg->skb_num; @@ -1079,7 +1082,7 @@ resubmit: netif_device_detach(tp->netdev); else if (res) netif_err(tp, intr, tp->netdev, - "can't resubmit intr, status %d\n", res); + "can't resubmit intr, status %d\n", res); } static inline void *rx_agg_align(void *data) @@ -1490,7 +1493,7 @@ static void rtl8152_tx_timeout(struct net_device *netdev) struct r8152 *tp = netdev_priv(netdev); int i; - netif_warn(tp, tx_err, netdev, "Tx timeout.\n"); + netif_warn(tp, tx_err, netdev, "Tx timeout\n"); for (i = 0; i < RTL8152_MAX_TX; i++) usb_unlink_urb(tp->tx_info[i].urb); } @@ -1812,8 +1815,8 @@ static void r8152b_exit_oob(struct r8152 *tp) static void r8152b_enter_oob(struct r8152 *tp) { - u32 ocp_data; - int i; + u32 ocp_data; + int i; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; @@ -2284,8 +2287,8 @@ static int rtl8152_open(struct net_device *netdev) if (res) { if (res == -ENODEV) netif_device_detach(tp->netdev); - netif_warn(tp, ifup, netdev, - "intr_urb submit failed: %d\n", res); + netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n", + res); return res; } @@ -2340,7 +2343,7 @@ static void r8153_clear_bp(struct r8152 *tp) static void r8152b_enable_eee(struct r8152 *tp) { - u32 ocp_data; + u32 ocp_data; ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); ocp_data |= EEE_RX_EN | EEE_TX_EN; @@ -2685,10 +2688,10 @@ static void rtl8153_unload(struct r8152 *tp) r8153_power_cut_en(tp, 1); } -static bool rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) +static int rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) { struct rtl_ops *ops = &tp->rtl_ops; - bool ret = true; + int ret = -ENODEV; switch (id->idVendor) { case VENDOR_ID_REALTEK: @@ -2699,6 +2702,7 @@ static bool rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) ops->disable = rtl8152_disable; ops->down = rtl8152_down; ops->unload = rtl8152_unload; + ret = 0; break; case PRODUCT_ID_RTL8153: ops->init = r8153_init; @@ -2706,9 +2710,9 @@ static bool rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) ops->disable = rtl8152_disable; ops->down = rtl8153_down; ops->unload = rtl8153_unload; + ret = 0; break; default: - ret = false; break; } break; @@ -2721,18 +2725,20 @@ static bool rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) ops->disable = rtl8152_disable; ops->down = rtl8153_down; ops->unload = rtl8153_unload; + ret = 0; break; default: - ret = false; break; } break; default: - ret = false; break; } + if (ret) + netif_err(tp, probe, tp->netdev, "Unknown Device\n"); + return ret; } @@ -2751,7 +2757,7 @@ static int rtl8152_probe(struct usb_interface *intf, netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { - dev_err(&intf->dev, "Out of memory"); + dev_err(&intf->dev, "Out of memory\n"); return -ENOMEM; } @@ -2759,17 +2765,17 @@ static int rtl8152_probe(struct usb_interface *intf, tp = netdev_priv(netdev); tp->msg_enable = 0x7FFF; - if (!rtl_ops_init(tp, id)) { - netif_err(tp, probe, netdev, "Unknown Device"); - return -ENODEV; - } + tp->udev = udev; + tp->netdev = netdev; + tp->intf = intf; + + ret = rtl_ops_init(tp, id); + if (ret) + goto out; tasklet_init(&tp->tl, bottom_half, (unsigned long)tp); INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t); - tp->udev = udev; - tp->netdev = netdev; - tp->intf = intf; netdev->netdev_ops = &rtl8152_netdev_ops; netdev->watchdog_timeo = RTL8152_TX_TIMEOUT; @@ -2797,11 +2803,11 @@ static int rtl8152_probe(struct usb_interface *intf, ret = register_netdev(netdev); if (ret != 0) { - netif_err(tp, probe, netdev, "couldn't register the device"); + netif_err(tp, probe, netdev, "couldn't register the device\n"); goto out1; } - netif_info(tp, probe, netdev, "%s", DRIVER_VERSION); + netif_info(tp, probe, netdev, "%s\n", DRIVER_VERSION); return 0; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c51a98867a4..7b172408cff 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1788,16 +1788,17 @@ static int virtnet_restore(struct virtio_device *vdev) if (err) return err; - if (netif_running(vi->dev)) + if (netif_running(vi->dev)) { + for (i = 0; i < vi->curr_queue_pairs; i++) + if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_enable(&vi->rq[i]); + } netif_device_attach(vi->dev); - for (i = 0; i < vi->curr_queue_pairs; i++) - if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - mutex_lock(&vi->config_lock); vi->config_enable = true; mutex_unlock(&vi->config_lock); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 474a99ed022..481f85d604a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1381,20 +1381,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_sock_put(struct sk_buff *skb) -{ - sock_put(skb->sk); -} - -/* On transmit, associate with the tunnel socket */ -static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb) -{ - skb_orphan(skb); - sock_hold(sk); - skb->sk = sk; - skb->destructor = vxlan_sock_put; -} - /* Compute source port for outgoing packet * first choice to use L4 flow hash since it will spread * better and maybe available from hardware @@ -1514,8 +1500,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - vxlan_set_owner(vs->sock->sk, skb); - err = handle_offloads(skb); if (err) return err; @@ -1572,8 +1556,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, uh->len = htons(skb->len); uh->check = 0; - vxlan_set_owner(vs->sock->sk, skb); - err = handle_offloads(skb); if (err) return err; @@ -1786,7 +1768,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct ethhdr *eth; bool did_rsc = false; - struct vxlan_rdst *rdst; + struct vxlan_rdst *rdst, *fdst = NULL; struct vxlan_fdb *f; skb_reset_mac_header(skb); @@ -1828,7 +1810,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) vxlan_fdb_miss(vxlan, eth->h_dest); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } } @@ -1836,12 +1818,19 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) list_for_each_entry_rcu(rdst, &f->remotes, list) { struct sk_buff *skb1; + if (!fdst) { + fdst = rdst; + continue; + } skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) vxlan_xmit_one(skb1, dev, rdst, did_rsc); } - dev_kfree_skb(skb); + if (fdst) + vxlan_xmit_one(skb, dev, fdst, did_rsc); + else + kfree_skb(skb); return NETDEV_TX_OK; } @@ -2477,7 +2466,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, /* update header length based on lower device */ dev->hard_header_len = lowerdev->hard_header_len + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); - } + } else if (use_ipv6) + vxlan->flags |= VXLAN_F_IPV6; if (data[IFLA_VXLAN_TOS]) vxlan->tos = nla_get_u8(data[IFLA_VXLAN_TOS]); diff --git a/drivers/net/wireless/ath/ath9k/ar9002_mac.c b/drivers/net/wireless/ath/ath9k/ar9002_mac.c index 857ede3a999..741b38ddcb3 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_mac.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_mac.c @@ -77,9 +77,16 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked, mask2 |= ATH9K_INT_CST; if (isr2 & AR_ISR_S2_TSFOOR) mask2 |= ATH9K_INT_TSFOOR; + + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR_S2, isr2); + isr &= ~AR_ISR_BCNMISC; + } } - isr = REG_READ(ah, AR_ISR_RAC); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) + isr = REG_READ(ah, AR_ISR_RAC); + if (isr == 0xffffffff) { *masked = 0; return false; @@ -98,11 +105,23 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked, *masked |= ATH9K_INT_TX; - s0_s = REG_READ(ah, AR_ISR_S0_S); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) { + s0_s = REG_READ(ah, AR_ISR_S0_S); + s1_s = REG_READ(ah, AR_ISR_S1_S); + } else { + s0_s = REG_READ(ah, AR_ISR_S0); + REG_WRITE(ah, AR_ISR_S0, s0_s); + s1_s = REG_READ(ah, AR_ISR_S1); + REG_WRITE(ah, AR_ISR_S1, s1_s); + + isr &= ~(AR_ISR_TXOK | + AR_ISR_TXDESC | + AR_ISR_TXERR | + AR_ISR_TXEOL); + } + ah->intr_txqs |= MS(s0_s, AR_ISR_S0_QCU_TXOK); ah->intr_txqs |= MS(s0_s, AR_ISR_S0_QCU_TXDESC); - - s1_s = REG_READ(ah, AR_ISR_S1_S); ah->intr_txqs |= MS(s1_s, AR_ISR_S1_QCU_TXERR); ah->intr_txqs |= MS(s1_s, AR_ISR_S1_QCU_TXEOL); } @@ -115,13 +134,15 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked, *masked |= mask2; } - if (AR_SREV_9100(ah)) - return true; - - if (isr & AR_ISR_GENTMR) { + if (!AR_SREV_9100(ah) && (isr & AR_ISR_GENTMR)) { u32 s5_s; - s5_s = REG_READ(ah, AR_ISR_S5_S); + if (pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED) { + s5_s = REG_READ(ah, AR_ISR_S5_S); + } else { + s5_s = REG_READ(ah, AR_ISR_S5); + } + ah->intr_gen_timer_trigger = MS(s5_s, AR_ISR_S5_GENTIMER_TRIG); @@ -134,8 +155,21 @@ static bool ar9002_hw_get_isr(struct ath_hw *ah, enum ath9k_int *masked, if ((s5_s & AR_ISR_S5_TIM_TIMER) && !(pCap->hw_caps & ATH9K_HW_CAP_AUTOSLEEP)) *masked |= ATH9K_INT_TIM_TIMER; + + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR_S5, s5_s); + isr &= ~AR_ISR_GENTMR; + } } + if (!(pCap->hw_caps & ATH9K_HW_CAP_RAC_SUPPORTED)) { + REG_WRITE(ah, AR_ISR, isr); + REG_READ(ah, AR_ISR); + } + + if (AR_SREV_9100(ah)) + return true; + if (sync_cause) { if (sync_cause_p) *sync_cause_p = sync_cause; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 9a2657fdd9c..608d739d137 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -127,21 +127,26 @@ static void ath9k_htc_bssid_iter(void *data, u8 *mac, struct ieee80211_vif *vif) struct ath9k_vif_iter_data *iter_data = data; int i; - for (i = 0; i < ETH_ALEN; i++) - iter_data->mask[i] &= ~(iter_data->hw_macaddr[i] ^ mac[i]); + if (iter_data->hw_macaddr != NULL) { + for (i = 0; i < ETH_ALEN; i++) + iter_data->mask[i] &= ~(iter_data->hw_macaddr[i] ^ mac[i]); + } else { + iter_data->hw_macaddr = mac; + } } -static void ath9k_htc_set_bssid_mask(struct ath9k_htc_priv *priv, +static void ath9k_htc_set_mac_bssid_mask(struct ath9k_htc_priv *priv, struct ieee80211_vif *vif) { struct ath_common *common = ath9k_hw_common(priv->ah); struct ath9k_vif_iter_data iter_data; /* - * Use the hardware MAC address as reference, the hardware uses it - * together with the BSSID mask when matching addresses. + * Pick the MAC address of the first interface as the new hardware + * MAC address. The hardware will use it together with the BSSID mask + * when matching addresses. */ - iter_data.hw_macaddr = common->macaddr; + iter_data.hw_macaddr = NULL; memset(&iter_data.mask, 0xff, ETH_ALEN); if (vif) @@ -153,6 +158,10 @@ static void ath9k_htc_set_bssid_mask(struct ath9k_htc_priv *priv, ath9k_htc_bssid_iter, &iter_data); memcpy(common->bssidmask, iter_data.mask, ETH_ALEN); + + if (iter_data.hw_macaddr) + memcpy(common->macaddr, iter_data.hw_macaddr, ETH_ALEN); + ath_hw_setbssidmask(common); } @@ -1063,7 +1072,7 @@ static int ath9k_htc_add_interface(struct ieee80211_hw *hw, goto out; } - ath9k_htc_set_bssid_mask(priv, vif); + ath9k_htc_set_mac_bssid_mask(priv, vif); priv->vif_slot |= (1 << avp->index); priv->nvifs++; @@ -1128,7 +1137,7 @@ static void ath9k_htc_remove_interface(struct ieee80211_hw *hw, ath9k_htc_set_opmode(priv); - ath9k_htc_set_bssid_mask(priv, vif); + ath9k_htc_set_mac_bssid_mask(priv, vif); /* * Stop ANI only if there are no associated station interfaces. diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 173a889f9db..21b764ba640 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -994,8 +994,9 @@ void ath9k_calculate_iter_data(struct ieee80211_hw *hw, struct ath_common *common = ath9k_hw_common(ah); /* - * Use the hardware MAC address as reference, the hardware uses it - * together with the BSSID mask when matching addresses. + * Pick the MAC address of the first interface as the new hardware + * MAC address. The hardware will use it together with the BSSID mask + * when matching addresses. */ memset(iter_data, 0, sizeof(*iter_data)); memset(&iter_data->mask, 0xff, ETH_ALEN); diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 8707d1a9499..d7aa165fe67 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -738,6 +738,8 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) }; int index = rtlpci->rx_ring[rx_queue_idx].idx; + if (rtlpci->driver_is_goingto_unload) + return; /*RX NORMAL PKT */ while (count--) { /*rx descriptor */ @@ -1634,6 +1636,7 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) */ set_hal_stop(rtlhal); + rtlpci->driver_is_goingto_unload = true; rtlpriv->cfg->ops->disable_interrupt(hw); cancel_work_sync(&rtlpriv->works.lps_change_work); @@ -1651,7 +1654,6 @@ static void rtl_pci_stop(struct ieee80211_hw *hw) ppsc->rfchange_inprogress = true; spin_unlock_irqrestore(&rtlpriv->locks.rf_ps_lock, flags); - rtlpci->driver_is_goingto_unload = true; rtlpriv->cfg->ops->hw_disable(hw); /* some things are not needed if firmware not available */ if (!rtlpriv->max_fw_size) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ba30a6d9fef..c955fc39d69 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -101,6 +101,13 @@ struct xenvif_rx_meta { #define MAX_PENDING_REQS 256 +/* It's possible for an skb to have a maximal number of frags + * but still be less than MAX_BUFFER_OFFSET in size. Thus the + * worst-case number of copy operations is MAX_SKB_FRAGS per + * ring slot. + */ +#define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE) + struct xenvif { /* Unique identifier for this interface. */ domid_t domid; @@ -141,13 +148,13 @@ struct xenvif { */ bool rx_event; - /* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each - * head/fragment page uses 2 copy operations because it - * straddles two buffers in the frontend. - */ - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; - struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; + /* This array is allocated seperately as it is large */ + struct gnttab_copy *grant_copy_op; + /* We create one meta structure per ring request we consume, so + * the maximum number is the same as the ring size. + */ + struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE]; u8 fe_dev_addr[6]; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 1dcb9606e6e..b9de31ea7fc 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -34,6 +34,7 @@ #include <linux/ethtool.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> +#include <linux/vmalloc.h> #include <xen/events.h> #include <asm/xen/hypercall.h> @@ -307,6 +308,15 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, SET_NETDEV_DEV(dev, parent); vif = netdev_priv(dev); + + vif->grant_copy_op = vmalloc(sizeof(struct gnttab_copy) * + MAX_GRANT_COPY_OPS); + if (vif->grant_copy_op == NULL) { + pr_warn("Could not allocate grant copy space for %s\n", name); + free_netdev(dev); + return ERR_PTR(-ENOMEM); + } + vif->domid = domid; vif->handle = handle; vif->can_sg = 1; @@ -488,6 +498,7 @@ void xenvif_free(struct xenvif *vif) unregister_netdev(vif->dev); + vfree(vif->grant_copy_op); free_netdev(vif->dev); module_put(THIS_MODULE); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 611aebee458..4f81ac0e2f0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -524,7 +524,7 @@ static void xenvif_rx_action(struct xenvif *vif) if (!npo.copy_prod) goto done; - BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); + BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { @@ -1108,8 +1108,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) + offsetof(struct tcphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) tcp_hdr(skb)->check = @@ -1126,8 +1128,10 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) + offsetof(struct udphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) udp_hdr(skb)->check = @@ -1249,8 +1253,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) + offsetof(struct tcphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) tcp_hdr(skb)->check = @@ -1267,8 +1273,10 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) + offsetof(struct udphdr, check))) { + err = -EPROTO; goto out; + } if (recalculate_partial_csum) udp_hdr(skb)->check = diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index de6f8990246..c6973f101a3 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -20,7 +20,7 @@ config OF_SELFTEST depends on OF_IRQ help This option builds in test cases for the device tree infrastructure - that are executed one at boot time, and the results dumped to the + that are executed once at boot time, and the results dumped to the console. If unsure, say N here, but this option is safe to enable. diff --git a/drivers/of/address.c b/drivers/of/address.c index 4b9317bdb81..d3dd41c840f 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -69,14 +69,6 @@ static u64 of_bus_default_map(__be32 *addr, const __be32 *range, (unsigned long long)cp, (unsigned long long)s, (unsigned long long)da); - /* - * If the number of address cells is larger than 2 we assume the - * mapping doesn't specify a physical address. Rather, the address - * specifies an identifier that must match exactly. - */ - if (na > 2 && memcmp(range, addr, na * 4) != 0) - return OF_BAD_ADDR; - if (da < cp || da >= (cp + s)) return OF_BAD_ADDR; return da - cp; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 2fa024b97c4..758b4f8b30b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -922,8 +922,16 @@ void __init unflatten_device_tree(void) */ void __init unflatten_and_copy_device_tree(void) { - int size = __be32_to_cpu(initial_boot_params->totalsize); - void *dt = early_init_dt_alloc_memory_arch(size, + int size; + void *dt; + + if (!initial_boot_params) { + pr_warn("No valid device tree found, continuing without\n"); + return; + } + + size = __be32_to_cpu(initial_boot_params->totalsize); + dt = early_init_dt_alloc_memory_arch(size, __alignof__(struct boot_param_header)); if (dt) { diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 786b0b47fae..27212402c53 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -165,7 +165,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) if (of_get_property(ipar, "interrupt-controller", NULL) != NULL) { pr_debug(" -> got it !\n"); - of_node_put(old); return 0; } @@ -250,8 +249,7 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) * Successfully parsed an interrrupt-map translation; copy new * interrupt specifier into the out_irq structure */ - of_node_put(out_irq->np); - out_irq->np = of_node_get(newpar); + out_irq->np = newpar; match_array = imap - newaddrsize - newintsize; for (i = 0; i < newintsize; i++) @@ -268,7 +266,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) } fail: of_node_put(ipar); - of_node_put(out_irq->np); of_node_put(newpar); return -EINVAL; diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index a344f3d5236..330ef2d0656 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -24,8 +24,8 @@ config PHY_EXYNOS_MIPI_VIDEO config OMAP_USB2 tristate "OMAP USB2 PHY Driver" depends on ARCH_OMAP2PLUS + depends on USB_PHY select GENERIC_PHY - select USB_PHY select OMAP_CONTROL_USB help Enable this to support the transceiver that is part of SOC. This @@ -36,8 +36,8 @@ config OMAP_USB2 config TWL4030_USB tristate "TWL4030 USB Transceiver Driver" depends on TWL4030_CORE && REGULATOR_TWL4030 && USB_MUSB_OMAP2PLUS + depends on USB_PHY select GENERIC_PHY - select USB_PHY help Enable this to support the USB OTG transceiver on TWL4030 family chips (including the TWL5030 and TPS659x0 devices). diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 03cf8fb8155..58e0e973902 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -437,23 +437,18 @@ struct phy *phy_create(struct device *dev, const struct phy_ops *ops, int id; struct phy *phy; - if (!dev) { - dev_WARN(dev, "no device provided for PHY\n"); - ret = -EINVAL; - goto err0; - } + if (WARN_ON(!dev)) + return ERR_PTR(-EINVAL); phy = kzalloc(sizeof(*phy), GFP_KERNEL); - if (!phy) { - ret = -ENOMEM; - goto err0; - } + if (!phy) + return ERR_PTR(-ENOMEM); id = ida_simple_get(&phy_ida, 0, 0, GFP_KERNEL); if (id < 0) { dev_err(dev, "unable to get id\n"); ret = id; - goto err0; + goto free_phy; } device_initialize(&phy->dev); @@ -468,11 +463,11 @@ struct phy *phy_create(struct device *dev, const struct phy_ops *ops, ret = dev_set_name(&phy->dev, "phy-%s.%d", dev_name(dev), id); if (ret) - goto err1; + goto put_dev; ret = device_add(&phy->dev); if (ret) - goto err1; + goto put_dev; if (pm_runtime_enabled(dev)) { pm_runtime_enable(&phy->dev); @@ -481,12 +476,11 @@ struct phy *phy_create(struct device *dev, const struct phy_ops *ops, return phy; -err1: - ida_remove(&phy_ida, phy->id); +put_dev: put_device(&phy->dev); + ida_remove(&phy_ida, phy->id); +free_phy: kfree(phy); - -err0: return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(phy_create); diff --git a/drivers/pinctrl/pinctrl-baytrail.c b/drivers/pinctrl/pinctrl-baytrail.c index 2832576d8b1..114f5ef4b73 100644 --- a/drivers/pinctrl/pinctrl-baytrail.c +++ b/drivers/pinctrl/pinctrl-baytrail.c @@ -512,6 +512,7 @@ static const struct dev_pm_ops byt_gpio_pm_ops = { static const struct acpi_device_id byt_gpio_acpi_match[] = { { "INT33B2", 0 }, + { "INT33FC", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, byt_gpio_acpi_match); diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 2a786c50446..3c6768378a9 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -833,6 +833,11 @@ static int rapl_write_data_raw(struct rapl_domain *rd, return 0; } +static const struct x86_cpu_id energy_unit_quirk_ids[] = { + { X86_VENDOR_INTEL, 6, 0x37},/* VLV */ + {} +}; + static int rapl_check_unit(struct rapl_package *rp, int cpu) { u64 msr_val; @@ -853,8 +858,11 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu) * time unit: 1/time_unit_divisor Seconds */ value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; - rp->energy_unit_divisor = 1 << value; - + /* some CPUs have different way to calculate energy unit */ + if (x86_match_cpu(energy_unit_quirk_ids)) + rp->energy_unit_divisor = 1000000 / (1 << value); + else + rp->energy_unit_divisor = 1 << value; value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; rp->power_unit_divisor = 1 << value; @@ -941,6 +949,7 @@ static void package_power_limit_irq_restore(int package_id) static const struct x86_cpu_id rapl_ids[] = { { X86_VENDOR_INTEL, 6, 0x2a},/* SNB */ { X86_VENDOR_INTEL, 6, 0x2d},/* SNB EP */ + { X86_VENDOR_INTEL, 6, 0x37},/* VLV */ { X86_VENDOR_INTEL, 6, 0x3a},/* IVB */ { X86_VENDOR_INTEL, 6, 0x45},/* HSW */ /* TODO: Add more CPU IDs after testing */ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 596480022b0..38a1257e76e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -471,7 +471,7 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, schedule_delayed_work(&tgt->sess_del_work, 0); else schedule_delayed_work(&tgt->sess_del_work, - jiffies - sess->expires); + sess->expires - jiffies); } /* ha->hardware_lock supposed to be held on entry */ @@ -550,13 +550,14 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt_sess *sess; - unsigned long flags; + unsigned long flags, elapsed; spin_lock_irqsave(&ha->hardware_lock, flags); while (!list_empty(&tgt->del_sess_list)) { sess = list_entry(tgt->del_sess_list.next, typeof(*sess), del_list_entry); - if (time_after_eq(jiffies, sess->expires)) { + elapsed = jiffies; + if (time_after_eq(elapsed, sess->expires)) { qlt_undelete_sess(sess); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, @@ -566,7 +567,7 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) ha->tgt.tgt_ops->put_sess(sess); } else { schedule_delayed_work(&tgt->sess_del_work, - jiffies - sess->expires); + sess->expires - elapsed); break; } } @@ -4290,6 +4291,7 @@ int qlt_lport_register(struct qla_tgt_func_tmpl *qla_tgt_ops, u64 wwpn, if (rc != 0) { ha->tgt.tgt_ops = NULL; ha->tgt.target_lport_ptr = NULL; + scsi_host_put(host); } mutex_unlock(&qla_tgt_mutex); return rc; diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index 8f02bf66e20..4964d2a2fc7 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -446,7 +446,7 @@ int comedi_load_firmware(struct comedi_device *dev, release_firmware(fw); } - return ret; + return ret < 0 ? ret : 0; } EXPORT_SYMBOL_GPL(comedi_load_firmware); diff --git a/drivers/staging/comedi/drivers/8255_pci.c b/drivers/staging/comedi/drivers/8255_pci.c index 432e3f9c330..c55f234b29e 100644 --- a/drivers/staging/comedi/drivers/8255_pci.c +++ b/drivers/staging/comedi/drivers/8255_pci.c @@ -63,7 +63,8 @@ enum pci_8255_boardid { BOARD_ADLINK_PCI7296, BOARD_CB_PCIDIO24, BOARD_CB_PCIDIO24H, - BOARD_CB_PCIDIO48H, + BOARD_CB_PCIDIO48H_OLD, + BOARD_CB_PCIDIO48H_NEW, BOARD_CB_PCIDIO96H, BOARD_NI_PCIDIO96, BOARD_NI_PCIDIO96B, @@ -106,11 +107,16 @@ static const struct pci_8255_boardinfo pci_8255_boards[] = { .dio_badr = 2, .n_8255 = 1, }, - [BOARD_CB_PCIDIO48H] = { + [BOARD_CB_PCIDIO48H_OLD] = { .name = "cb_pci-dio48h", .dio_badr = 1, .n_8255 = 2, }, + [BOARD_CB_PCIDIO48H_NEW] = { + .name = "cb_pci-dio48h", + .dio_badr = 2, + .n_8255 = 2, + }, [BOARD_CB_PCIDIO96H] = { .name = "cb_pci-dio96h", .dio_badr = 2, @@ -263,7 +269,10 @@ static DEFINE_PCI_DEVICE_TABLE(pci_8255_pci_table) = { { PCI_VDEVICE(ADLINK, 0x7296), BOARD_ADLINK_PCI7296 }, { PCI_VDEVICE(CB, 0x0028), BOARD_CB_PCIDIO24 }, { PCI_VDEVICE(CB, 0x0014), BOARD_CB_PCIDIO24H }, - { PCI_VDEVICE(CB, 0x000b), BOARD_CB_PCIDIO48H }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CB, 0x000b, 0x0000, 0x0000), + .driver_data = BOARD_CB_PCIDIO48H_OLD }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CB, 0x000b, PCI_VENDOR_ID_CB, 0x000b), + .driver_data = BOARD_CB_PCIDIO48H_NEW }, { PCI_VDEVICE(CB, 0x0017), BOARD_CB_PCIDIO96H }, { PCI_VDEVICE(NI, 0x0160), BOARD_NI_PCIDIO96 }, { PCI_VDEVICE(NI, 0x1630), BOARD_NI_PCIDIO96B }, diff --git a/drivers/staging/iio/magnetometer/hmc5843.c b/drivers/staging/iio/magnetometer/hmc5843.c index 99421f90d18..0485d7f3986 100644 --- a/drivers/staging/iio/magnetometer/hmc5843.c +++ b/drivers/staging/iio/magnetometer/hmc5843.c @@ -451,7 +451,12 @@ done: .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ BIT(IIO_CHAN_INFO_SAMP_FREQ), \ .scan_index = idx, \ - .scan_type = IIO_ST('s', 16, 16, IIO_BE), \ + .scan_type = { \ + .sign = 's', \ + .realbits = 16, \ + .storagebits = 16, \ + .endianness = IIO_BE, \ + }, \ } static const struct iio_chan_spec hmc5843_channels[] = { diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c index 6bd015ac9d6..96e4eee344e 100644 --- a/drivers/staging/imx-drm/imx-drm-core.c +++ b/drivers/staging/imx-drm/imx-drm-core.c @@ -88,8 +88,9 @@ static int imx_drm_driver_unload(struct drm_device *drm) imx_drm_device_put(); - drm_mode_config_cleanup(imxdrm->drm); + drm_vblank_cleanup(imxdrm->drm); drm_kms_helper_poll_fini(imxdrm->drm); + drm_mode_config_cleanup(imxdrm->drm); return 0; } @@ -199,8 +200,8 @@ static void imx_drm_driver_preclose(struct drm_device *drm, if (!file->is_master) return; - for (i = 0; i < 4; i++) - imx_drm_disable_vblank(drm , i); + for (i = 0; i < MAX_CRTC; i++) + imx_drm_disable_vblank(drm, i); } static const struct file_operations imx_drm_driver_fops = { @@ -376,8 +377,6 @@ static int imx_drm_crtc_register(struct imx_drm_crtc *imx_drm_crtc) struct imx_drm_device *imxdrm = __imx_drm_device(); int ret; - drm_crtc_init(imxdrm->drm, imx_drm_crtc->crtc, - imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs); ret = drm_mode_crtc_set_gamma_size(imx_drm_crtc->crtc, 256); if (ret) return ret; @@ -385,6 +384,9 @@ static int imx_drm_crtc_register(struct imx_drm_crtc *imx_drm_crtc) drm_crtc_helper_add(imx_drm_crtc->crtc, imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs); + drm_crtc_init(imxdrm->drm, imx_drm_crtc->crtc, + imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs); + drm_mode_group_reinit(imxdrm->drm); return 0; @@ -428,11 +430,11 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags) ret = drm_mode_group_init_legacy_group(imxdrm->drm, &imxdrm->drm->primary->mode_group); if (ret) - goto err_init; + goto err_kms; ret = drm_vblank_init(imxdrm->drm, MAX_CRTC); if (ret) - goto err_init; + goto err_kms; /* * with vblank_disable_allowed = true, vblank interrupt will be disabled @@ -441,12 +443,19 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags) */ imxdrm->drm->vblank_disable_allowed = true; - if (!imx_drm_device_get()) + if (!imx_drm_device_get()) { ret = -EINVAL; + goto err_vblank; + } - ret = 0; + mutex_unlock(&imxdrm->mutex); + return 0; -err_init: +err_vblank: + drm_vblank_cleanup(drm); +err_kms: + drm_kms_helper_poll_fini(drm); + drm_mode_config_cleanup(drm); mutex_unlock(&imxdrm->mutex); return ret; @@ -492,6 +501,15 @@ int imx_drm_add_crtc(struct drm_crtc *crtc, mutex_lock(&imxdrm->mutex); + /* + * The vblank arrays are dimensioned by MAX_CRTC - we can't + * pass IDs greater than this to those functions. + */ + if (imxdrm->pipes >= MAX_CRTC) { + ret = -EINVAL; + goto err_busy; + } + if (imxdrm->drm->open_count) { ret = -EBUSY; goto err_busy; @@ -528,6 +546,7 @@ int imx_drm_add_crtc(struct drm_crtc *crtc, return 0; err_register: + list_del(&imx_drm_crtc->list); kfree(imx_drm_crtc); err_alloc: err_busy: diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c index 680f4c8fa08..2c44fef8d58 100644 --- a/drivers/staging/imx-drm/imx-tve.c +++ b/drivers/staging/imx-drm/imx-tve.c @@ -114,7 +114,6 @@ struct imx_tve { struct drm_encoder encoder; struct imx_drm_encoder *imx_drm_encoder; struct device *dev; - spinlock_t enable_lock; /* serializes tve_enable/disable */ spinlock_t lock; /* register lock */ bool enabled; int mode; @@ -146,10 +145,8 @@ __releases(&tve->lock) static void tve_enable(struct imx_tve *tve) { - unsigned long flags; int ret; - spin_lock_irqsave(&tve->enable_lock, flags); if (!tve->enabled) { tve->enabled = true; clk_prepare_enable(tve->clk); @@ -169,23 +166,18 @@ static void tve_enable(struct imx_tve *tve) TVE_CD_SM_IEN | TVE_CD_LM_IEN | TVE_CD_MON_END_IEN); - - spin_unlock_irqrestore(&tve->enable_lock, flags); } static void tve_disable(struct imx_tve *tve) { - unsigned long flags; int ret; - spin_lock_irqsave(&tve->enable_lock, flags); if (tve->enabled) { tve->enabled = false; ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG, TVE_IPU_CLK_EN | TVE_EN, 0); clk_disable_unprepare(tve->clk); } - spin_unlock_irqrestore(&tve->enable_lock, flags); } static int tve_setup_tvout(struct imx_tve *tve) @@ -601,7 +593,6 @@ static int imx_tve_probe(struct platform_device *pdev) tve->dev = &pdev->dev; spin_lock_init(&tve->lock); - spin_lock_init(&tve->enable_lock); ddc_node = of_parse_phandle(np, "ddc", 0); if (ddc_node) { diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-common.c b/drivers/staging/imx-drm/ipu-v3/ipu-common.c index 7a22ce619ed..97ca6924dbb 100644 --- a/drivers/staging/imx-drm/ipu-v3/ipu-common.c +++ b/drivers/staging/imx-drm/ipu-v3/ipu-common.c @@ -996,35 +996,35 @@ static const struct ipu_platform_reg client_reg[] = { }, }; +static DEFINE_MUTEX(ipu_client_id_mutex); static int ipu_client_id; -static int ipu_add_subdevice_pdata(struct device *dev, - const struct ipu_platform_reg *reg) -{ - struct platform_device *pdev; - - pdev = platform_device_register_data(dev, reg->name, ipu_client_id++, - ®->pdata, sizeof(struct ipu_platform_reg)); - - return PTR_ERR_OR_ZERO(pdev); -} - static int ipu_add_client_devices(struct ipu_soc *ipu) { - int ret; - int i; + struct device *dev = ipu->dev; + unsigned i; + int id, ret; + + mutex_lock(&ipu_client_id_mutex); + id = ipu_client_id; + ipu_client_id += ARRAY_SIZE(client_reg); + mutex_unlock(&ipu_client_id_mutex); for (i = 0; i < ARRAY_SIZE(client_reg); i++) { const struct ipu_platform_reg *reg = &client_reg[i]; - ret = ipu_add_subdevice_pdata(ipu->dev, reg); - if (ret) + struct platform_device *pdev; + + pdev = platform_device_register_data(dev, reg->name, + id++, ®->pdata, sizeof(reg->pdata)); + + if (IS_ERR(pdev)) goto err_register; } return 0; err_register: - platform_device_unregister_children(to_platform_device(ipu->dev)); + platform_device_unregister_children(to_platform_device(dev)); return ret; } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d70e9119e90..00867190413 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -465,6 +465,7 @@ int iscsit_del_np(struct iscsi_np *np) */ send_sig(SIGINT, np->np_thread, 1); kthread_stop(np->np_thread); + np->np_thread = NULL; } np->np_transport->iscsit_free_np(np); @@ -823,24 +824,22 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (((hdr->flags & ISCSI_FLAG_CMD_READ) || (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) { /* - * Vmware ESX v3.0 uses a modified Cisco Initiator (v3.4.2) - * that adds support for RESERVE/RELEASE. There is a bug - * add with this new functionality that sets R/W bits when - * neither CDB carries any READ or WRITE datapayloads. + * From RFC-3720 Section 10.3.1: + * + * "Either or both of R and W MAY be 1 when either the + * Expected Data Transfer Length and/or Bidirectional Read + * Expected Data Transfer Length are 0" + * + * For this case, go ahead and clear the unnecssary bits + * to avoid any confusion with ->data_direction. */ - if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) { - hdr->flags &= ~ISCSI_FLAG_CMD_READ; - hdr->flags &= ~ISCSI_FLAG_CMD_WRITE; - goto done; - } + hdr->flags &= ~ISCSI_FLAG_CMD_READ; + hdr->flags &= ~ISCSI_FLAG_CMD_WRITE; - pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" + pr_warn("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" " set when Expected Data Transfer Length is 0 for" - " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]); - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_BOOKMARK_INVALID, buf); + " CDB: 0x%02x, Fixing up flags\n", hdr->cdb[0]); } -done: if (!(hdr->flags & ISCSI_FLAG_CMD_READ) && !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) { diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index e3318edb233..1c0088fe9e9 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -474,7 +474,8 @@ static ssize_t __iscsi_##prefix##_store_##name( \ \ if (!capable(CAP_SYS_ADMIN)) \ return -EPERM; \ - \ + if (count >= sizeof(auth->name)) \ + return -EINVAL; \ snprintf(auth->name, sizeof(auth->name), "%s", page); \ if (!strncmp("NULL", auth->name, 4)) \ auth->naf_flags &= ~flags; \ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 4eb93b2b647..e29279e6b57 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1403,11 +1403,6 @@ old_sess_out: out: stop = kthread_should_stop(); - if (!stop && signal_pending(current)) { - spin_lock_bh(&np->np_thread_lock); - stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN); - spin_unlock_bh(&np->np_thread_lock); - } /* Wait for another socket.. */ if (!stop) return 1; @@ -1415,7 +1410,6 @@ exit: iscsi_stop_login_thread_timer(np); spin_lock_bh(&np->np_thread_lock); np->np_thread_state = ISCSI_NP_THREAD_EXIT; - np->np_thread = NULL; spin_unlock_bh(&np->np_thread_lock); return 0; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 207b340498a..d06de84b069 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1106,6 +1106,11 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size) dev->dev_attrib.block_size = block_size; pr_debug("dev[%p]: SE Device block_size changed to %u\n", dev, block_size); + + if (dev->dev_attrib.max_bytes_per_io) + dev->dev_attrib.hw_max_sectors = + dev->dev_attrib.max_bytes_per_io / block_size; + return 0; } diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 0e34cda3271..78241a53b55 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -66,9 +66,8 @@ static int fd_attach_hba(struct se_hba *hba, u32 host_id) pr_debug("CORE_HBA[%d] - TCM FILEIO HBA Driver %s on Generic" " Target Core Stack %s\n", hba->hba_id, FD_VERSION, TARGET_CORE_MOD_VERSION); - pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic" - " MaxSectors: %u\n", - hba->hba_id, fd_host->fd_host_id, FD_MAX_SECTORS); + pr_debug("CORE_HBA[%d] - Attached FILEIO HBA: %u to Generic\n", + hba->hba_id, fd_host->fd_host_id); return 0; } @@ -220,7 +219,8 @@ static int fd_configure_device(struct se_device *dev) } dev->dev_attrib.hw_block_size = fd_dev->fd_block_size; - dev->dev_attrib.hw_max_sectors = FD_MAX_SECTORS; + dev->dev_attrib.max_bytes_per_io = FD_MAX_BYTES; + dev->dev_attrib.hw_max_sectors = FD_MAX_BYTES / fd_dev->fd_block_size; dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH; if (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) { diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index 37ffc5bd239..d7772c16768 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -7,7 +7,10 @@ #define FD_DEVICE_QUEUE_DEPTH 32 #define FD_MAX_DEVICE_QUEUE_DEPTH 128 #define FD_BLOCKSIZE 512 -#define FD_MAX_SECTORS 2048 +/* + * Limited by the number of iovecs (2048) per vfs_[writev,readv] call + */ +#define FD_MAX_BYTES 8388608 #define RRF_EMULATE_CDB 0x01 #define RRF_GOT_LBA 0x02 diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index f697f8baec5..2a573de19a9 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -278,7 +278,6 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); acl->se_tpg = tpg; acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - spin_lock_init(&acl->stats_lock); acl->dynamic_node_acl = 1; tpg->se_tpg_tfo->set_default_node_attributes(acl); @@ -406,7 +405,6 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname); acl->se_tpg = tpg; acl->acl_index = scsi_get_new_index(SCSI_AUTH_INTR_INDEX); - spin_lock_init(&acl->stats_lock); tpg->se_tpg_tfo->set_default_node_attributes(acl); @@ -658,15 +656,9 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg) spin_lock_init(&lun->lun_sep_lock); init_completion(&lun->lun_ref_comp); - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); - if (ret < 0) - return ret; - ret = core_tpg_post_addlun(se_tpg, lun, lun_access, dev); - if (ret < 0) { - percpu_ref_cancel_init(&lun->lun_ref); + if (ret < 0) return ret; - } return 0; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 268b62768f2..34aacaaae14 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -93,6 +93,7 @@ struct n_tty_data { size_t canon_head; size_t echo_head; size_t echo_commit; + size_t echo_mark; DECLARE_BITMAP(char_map, 256); /* private to n_tty_receive_overrun (single-threaded) */ @@ -336,6 +337,7 @@ static void reset_buffer_flags(struct n_tty_data *ldata) { ldata->read_head = ldata->canon_head = ldata->read_tail = 0; ldata->echo_head = ldata->echo_tail = ldata->echo_commit = 0; + ldata->echo_mark = 0; ldata->line_start = 0; ldata->erasing = 0; @@ -787,6 +789,7 @@ static void commit_echoes(struct tty_struct *tty) size_t head; head = ldata->echo_head; + ldata->echo_mark = head; old = ldata->echo_commit - ldata->echo_tail; /* Process committed echoes if the accumulated # of bytes @@ -811,10 +814,11 @@ static void process_echoes(struct tty_struct *tty) size_t echoed; if ((!L_ECHO(tty) && !L_ECHONL(tty)) || - ldata->echo_commit == ldata->echo_tail) + ldata->echo_mark == ldata->echo_tail) return; mutex_lock(&ldata->output_lock); + ldata->echo_commit = ldata->echo_mark; echoed = __process_echoes(tty); mutex_unlock(&ldata->output_lock); @@ -822,6 +826,7 @@ static void process_echoes(struct tty_struct *tty) tty->ops->flush_chars(tty); } +/* NB: echo_mark and echo_head should be equivalent here */ static void flush_echoes(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 4658e3e0ec4..06525f10e36 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -96,7 +96,8 @@ static void dw8250_serial_out(struct uart_port *p, int offset, int value) if (offset == UART_LCR) { int tries = 1000; while (tries--) { - if (value == p->serial_in(p, UART_LCR)) + unsigned int lcr = p->serial_in(p, UART_LCR); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) return; dw8250_force_idle(p); writeb(value, p->membase + (UART_LCR << p->regshift)); @@ -132,7 +133,8 @@ static void dw8250_serial_out32(struct uart_port *p, int offset, int value) if (offset == UART_LCR) { int tries = 1000; while (tries--) { - if (value == p->serial_in(p, UART_LCR)) + unsigned int lcr = p->serial_in(p, UART_LCR); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) return; dw8250_force_idle(p); writel(value, p->membase + (UART_LCR << p->regshift)); @@ -455,6 +457,8 @@ MODULE_DEVICE_TABLE(of, dw8250_of_match); static const struct acpi_device_id dw8250_acpi_match[] = { { "INT33C4", 0 }, { "INT33C5", 0 }, + { "INT3434", 0 }, + { "INT3435", 0 }, { "80860F0A", 0 }, { }, }; diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index e46e9f3f19b..f619ad5b5ea 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -240,6 +240,7 @@ static irqreturn_t xuartps_isr(int irq, void *dev_id) continue; } +#ifdef SUPPORT_SYSRQ /* * uart_handle_sysrq_char() doesn't work if * spinlocked, for some reason @@ -253,6 +254,7 @@ static irqreturn_t xuartps_isr(int irq, void *dev_id) } spin_lock(&port->lock); } +#endif port->icount.rx++; diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 22fad8ad5ac..d8a55e87877 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -86,11 +86,21 @@ static inline long ldsem_atomic_update(long delta, struct ld_semaphore *sem) return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); } +/* + * ldsem_cmpxchg() updates @*old with the last-known sem->count value. + * Returns 1 if count was successfully changed; @*old will have @new value. + * Returns 0 if count was not changed; @*old will have most recent sem->count + */ static inline int ldsem_cmpxchg(long *old, long new, struct ld_semaphore *sem) { - long tmp = *old; - *old = atomic_long_cmpxchg(&sem->count, *old, new); - return *old == tmp; + long tmp = atomic_long_cmpxchg(&sem->count, *old, new); + if (tmp == *old) { + *old = new; + return 1; + } else { + *old = tmp; + return 0; + } } /* diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 5d8981c5235..6e73f8cd60e 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -642,6 +642,10 @@ static int ci_hdrc_probe(struct platform_device *pdev) : CI_ROLE_GADGET; } + /* only update vbus status for peripheral */ + if (ci->role == CI_ROLE_GADGET) + ci_handle_vbus_change(ci); + ret = ci_role_start(ci, ci->role); if (ret) { dev_err(dev, "can't start %s role\n", ci_role(ci)->name); diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 59e6020ea75..526cd77563d 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -88,7 +88,8 @@ static int host_start(struct ci_hdrc *ci) return ret; disable_reg: - regulator_disable(ci->platdata->reg_vbus); + if (ci->platdata->reg_vbus) + regulator_disable(ci->platdata->reg_vbus); put_hcd: usb_put_hcd(hcd); diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index b34c81969cb..69d20fbb38a 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1795,9 +1795,6 @@ static int udc_start(struct ci_hdrc *ci) pm_runtime_no_callbacks(&ci->gadget.dev); pm_runtime_enable(&ci->gadget.dev); - /* Update ci->vbus_active */ - ci_handle_vbus_change(ci); - return retval; destroy_eps: diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 4d387596f3f..0b23a863931 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -854,13 +854,11 @@ static int wdm_manage_power(struct usb_interface *intf, int on) { /* need autopm_get/put here to ensure the usbcore sees the new value */ int rv = usb_autopm_get_interface(intf); - if (rv < 0) - goto err; intf->needs_remote_wakeup = on; - usb_autopm_put_interface(intf); -err: - return rv; + if (!rv) + usb_autopm_put_interface(intf); + return 0; } static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 74f9cf02da0..a49217ae353 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -455,9 +455,6 @@ static int dwc3_probe(struct platform_device *pdev) if (IS_ERR(regs)) return PTR_ERR(regs); - usb_phy_set_suspend(dwc->usb2_phy, 0); - usb_phy_set_suspend(dwc->usb3_phy, 0); - spin_lock_init(&dwc->lock); platform_set_drvdata(pdev, dwc); @@ -488,6 +485,9 @@ static int dwc3_probe(struct platform_device *pdev) goto err0; } + usb_phy_set_suspend(dwc->usb2_phy, 0); + usb_phy_set_suspend(dwc->usb3_phy, 0); + ret = dwc3_event_buffers_setup(dwc); if (ret) { dev_err(dwc->dev, "failed to setup event buffers\n"); @@ -569,6 +569,8 @@ err2: dwc3_event_buffers_cleanup(dwc); err1: + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); dwc3_core_exit(dwc); err0: diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 418444ebb1b..8c356af7940 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -136,23 +136,27 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver, struct ohci_hcd *ohci; int retval; struct usb_hcd *hcd = NULL; - - if (pdev->num_resources != 2) { - pr_debug("hcd probe: invalid num_resources"); - return -ENODEV; + struct device *dev = &pdev->dev; + struct resource *res; + int irq; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_dbg(dev, "hcd probe: missing memory resource\n"); + return -ENXIO; } - if ((pdev->resource[0].flags != IORESOURCE_MEM) - || (pdev->resource[1].flags != IORESOURCE_IRQ)) { - pr_debug("hcd probe: invalid resource type\n"); - return -ENODEV; + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_dbg(dev, "hcd probe: missing irq resource\n"); + return irq; } hcd = usb_create_hcd(driver, &pdev->dev, "at91"); if (!hcd) return -ENOMEM; - hcd->rsrc_start = pdev->resource[0].start; - hcd->rsrc_len = resource_size(&pdev->resource[0]); + hcd->rsrc_start = res->start; + hcd->rsrc_len = resource_size(res); if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len, hcd_name)) { pr_debug("request_mem_region failed\n"); @@ -199,7 +203,7 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver, ohci->num_ports = board->ports; at91_start_hc(pdev); - retval = usb_add_hcd(hcd, pdev->resource[1].start, IRQF_SHARED); + retval = usb_add_hcd(hcd, irq, IRQF_SHARED); if (retval == 0) return retval; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index b8dffd59eb2..73f5208714a 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -128,7 +128,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) * any other sleep) on Haswell machines with LPT and LPT-LP * with the new Intel BIOS */ - xhci->quirks |= XHCI_SPURIOUS_WAKEUP; + /* Limit the quirk to only known vendors, as this triggers + * yet another BIOS bug on some other machines + * https://bugzilla.kernel.org/show_bug.cgi?id=66171 + */ + if (pdev->subsystem_vendor == PCI_VENDOR_ID_HP) + xhci->quirks |= XHCI_SPURIOUS_WAKEUP; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_ASROCK_P67) { diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 08e2f39027e..2b41c636a52 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -19,8 +19,9 @@ config AB8500_USB in host mode, low speed. config FSL_USB2_OTG - bool "Freescale USB OTG Transceiver Driver" + tristate "Freescale USB OTG Transceiver Driver" depends on USB_EHCI_FSL && USB_FSL_USB2 && PM_RUNTIME + depends on USB select USB_OTG select USB_PHY help @@ -29,6 +30,7 @@ config FSL_USB2_OTG config ISP1301_OMAP tristate "Philips ISP1301 with OMAP OTG" depends on I2C && ARCH_OMAP_OTG + depends on USB select USB_PHY help If you say yes here you get support for the Philips ISP1301 diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index 82232acf1ab..bbe4f8e6e8d 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -876,7 +876,7 @@ static int utmi_phy_probe(struct tegra_usb_phy *tegra_phy, tegra_phy->pad_regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!tegra_phy->regs) { + if (!tegra_phy->pad_regs) { dev_err(&pdev->dev, "Failed to remap UTMI Pad regs\n"); return -ENOMEM; } diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index 30e8a61552d..bad57ce77ba 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -127,7 +127,8 @@ static inline int twl6030_writeb(struct twl6030_usb *twl, u8 module, static inline u8 twl6030_readb(struct twl6030_usb *twl, u8 module, u8 address) { - u8 data, ret = 0; + u8 data; + int ret; ret = twl_i2c_read_u8(module, &data, address); if (ret >= 0) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 496b7e39d5b..cc7a2415449 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 #define ZTE_PRODUCT_MC2718 0xffe8 +#define ZTE_PRODUCT_AC2726 0xfff1 #define BENQ_VENDOR_ID 0x04a5 #define BENQ_PRODUCT_H10 0x4068 @@ -1453,6 +1454,7 @@ static const struct usb_device_id option_ids[] = { { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) }, { USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) }, diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c index fca4c752a4e..eae2c873b39 100644 --- a/drivers/usb/serial/zte_ev.c +++ b/drivers/usb/serial/zte_ev.c @@ -281,8 +281,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x19d2, 0xfffd) }, { USB_DEVICE(0x19d2, 0xfffc) }, { USB_DEVICE(0x19d2, 0xfffb) }, - /* AC2726, AC8710_V3 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xfff1, 0xff, 0xff, 0xff) }, + /* AC8710_V3 */ { USB_DEVICE(0x19d2, 0xfff6) }, { USB_DEVICE(0x19d2, 0xfff7) }, { USB_DEVICE(0x19d2, 0xfff8) }, diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index c444654fc33..5c4a95b516c 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -285,7 +285,7 @@ static void update_balloon_size(struct virtio_balloon *vb) { __le32 actual = cpu_to_le32(vb->num_pages); - virtio_cwrite(vb->vdev, struct virtio_balloon_config, num_pages, + virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, &actual); } diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 55ea73f7c70..4c02e2b9410 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -350,17 +350,19 @@ static enum bp_state increase_reservation(unsigned long nr_pages) pfn = page_to_pfn(page); - set_phys_to_machine(pfn, frame_list[i]); - #ifdef CONFIG_XEN_HAVE_PVMMU - /* Link back into the page tables if not highmem. */ - if (xen_pv_domain() && !PageHighMem(page)) { - int ret; - ret = HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - mfn_pte(frame_list[i], PAGE_KERNEL), - 0); - BUG_ON(ret); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + set_phys_to_machine(pfn, frame_list[i]); + + /* Link back into the page tables if not highmem. */ + if (!PageHighMem(page)) { + int ret; + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(frame_list[i], PAGE_KERNEL), + 0); + BUG_ON(ret); + } } #endif @@ -378,7 +380,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) enum bp_state state = BP_DONE; unsigned long pfn, i; struct page *page; - struct page *scratch_page; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, @@ -411,27 +412,29 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) scrub_page(page); +#ifdef CONFIG_XEN_HAVE_PVMMU /* * Ballooned out frames are effectively replaced with * a scratch frame. Ensure direct mappings and the * p2m are consistent. */ - scratch_page = get_balloon_scratch_page(); -#ifdef CONFIG_XEN_HAVE_PVMMU - if (xen_pv_domain() && !PageHighMem(page)) { - ret = HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte(page_to_pfn(scratch_page), - PAGE_KERNEL_RO), 0); - BUG_ON(ret); - } -#endif if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long p; + struct page *scratch_page = get_balloon_scratch_page(); + + if (!PageHighMem(page)) { + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + pfn_pte(page_to_pfn(scratch_page), + PAGE_KERNEL_RO), 0); + BUG_ON(ret); + } p = page_to_pfn(scratch_page); __set_phys_to_machine(pfn, pfn_to_mfn(p)); + + put_balloon_scratch_page(); } - put_balloon_scratch_page(); +#endif balloon_append(pfn_to_page(pfn)); } @@ -627,15 +630,17 @@ static int __init balloon_init(void) if (!xen_domain()) return -ENODEV; - for_each_online_cpu(cpu) - { - per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); - if (per_cpu(balloon_scratch_page, cpu) == NULL) { - pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); - return -ENOMEM; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + for_each_online_cpu(cpu) + { + per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); + if (per_cpu(balloon_scratch_page, cpu) == NULL) { + pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); + return -ENOMEM; + } } + register_cpu_notifier(&balloon_cpu_notifier); } - register_cpu_notifier(&balloon_cpu_notifier); pr_info("Initialising balloon driver\n"); diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 028387192b6..aa846a48f40 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1176,7 +1176,8 @@ static int gnttab_setup(void) gnttab_shared.addr = xen_remap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes); if (gnttab_shared.addr == NULL) { - pr_warn("Failed to ioremap gnttab share frames!\n"); + pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n", + xen_hvm_resume_frames); return -ENOMEM; } } diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 8e74590fa1b..569a13b9e85 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -533,12 +533,17 @@ static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int rc; if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages) return; - xen_unmap_domain_mfn_range(vma, numpgs, pages); - free_xenballooned_pages(numpgs, pages); + rc = xen_unmap_domain_mfn_range(vma, numpgs, pages); + if (rc == 0) + free_xenballooned_pages(numpgs, pages); + else + pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", + numpgs, rc); kfree(pages); } @@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx) int i; for (i = 0; i < ctx->nr_pages; i++) { + struct page *page; pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, page_count(ctx->ring_pages[i])); - put_page(ctx->ring_pages[i]); + page = ctx->ring_pages[i]; + if (!page) + continue; + ctx->ring_pages[i] = NULL; + put_page(page); } put_aio_ring_file(ctx); @@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, unsigned long flags; int rc; + rc = 0; + + /* Make sure the old page hasn't already been changed */ + spin_lock(&mapping->private_lock); + ctx = mapping->private_data; + if (ctx) { + pgoff_t idx; + spin_lock_irqsave(&ctx->completion_lock, flags); + idx = old->index; + if (idx < (pgoff_t)ctx->nr_pages) { + if (ctx->ring_pages[idx] != old) + rc = -EAGAIN; + } else + rc = -EINVAL; + spin_unlock_irqrestore(&ctx->completion_lock, flags); + } else + rc = -EINVAL; + spin_unlock(&mapping->private_lock); + + if (rc != 0) + return rc; + /* Writeback must be complete */ BUG_ON(PageWriteback(old)); - put_page(old); + get_page(new); - rc = migrate_page_move_mapping(mapping, new, old, NULL, mode); + rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1); if (rc != MIGRATEPAGE_SUCCESS) { - get_page(old); + put_page(new); return rc; } - get_page(new); - /* We can potentially race against kioctx teardown here. Use the * address_space's private data lock to protect the mapping's * private_data. @@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); idx = old->index; - if (idx < (pgoff_t)ctx->nr_pages) - ctx->ring_pages[idx] = new; + if (idx < (pgoff_t)ctx->nr_pages) { + /* And only do the move if things haven't changed */ + if (ctx->ring_pages[idx] == old) + ctx->ring_pages[idx] = new; + else + rc = -EAGAIN; + } else + rc = -EINVAL; spin_unlock_irqrestore(&ctx->completion_lock, flags); } else rc = -EBUSY; spin_unlock(&mapping->private_lock); + if (rc == MIGRATEPAGE_SUCCESS) + put_page(old); + else + put_page(new); + return rc; } #endif @@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx) struct aio_ring *ring; unsigned nr_events = ctx->max_reqs; struct mm_struct *mm = current->mm; - unsigned long size, populate; + unsigned long size, unused; int nr_pages; int i; struct file *file; @@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx) return -EAGAIN; } + ctx->aio_ring_file = file; + nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) + / sizeof(struct io_event); + + ctx->ring_pages = ctx->internal_pages; + if (nr_pages > AIO_RING_PAGES) { + ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), + GFP_KERNEL); + if (!ctx->ring_pages) { + put_aio_ring_file(ctx); + return -ENOMEM; + } + } + for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, @@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx) SetPageUptodate(page); SetPageDirty(page); unlock_page(page); + + ctx->ring_pages[i] = page; } - ctx->aio_ring_file = file; - nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) - / sizeof(struct io_event); + ctx->nr_pages = i; - ctx->ring_pages = ctx->internal_pages; - if (nr_pages > AIO_RING_PAGES) { - ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), - GFP_KERNEL); - if (!ctx->ring_pages) { - put_aio_ring_file(ctx); - return -ENOMEM; - } + if (unlikely(i != nr_pages)) { + aio_free_ring(ctx); + return -EAGAIN; } ctx->mmap_size = nr_pages * PAGE_SIZE; @@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx) down_write(&mm->mmap_sem); ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, 0, &populate); + MAP_SHARED, 0, &unused); + up_write(&mm->mmap_sem); if (IS_ERR((void *)ctx->mmap_base)) { - up_write(&mm->mmap_sem); ctx->mmap_size = 0; aio_free_ring(ctx); return -EAGAIN; @@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx) pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); - /* We must do this while still holding mmap_sem for write, as we - * need to be protected against userspace attempting to mremap() - * or munmap() the ring buffer. - */ - ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages, - 1, 0, ctx->ring_pages, NULL); - - /* Dropping the reference here is safe as the page cache will hold - * onto the pages for us. It is also required so that page migration - * can unmap the pages and get the right reference count. - */ - for (i = 0; i < ctx->nr_pages; i++) - put_page(ctx->ring_pages[i]); - - up_write(&mm->mmap_sem); - - if (unlikely(ctx->nr_pages != nr_pages)) { - aio_free_ring(ctx); - return -EAGAIN; - } - ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ @@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); - percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ + percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */ + percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */ err = ioctx_add_table(ctx, mm); if (err) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe..20d6697bd63 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + tmp_bh.b_size = sb->s_blocksize; err = ext2_get_block(inode, blk, &tmp_bh, 1); if (err < 0) goto out; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e6185031c1c..ece55565b9c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -268,6 +268,16 @@ struct ext4_io_submit { /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ (sbi)->s_cluster_bits) +/* Mask out the low bits to get the starting block of the cluster */ +#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ + ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ + ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) +/* Get the cluster offset */ +#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ + ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ + ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* * Structure of a blocks group descriptor diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 17ac112ab10..3fe29de832c 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, if (WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); + ext4_error_inode(inode, where, line, + bh->b_blocknr, + "journal_dirty_metadata failed: " + "handle type %u started at line %u, " + "credits %u/%u, errcode %d", + handle->h_type, + handle->h_line_no, + handle->h_requested_credits, + handle->h_buffer_credits, err); } } else { if (inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 35f65cf4f31..4410cc3d6ee 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); + ext4_lblk_t last = lblock + len - 1; - if (len == 0) + if (lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t pblock = 0; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; + + /* Check for overlapping extents */ + lblock = le32_to_cpu(ext->ee_block); + len = ext4_ext_get_actual_len(ext); + if ((lblock <= prev) && prev) { + pblock = ext4_ext_pblock(ext); + es->s_last_error_block = cpu_to_le64(pblock); + return 0; + } ext++; entries--; + prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); @@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, depth = ext_depth(inode); if (!path[depth].p_ext) goto out; - b2 = le32_to_cpu(path[depth].p_ext->ee_block); - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); /* * get the next allocated block if the extent in the path @@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out; - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, b2); } /* check for wrap through zero on extent logical start block*/ @@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * extent, we have to mark the cluster as used (store negative * cluster number in partial_cluster). */ - unaligned = pblk & (sbi->s_cluster_ratio - 1); + unaligned = EXT4_PBLK_COFF(sbi, pblk); if (unaligned && (ee_len == num) && (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) *partial_cluster = EXT4_B2C(sbi, pblk); @@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, * accidentally freeing it later on */ pblk = ext4_ext_pblock(ex); - if (pblk & (sbi->s_cluster_ratio - 1)) + if (EXT4_PBLK_COFF(sbi, pblk)) *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); ex--; @@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t lblk_start, lblk_end; - lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); + lblk_start = EXT4_LBLK_CMASK(sbi, lblk); lblk_end = lblk_start + sbi->s_cluster_ratio - 1; return ext4_find_delalloc_range(inode, lblk_start, lblk_end); @@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); /* Check towards left side */ - c_offset = lblk_start & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start); if (c_offset) { - lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); + lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); lblk_to = lblk_from + c_offset - 1; if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) @@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, } /* Now check towards right. */ - c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); if (allocated_clusters && c_offset) { lblk_from = lblk_start + num_blks; lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; @@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_ext_path *path) { struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); @@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, (rr_cluster_start == ex_cluster_start)) { if (rr_cluster_start == ex_cluster_end) ee_start += ee_len - 1; - map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + - c_offset; + map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; map->m_len = min(map->m_len, (unsigned) sbi->s_cluster_ratio - c_offset); /* @@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; newex.ee_block = cpu_to_le32(map->m_lblk); - cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + cluster_offset = EXT4_LBLK_CMASK(sbi, map->m_lblk); /* * If we are doing bigalloc, check to see if the extent returned @@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * needed so that future calls to get_implied_cluster_alloc() * work correctly. */ - offset = map->m_lblk & (sbi->s_cluster_ratio - 1); + offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ar.len = EXT4_NUM_B2C(sbi, offset+allocated); ar.goal -= offset; ar.logical -= offset; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 07576347411..61d49ff22c8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file, */ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1238,10 +1236,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } return -ENOSPC; } ei->i_reserved_meta_blocks += md_needed; @@ -1255,7 +1249,6 @@ repeat: */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1297,10 +1289,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4d113efa024..04a5c7504be 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c977f4e4e63..1f7784de05b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb) } ext4_es_unregister_shrinker(sbi); - del_timer(&sbi->s_err_report); + del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb) } -static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) +static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; /* + * There's no need to reserve anything when we aren't using extents. + * The space estimates are exact, there are no unwritten extents, + * hole punching doesn't need new metadata... This is needed especially + * to keep ext2/3 backward compatibility. + */ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + return 0; + /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting @@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; + resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> + EXT4_SB(sb)->s_cluster_bits; do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); @@ -4071,10 +4080,10 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); + err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); if (err) { ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sbi)); + "reserved pool", ext4_calculate_resv_clusters(sb)); goto failed_mount4a; } @@ -4184,7 +4193,7 @@ failed_mount_wq: } failed_mount3: ext4_es_unregister_shrinker(sbi); - del_timer(&sbi->s_err_report); + del_timer_sync(&sbi->s_err_report); if (sbi->s_flex_groups) ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 52032647dd4..5fa344afb49 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_EMERG + printk(KERN_ERR "%s: error: j_commit_request=%d, tid=%d\n", __func__, journal->j_commit_request, tid); } @@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) } read_unlock(&journal->j_state_lock); - if (unlikely(is_journal_aborted(journal))) { - printk(KERN_EMERG "journal commit I/O error\n"); + if (unlikely(is_journal_aborted(journal))) err = -EIO; - } return err; } @@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal) if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } if (!jbd2_verify_csum_type(journal, sb)) { - printk(KERN_ERR "JBD: Unknown checksum type\n"); + printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } @@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); + printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); err = PTR_ERR(journal->j_chksum_driver); journal->j_chksum_driver = NULL; goto out; @@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal) /* Check superblock checksum */ if (!jbd2_superblock_csum_verify(journal, sb)) { - printk(KERN_ERR "JBD: journal checksum error\n"); + printk(KERN_ERR "JBD2: journal checksum error\n"); goto out; } @@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c " + printk(KERN_ERR "JBD2: Cannot load crc32c " "driver.\n"); journal->j_chksum_driver = NULL; return 0; @@ -2645,7 +2643,7 @@ static void __exit journal_exit(void) #ifdef CONFIG_JBD2_DEBUG int n = atomic_read(&nr_journal_heads); if (n) - printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); + printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); #endif jbd2_remove_jbd_stats_proc_entry(); jbd2_journal_destroy_caches(); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3929c50428b..3b6bb19d60b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal, be32_to_cpu(tmp->h_sequence))) { brelse(obh); success = -EIO; - printk(KERN_ERR "JBD: Invalid " + printk(KERN_ERR "JBD2: Invalid " "checksum recovering " "block %llu in log\n", blocknr); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7aa9a32573b..8360674c85b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -932,7 +932,7 @@ repeat: jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { - printk(KERN_EMERG + printk(KERN_ERR "%s: OOM for frozen_buffer\n", __func__); JBUFFER_TRACE(jh, "oom!"); @@ -1166,7 +1166,7 @@ repeat: if (!jh->b_committed_data) { committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { - printk(KERN_EMERG "%s: No memory for committed data\n", + printk(KERN_ERR "%s: No memory for committed data\n", __func__); err = -ENOMEM; goto out; @@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * once a transaction -bzzz */ jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + if (handle->h_buffer_credits <= 0) { + ret = -ENOSPC; + goto out_unlock_bh; + } handle->h_buffer_credits--; } @@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "fastpath"); if (unlikely(jh->b_transaction != journal->j_running_transaction)) { - printk(KERN_EMERG "JBD: %s: " + printk(KERN_ERR "JBD2: %s: " "jh->b_transaction (%llu, %p, %u) != " "journal->j_running_transaction (%p, %u)", journal->j_devname, @@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "already on other transaction"); if (unlikely(jh->b_transaction != journal->j_committing_transaction)) { - printk(KERN_EMERG "JBD: %s: " + printk(KERN_ERR "JBD2: %s: " "jh->b_transaction (%llu, %p, %u) != " "journal->j_committing_transaction (%p, %u)", journal->j_devname, @@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ret = -EINVAL; } if (unlikely(jh->b_next_transaction != transaction)) { - printk(KERN_EMERG "JBD: %s: " + printk(KERN_ERR "JBD2: %s: " "jh->b_next_transaction (%llu, %p, %u) != " "transaction (%p, %u)", journal->j_devname, @@ -1373,7 +1376,6 @@ out_unlock_bh: jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); - WARN_ON(ret); /* All errors are bugs, so dump the stack */ return ret; } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b8e93a40a5d..78c3c209778 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi) pstore_get_records(0); kmsg_dump_register(&pstore_dumper); - pstore_register_console(); - pstore_register_ftrace(); + + if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { + pstore_register_console(); + pstore_register_ftrace(); + } if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b94f9368509..35e7d08fe62 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; struct sysfs_open_file *of; - bool has_read, has_write, has_mmap; + bool has_read, has_write; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ @@ -621,7 +621,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = battr->read || battr->mmap; has_write = battr->write || battr->mmap; - has_mmap = battr->mmap; } else { const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); @@ -633,7 +632,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file) has_read = ops->show; has_write = ops->store; - has_mmap = false; } /* check perms and supported operations */ @@ -661,9 +659,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file) * open file has a separate mutex, it's okay as long as those don't * happen on the same file. At this point, we can't easily give * each file a separate locking class. Let's differentiate on - * whether the file has mmap or not for now. + * whether the file is bin or not for now. */ - if (has_mmap) + if (sysfs_is_bin(attr_sd)) mutex_init(&of->mutex); else mutex_init(&of->mutex); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3ef11b22e75..3b2c14b6f0f 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1635,7 +1635,7 @@ xfs_bmap_last_extent( * blocks at the end of the file which do not start at the previous data block, * we will try to align the new blocks at stripe unit boundaries. * - * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be * at, or past the EOF. */ STATIC int @@ -1650,9 +1650,14 @@ xfs_bmap_isaeof( bma->aeof = 0; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); - if (error || is_empty) + if (error) return error; + if (is_empty) { + bma->aeof = 1; + return 0; + } + /* * Check if we are allocation or past the last extent, or at least into * the last delayed allocated extent. @@ -3643,10 +3648,19 @@ xfs_bmap_btalloc( int isaligned; int tryagain; int error; + int stripe_align; ASSERT(ap->length); mp = ap->ip->i_mount; + + /* stripe alignment for allocation is determined by mount parameters */ + stripe_align = 0; + if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) + stripe_align = mp->m_swidth; + else if (mp->m_dalign) + stripe_align = mp->m_dalign; + align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, @@ -3655,6 +3669,8 @@ xfs_bmap_btalloc( ASSERT(!error); ASSERT(ap->length); } + + nullfb = *ap->firstblock == NULLFSBLOCK; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); if (nullfb) { @@ -3730,7 +3746,7 @@ xfs_bmap_btalloc( */ if (!ap->flist->xbf_low && ap->aeof) { if (!ap->offset) { - args.alignment = mp->m_dalign; + args.alignment = stripe_align; atype = args.type; isaligned = 1; /* @@ -3755,13 +3771,13 @@ xfs_bmap_btalloc( * of minlen+alignment+slop doesn't go up * between the calls. */ - if (blen > mp->m_dalign && blen <= args.maxlen) - nextminlen = blen - mp->m_dalign; + if (blen > stripe_align && blen <= args.maxlen) + nextminlen = blen - stripe_align; else nextminlen = args.minlen; - if (nextminlen + mp->m_dalign > args.minlen + 1) + if (nextminlen + stripe_align > args.minlen + 1) args.minalignslop = - nextminlen + mp->m_dalign - + nextminlen + stripe_align - args.minlen - 1; else args.minalignslop = 0; @@ -3783,7 +3799,7 @@ xfs_bmap_btalloc( */ args.type = atype; args.fsbno = ap->blkno; - args.alignment = mp->m_dalign; + args.alignment = stripe_align; args.minlen = nextminlen; args.minalignslop = 0; isaligned = 1; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c032..1394106ed22 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1187,7 +1187,12 @@ xfs_zero_remaining_bytes( XFS_BUF_UNWRITE(bp); XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); - xfsbdstrat(mp, bp); + + if (XFS_FORCED_SHUTDOWN(mp)) { + error = XFS_ERROR(EIO); + break; + } + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, @@ -1200,7 +1205,12 @@ xfs_zero_remaining_bytes( XFS_BUF_UNDONE(bp); XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); - xfsbdstrat(mp, bp); + + if (XFS_FORCED_SHUTDOWN(mp)) { + error = XFS_ERROR(EIO); + break; + } + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77dcb0..afe7645e4b2 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -698,7 +698,11 @@ xfs_buf_read_uncached( bp->b_flags |= XBF_READ; bp->b_ops = ops; - xfsbdstrat(target->bt_mount, bp); + if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { + xfs_buf_relse(bp); + return NULL; + } + xfs_buf_iorequest(bp); xfs_buf_iowait(bp); return bp; } @@ -1089,7 +1093,7 @@ xfs_bioerror( * This is meant for userdata errors; metadata bufs come with * iodone functions attached, so that we can track down errors. */ -STATIC int +int xfs_bioerror_relse( struct xfs_buf *bp) { @@ -1152,7 +1156,7 @@ xfs_bwrite( ASSERT(xfs_buf_islocked(bp)); bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL); xfs_bdstrat_cb(bp); @@ -1164,25 +1168,6 @@ xfs_bwrite( return error; } -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); - return; - } - - xfs_buf_iorequest(bp); -} - STATIC void _xfs_buf_ioend( xfs_buf_t *bp, @@ -1516,6 +1501,12 @@ xfs_wait_buftarg( struct xfs_buf *bp; bp = list_first_entry(&dispose, struct xfs_buf, b_lru); list_del_init(&bp->b_lru); + if (bp->b_flags & XBF_WRITE_FAIL) { + xfs_alert(btp->bt_mount, +"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" +"Please run xfs_repair to determine the extent of the problem.", + (long long)bp->b_bn); + } xfs_buf_rele(bp); } if (loop++ != 0) @@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit( blk_start_plug(&plug); list_for_each_entry_safe(bp, n, io_list, b_list) { - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); + bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL); bp->b_flags |= XBF_WRITE; if (!wait) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e6568336101..1cf21a4a9f2 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -45,6 +45,7 @@ typedef enum { #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ /* I/O hints for the BIO layer */ #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ @@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t; { XBF_ASYNC, "ASYNC" }, \ { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ + { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ { XBF_SYNCIO, "SYNCIO" }, \ { XBF_FUA, "FUA" }, \ { XBF_FLUSH, "FLUSH" }, \ @@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_DELWRI_Q, "DELWRI_Q" }, \ { _XBF_COMPOUND, "COMPOUND" } + /* * Internal state flags. */ @@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_buf *bp); - -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); - extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); @@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, #define xfs_buf_zero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) +extern int xfs_bioerror_relse(struct xfs_buf *); + static inline int xfs_buf_geterror(xfs_buf_t *bp) { return bp ? bp->b_error : ENOMEM; @@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void); #define XFS_BUF_ZEROFLAGS(bp) \ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) + XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \ + XBF_WRITE_FAIL)) void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67ba25d..2227b9b050b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -496,6 +496,14 @@ xfs_buf_item_unpin( } } +/* + * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 + * seconds so as to not spam logs too much on repeated detection of the same + * buffer being bad.. + */ + +DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); + STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, @@ -524,6 +532,14 @@ xfs_buf_item_push( trace_xfs_buf_item_push(bip); + /* has a previous flush failed due to IO errors? */ + if ((bp->b_flags & XBF_WRITE_FAIL) && + ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { + xfs_warn(bp->b_target->bt_mount, +"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", + (long long)bp->b_bn); + } + if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); @@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks( xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ - if (!XFS_BUF_ISSTALE(bp)) { - bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; + if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { + bp->b_flags |= XBF_WRITE | XBF_ASYNC | + XBF_DONE | XBF_WRITE_FAIL; xfs_buf_iorequest(bp); } else { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56369d4509d..48c7d18f68c 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup( */ int /* error */ xfs_dir2_node_removename( - xfs_da_args_t *args) /* operation arguments */ + struct xfs_da_args *args) /* operation arguments */ { - xfs_da_state_blk_t *blk; /* leaf block */ + struct xfs_da_state_blk *blk; /* leaf block */ int error; /* error return value */ int rval; /* operation return value */ - xfs_da_state_t *state; /* btree cursor */ + struct xfs_da_state *state; /* btree cursor */ trace_xfs_dir2_node_removename(args); @@ -2084,19 +2084,18 @@ xfs_dir2_node_removename( state->mp = args->dp->i_mount; state->blocksize = state->mp->m_dirblksize; state->node_ents = state->mp->m_dir_node_ents; - /* - * Look up the entry we're deleting, set up the cursor. - */ + + /* Look up the entry we're deleting, set up the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); if (error) - rval = error; - /* - * Didn't find it, upper layer screwed up. - */ + goto out_free; + + /* Didn't find it, upper layer screwed up. */ if (rval != EEXIST) { - xfs_da_state_free(state); - return rval; + error = rval; + goto out_free; } + blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); ASSERT(state->extravalid); @@ -2107,7 +2106,7 @@ xfs_dir2_node_removename( error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, &state->extrablk, &rval); if (error) - return error; + goto out_free; /* * Fix the hash values up the btree. */ @@ -2122,6 +2121,7 @@ xfs_dir2_node_removename( */ if (!error) error = xfs_dir2_node_to_leaf(state); +out_free: xfs_da_state_free(state); return error; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e544e96..104455b8046 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -618,7 +618,8 @@ xfs_setattr_nonsize( } if (!gid_eq(igid, gid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || + !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669df40f..eae16920655 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -193,7 +193,10 @@ xlog_bread_noalign( bp->b_io_length = nbblks; bp->b_error = 0; - xfsbdstrat(log->l_mp, bp); + if (XFS_FORCED_SHUTDOWN(log->l_mp)) + return XFS_ERROR(EIO); + + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) xfs_buf_ioerror_alert(bp, __func__); @@ -4397,7 +4400,13 @@ xlog_do_recover( XFS_BUF_READ(bp); XFS_BUF_UNASYNC(bp); bp->b_ops = &xfs_sb_buf_ops; - xfsbdstrat(log->l_mp, bp); + + if (XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_buf_relse(bp); + return XFS_ERROR(EIO); + } + + xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, __func__); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996cfec..dd88f0e27bd 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,8 +134,6 @@ xfs_qm_dqpurge( { struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; xfs_dqlock(dqp); if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { @@ -143,21 +141,6 @@ xfs_qm_dqpurge( return EAGAIN; } - /* - * If this quota has a hint attached, prepare for releasing it now. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - - pdqp = dqp->q_pdquot; - if (pdqp) { - xfs_dqlock(pdqp); - dqp->q_pdquot = NULL; - } - dqp->dq_flags |= XFS_DQ_FREEING; xfs_dqflock(dqp); @@ -206,11 +189,47 @@ xfs_qm_dqpurge( XFS_STATS_DEC(xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); + return 0; +} + +/* + * Release the group or project dquot pointers the user dquots maybe carrying + * around as a hint, and proceed to purge the user dquot cache if requested. +*/ +STATIC int +xfs_qm_dqpurge_hints( + struct xfs_dquot *dqp, + void *data) +{ + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; + uint flags = *((uint *)data); + + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_FREEING) { + xfs_dqunlock(dqp); + return EAGAIN; + } + + /* If this quota has a hint attached, prepare for releasing it now */ + gdqp = dqp->q_gdquot; + if (gdqp) + dqp->q_gdquot = NULL; + + pdqp = dqp->q_pdquot; + if (pdqp) + dqp->q_pdquot = NULL; + + xfs_dqunlock(dqp); if (gdqp) - xfs_qm_dqput(gdqp); + xfs_qm_dqrele(gdqp); if (pdqp) - xfs_qm_dqput(pdqp); + xfs_qm_dqrele(pdqp); + + if (flags & XFS_QMOPT_UQUOTA) + return xfs_qm_dqpurge(dqp, NULL); + return 0; } @@ -222,8 +241,18 @@ xfs_qm_dqpurge_all( struct xfs_mount *mp, uint flags) { - if (flags & XFS_QMOPT_UQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); + /* + * We have to release group/project dquot hint(s) from the user dquot + * at first if they are there, otherwise we would run into an infinite + * loop while walking through radix tree to purge other type of dquots + * since their refcount is not zero if the user dquot refers to them + * as hint. + * + * Call the special xfs_qm_dqpurge_hints() will end up go through the + * general xfs_qm_dqpurge() against user dquot cache if requested. + */ + xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags); + if (flags & XFS_QMOPT_GQUOTA) xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); if (flags & XFS_QMOPT_PQUOTA) @@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - if (udqp) { + if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); - ASSERT(XFS_IS_UQUOTA_ON(mp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); ip->i_udquot = xfs_qm_dqhold(udqp); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (gdqp) { + if (gdqp && XFS_IS_GQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); - ASSERT(XFS_IS_GQUOTA_ON(mp)); ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); ip->i_gdquot = xfs_qm_dqhold(gdqp); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (pdqp) { + if (pdqp && XFS_IS_PQUOTA_ON(mp)) { ASSERT(ip->i_pdquot == NULL); - ASSERT(XFS_IS_PQUOTA_ON(mp)); ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); ip->i_pdquot = xfs_qm_dqhold(pdqp); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c035d11b773..647b6f1d892 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -314,7 +314,18 @@ xfs_trans_read_buf_map( ASSERT(bp->b_iodone == NULL); XFS_BUF_READ(bp); bp->b_ops = ops; - xfsbdstrat(tp->t_mountp, bp); + + /* + * XXX(hch): clean up the error handling here to be less + * of a mess.. + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + } else { + xfs_buf_iorequest(bp); + } + error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, __func__); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f330d28e4d0..db092345894 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -217,7 +217,7 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif #ifndef pte_accessible -# define pte_accessible(pte) ((void)(pte),1) +# define pte_accessible(mm, pte) ((void)(pte), 1) #endif #ifndef flush_tlb_fix_spurious_fault @@ -599,11 +599,10 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE barrier(); #endif - if (pmd_none(pmdval)) + if (pmd_none(pmdval) || pmd_trans_huge(pmdval)) return 1; if (unlikely(pmd_bad(pmdval))) { - if (!pmd_trans_huge(pmdval)) - pmd_clear_bad(pmd); + pmd_clear_bad(pmd); return 1; } return 0; diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 669fef5c745..3e0fbe44176 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -3,6 +3,6 @@ #include <uapi/linux/auxvec.h> -#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 20 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 0e23c26485f..9b503376738 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -418,6 +418,7 @@ enum { ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ + ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c8929c3832d..4bfde0e99ed 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -19,7 +19,7 @@ #define USE_CMPXCHG_LOCKREF \ (IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \ - IS_ENABLED(CONFIG_SMP) && !BLOATED_SPINLOCKS) + IS_ENABLED(CONFIG_SMP) && SPINLOCK_SIZE <= 4) struct lockref { union { diff --git a/include/linux/migrate.h b/include/linux/migrate.h index f5096b58b20..f015c059e15 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -55,7 +55,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, - struct buffer_head *head, enum migrate_mode mode); + struct buffer_head *head, enum migrate_mode mode, + int extra_count); #else static inline void putback_lru_pages(struct list_head *l) {} @@ -90,10 +91,19 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_NUMA_BALANCING +extern bool pmd_trans_migrating(pmd_t pmd); +extern void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd); extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); extern bool migrate_ratelimited(int node); #else +static inline bool pmd_trans_migrating(pmd_t pmd) +{ + return false; +} +static inline void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) +{ +} static inline int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 1cedd000cf2..35527173cf5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1317,7 +1317,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ #if USE_SPLIT_PTE_PTLOCKS -#if BLOATED_SPINLOCKS +#if ALLOC_SPLIT_PTLOCKS extern bool ptlock_alloc(struct page *page); extern void ptlock_free(struct page *page); @@ -1325,7 +1325,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page) { return page->ptl; } -#else /* BLOATED_SPINLOCKS */ +#else /* ALLOC_SPLIT_PTLOCKS */ static inline bool ptlock_alloc(struct page *page) { return true; @@ -1339,7 +1339,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page) { return &page->ptl; } -#endif /* BLOATED_SPINLOCKS */ +#endif /* ALLOC_SPLIT_PTLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bd299418a93..290901a8c1d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -26,6 +26,7 @@ struct address_space; #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) #define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* * Each physical page in the system has a struct page associated with @@ -155,7 +156,7 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTE_PTLOCKS -#if BLOATED_SPINLOCKS +#if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; @@ -443,6 +444,14 @@ struct mm_struct { /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif +#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) + /* + * An operation with batched TLB flushing is going on. Anything that + * can move process memory needs to flush the TLB when moving a + * PROT_NONE or PROT_NUMA mapped page. + */ + bool tlb_flush_pending; +#endif struct uprobes_state uprobes_state; }; @@ -459,4 +468,45 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return mm->cpu_vm_mask_var; } +#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) +/* + * Memory barriers to keep this state in sync are graciously provided by + * the page table locks, outside of which no page table modifications happen. + * The barriers below prevent the compiler from re-ordering the instructions + * around the memory barriers that are already present in the code. + */ +static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +{ + barrier(); + return mm->tlb_flush_pending; +} +static inline void set_tlb_flush_pending(struct mm_struct *mm) +{ + mm->tlb_flush_pending = true; + + /* + * Guarantee that the tlb_flush_pending store does not leak into the + * critical section updating the page tables + */ + smp_mb__before_spinlock(); +} +/* Clearing is done after a TLB flush, which also provides a barrier. */ +static inline void clear_tlb_flush_pending(struct mm_struct *mm) +{ + barrier(); + mm->tlb_flush_pending = false; +} +#else +static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +{ + return false; +} +static inline void set_tlb_flush_pending(struct mm_struct *mm) +{ +} +static inline void clear_tlb_flush_pending(struct mm_struct *mm) +{ +} +#endif + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c30af38be0..a2a70cc70e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1632,7 +1632,10 @@ struct napi_gro_cb { int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ - int flush; + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; /* Number of segments aggregated. */ u16 count; @@ -1651,6 +1654,9 @@ struct napi_gro_cb { /* Used in ipv6_gro_receive() */ int proto; + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + /* used in skb_gro_receive() slow path */ struct sk_buff *last; }; @@ -1900,6 +1906,14 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) skb_network_offset(skb); } +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, + csum_partial(start, len, 0)); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -1921,6 +1935,15 @@ static inline int dev_parse_header(const struct sk_buff *skb, return dev->header_ops->parse(skb, haddr); } +static inline int dev_rebuild_header(struct sk_buff *skb) +{ + const struct net_device *dev = skb->dev; + + if (!dev->header_ops || !dev->header_ops->rebuild) + return 0; + return dev->header_ops->rebuild(skb); +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); static inline int unregister_gifconf(unsigned int family) @@ -2431,6 +2454,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); +struct packet_offload *gro_find_receive_by_type(__be16 type); +struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { @@ -3039,6 +3064,19 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} + static inline bool netif_is_macvlan(struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 57e890abe1f..a5fc7d01aad 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -69,6 +69,7 @@ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ + extern __PCPU_ATTRS(sec) __typeof__(type) name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ __typeof__(type) name #else diff --git a/include/linux/pstore.h b/include/linux/pstore.h index abd437d0a8a..ece0c6bbfcc 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -51,6 +51,7 @@ struct pstore_info { char *buf; size_t bufsize; struct mutex read_mutex; /* serialize open/read/close */ + int flags; int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, @@ -70,6 +71,8 @@ struct pstore_info { void *data; }; +#define PSTORE_FLAGS_FRAGILE 1 + #ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 8e00f9f6f96..9e7db9e73cc 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -43,6 +43,7 @@ extern int unregister_reboot_notifier(struct notifier_block *); * Architecture-specific implementations of sys_reboot commands. */ +extern void migrate_to_reboot_cpu(void); extern void machine_restart(char *cmd); extern void machine_halt(void); extern void machine_power_off(void); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 939428ad25a..8e3e66ac0a5 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -24,6 +24,11 @@ extern int rtnl_trylock(void); extern int rtnl_is_locked(void); #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); +#else +static inline int lockdep_rtnl_is_held(void) +{ + return 1; +} #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c5cd016f512..d97f2d07d02 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -827,7 +827,7 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) */ static inline int skb_queue_empty(const struct sk_buff_head *list) { - return list->next == (struct sk_buff *)list; + return list->next == (const struct sk_buff *) list; } /** @@ -840,7 +840,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)list; + return skb->next == (const struct sk_buff *) list; } /** @@ -853,7 +853,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->prev == (struct sk_buff *)list; + return skb->prev == (const struct sk_buff *) list; } /** @@ -1715,6 +1715,11 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) skb->mac_header += offset; } +static inline void skb_pop_mac_header(struct sk_buff *skb) +{ + skb->mac_header = skb->network_header; +} + static inline void skb_probe_transport_header(struct sk_buff *skb, const int offset_hint) { @@ -2440,6 +2445,9 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); @@ -2621,6 +2629,10 @@ static inline void sw_tx_timestamp(struct sk_buff *skb) * Ethernet MAC Drivers should call this function in their hard_xmit() * function immediately before giving the sk_buff to the MAC hardware. * + * Specifically, one should make absolutely sure that this function is + * called before TX completion of this packet can trigger. Otherwise + * the packet could potentially already be freed. + * * @skb: A socket buffer. */ static inline void skb_tx_timestamp(struct sk_buff *skb) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d68633452d9..4ad0706d40e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -248,7 +248,10 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + /* OOO segments go in this list. Note that socket lock must be held, + * as we do not use sk_buff_head lock. + */ + struct sk_buff_head out_of_order_queue; /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b177ed803b..93695f0e22a 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -73,6 +73,7 @@ struct genl_family { * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers + * @dst_sk: destination socket */ struct genl_info { u32 snd_seq; @@ -85,6 +86,7 @@ struct genl_info { struct net * _net; #endif void * user_ptr[2]; + struct sock * dst_sk; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -177,6 +179,8 @@ void genl_notify(struct genl_family *family, struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); diff --git a/include/net/gre.h b/include/net/gre.h index dcd9ae3270d..70046a0b0b8 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -33,9 +33,6 @@ struct gre_cisco_protocol { int gre_cisco_register(struct gre_cisco_protocol *proto); int gre_cisco_unregister(struct gre_cisco_protocol *proto); -int gre_offload_init(void); -void gre_offload_exit(void); - void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1fb6cddbd44..017badb1aec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -152,6 +152,13 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +static inline bool ipv6_anycast_destination(const struct sk_buff *skb) +{ + struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + + return rt->rt6i_flags & RTF_ANYCAST; +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 31e2de7d57c..c0f0a13ed81 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -142,7 +142,7 @@ #define LLC_S_PF_IS_1(pdu) ((pdu->ctrl_2 & LLC_S_PF_BIT_MASK) ? 1 : 0) #define PDU_SUPV_GET_Nr(pdu) ((pdu->ctrl_2 & 0xFE) >> 1) -#define PDU_GET_NEXT_Vr(sn) (++sn & ~LLC_2_SEQ_NBR_MODULO) +#define PDU_GET_NEXT_Vr(sn) (((sn) + 1) & ~LLC_2_SEQ_NBR_MODULO) /* FRMR information field macros */ diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h new file mode 100644 index 00000000000..931fbf81217 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -0,0 +1,128 @@ +#ifndef _IPV4_NF_REJECT_H +#define _IPV4_NF_REJECT_H + +#include <net/ip.h> +#include <net/tcp.h> +#include <net/route.h> +#include <net/dst.h> + +static inline void nf_send_unreach(struct sk_buff *skb_in, int code) +{ + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _otcph, *tcph; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(_otcph), &_otcph); + if (oth == NULL) + return; + + /* No RST for RST. */ + if (oth->rst) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return; + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + + skb_reset_network_header(nskb); + niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = IPPROTO_TCP; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + memset(tcph, 0, sizeof(*tcph)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) + tcph->seq = oth->ack_seq; + else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} + + +#endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h new file mode 100644 index 00000000000..710d17ed70b --- /dev/null +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -0,0 +1,171 @@ +#ifndef _IPV6_NF_REJECT_H +#define _IPV6_NF_REJECT_H + +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/ip6_fib.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_ipv6.h> + +static inline void +nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum) +{ + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; + struct dst_entry *dst = NULL; + u8 proto; + __be16 frag_off; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + pr_debug("Cannot get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + pr_debug("RST is set\n"); + return; + } + + /* Check checksum. */ + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { + pr_debug("TCP checksum is invalid\n"); + return; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + skb_put(nskb, sizeof(struct ipv6hdr)); + skb_reset_network_header(nskb); + ip6h = ipv6_hdr(nskb); + ip6_flow_hdr(ip6h, tclass, 0); + ip6h->hop_limit = ip6_dst_hoplimit(dst); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, + &ipv6_hdr(nskb)->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial(tcph, + sizeof(struct tcphdr), 0)); + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); +} + +#endif /* _IPV6_NF_REJECT_H */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c1d5b3e34a2..84a53d78030 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -1,6 +1,10 @@ #ifndef _NF_QUEUE_H #define _NF_QUEUE_H +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + /* Each queued (to userspace) skbuff has one of these. */ struct nf_queue_entry { struct list_head list; @@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_release_refs(struct nf_queue_entry *entry); +static inline void init_hashrandom(u32 *jhash_initval) +{ + while (*jhash_initval == 0) + *jhash_initval = prandom_u32(); +} + +static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct iphdr *iph = ip_hdr(skb); + + /* packets in either direction go into same queue */ + if ((__force u32)iph->saddr < (__force u32)iph->daddr) + return jhash_3words((__force u32)iph->saddr, + (__force u32)iph->daddr, iph->protocol, jhash_initval); + + return jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, iph->protocol, jhash_initval); +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 a, b, c; + + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { + a = (__force u32) ip6h->saddr.s6_addr32[3]; + b = (__force u32) ip6h->daddr.s6_addr32[3]; + } else { + b = (__force u32) ip6h->saddr.s6_addr32[3]; + a = (__force u32) ip6h->daddr.s6_addr32[3]; + } + + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) + c = (__force u32) ip6h->saddr.s6_addr32[1]; + else + c = (__force u32) ip6h->daddr.s6_addr32[1]; + + return jhash_3words(a, b, c, jhash_initval); +} +#endif + +static inline u32 +nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, + u32 jhash_initval) +{ + if (family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + else if (family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; +#endif + + return queue; +} + #endif /* _NF_QUEUE_H */ diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0fb2401197c..76fc7d1dbfd 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -73,6 +73,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t rt_genid; + int anycast_src_echo_reply; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 41c7013e269..e9f732fda95 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1045,9 +1045,6 @@ struct sctp_outq { /* Corked? */ char cork; - - /* Is this structure empty? */ - char empty; }; void sctp_outq_init(struct sctp_association *, struct sctp_outq *); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 979874c627e..61e1935c91b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -978,7 +978,7 @@ struct ib_uobject { }; struct ib_udata { - void __user *inbuf; + const void __user *inbuf; void __user *outbuf; size_t inlen; size_t outlen; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 45412a6afa6..321301c0a64 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -517,10 +517,6 @@ struct se_node_acl { u32 acl_index; #define MAX_ACL_TAG_SIZE 64 char acl_tag[MAX_ACL_TAG_SIZE]; - u64 num_cmds; - u64 read_bytes; - u64 write_bytes; - spinlock_t stats_lock; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ atomic_t acl_pr_ref_count; struct se_dev_entry **device_list; @@ -624,6 +620,7 @@ struct se_dev_attrib { u32 unmap_granularity; u32 unmap_granularity_alignment; u32 max_write_same_len; + u32 max_bytes_per_io; struct se_device *da_dev; struct config_group da_group; }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index fbfd229a8e9..aa86a15293e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -110,11 +110,13 @@ enum nft_table_flags { * * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) + * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, + NFTA_TABLE_USE, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) @@ -553,11 +555,13 @@ enum nft_meta_keys { * * @NFTA_META_DREG: destination register (NLA_U32) * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + * @NFTA_META_SREG: source register (NLA_U32) */ enum nft_meta_attributes { NFTA_META_UNSPEC, NFTA_META_DREG, NFTA_META_KEY, + NFTA_META_SREG, __NFTA_META_MAX }; #define NFTA_META_MAX (__NFTA_META_MAX - 1) @@ -658,6 +662,26 @@ enum nft_log_attributes { #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) /** + * enum nft_queue_attributes - nf_tables queue expression netlink attributes + * + * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) + * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) + * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + */ +enum nft_queue_attributes { + NFTA_QUEUE_UNSPEC, + NFTA_QUEUE_NUM, + NFTA_QUEUE_TOTAL, + NFTA_QUEUE_FLAGS, + __NFTA_QUEUE_MAX +}; +#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) + +#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ +#define NFT_QUEUE_FLAG_MASK 0x03 + +/** * enum nft_reject_types - nf_tables reject expression reject types * * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d120f9fe001..970553cbbc8 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -40,7 +40,15 @@ struct ovs_header { #define OVS_DATAPATH_FAMILY "ovs_datapath" #define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 + +/* V2: + * - API users are expected to provide OVS_DP_ATTR_USER_FEATURES + * when creating the datapath. + */ +#define OVS_DATAPATH_VERSION 2 + +/* First OVS datapath version to support features */ +#define OVS_DP_VER_FEATURES 2 enum ovs_datapath_cmd { OVS_DP_CMD_UNSPEC, @@ -75,6 +83,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ + OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ __OVS_DP_ATTR_MAX }; @@ -106,6 +115,9 @@ struct ovs_vport_stats { __u64 tx_dropped; /* no space available in linux */ }; +/* Allow last Netlink attribute to be unaligned */ +#define OVS_DP_F_UNALIGNED (1 << 0) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e1802d6153a..959d454f76a 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -679,6 +679,7 @@ enum perf_event_type { * * { u64 weight; } && PERF_SAMPLE_WEIGHT * { u64 data_src; } && PERF_SAMPLE_DATA_SRC + * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 77eb331810b..d62316baae9 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -818,4 +818,29 @@ struct tc_hhf_xstats { __u32 hh_tot_count; /* number of captured heavy-hitters so far */ __u32 hh_cur_count; /* number of current heavy-hitters */ }; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; #endif diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 65e12099ef8..ae665ac59c3 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -146,7 +146,7 @@ struct blkif_request_segment_aligned { struct blkif_request_rw { uint8_t nr_segments; /* number of segments */ blkif_vdev_t handle; /* only for read/write requests */ -#ifdef CONFIG_X86_64 +#ifndef CONFIG_X86_32 uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ #endif uint64_t id; /* private guest value, echoed in resp */ @@ -163,7 +163,7 @@ struct blkif_request_discard { uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ blkif_vdev_t _pad1; /* only for read/write requests */ -#ifdef CONFIG_X86_64 +#ifndef CONFIG_X86_32 uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ #endif uint64_t id; /* private guest value, echoed in resp */ @@ -175,7 +175,7 @@ struct blkif_request_discard { struct blkif_request_other { uint8_t _pad1; blkif_vdev_t _pad2; /* only for read/write requests */ -#ifdef CONFIG_X86_64 +#ifndef CONFIG_X86_32 uint32_t _pad3; /* offsetof(blkif_req..,u.other.id)==8*/ #endif uint64_t id; /* private guest value, echoed in resp */ @@ -184,7 +184,7 @@ struct blkif_request_other { struct blkif_request_indirect { uint8_t indirect_op; uint16_t nr_segments; -#ifdef CONFIG_X86_64 +#ifndef CONFIG_X86_32 uint32_t _pad1; /* offsetof(blkif_...,u.indirect.id) == 8 */ #endif uint64_t id; @@ -192,7 +192,7 @@ struct blkif_request_indirect { blkif_vdev_t handle; uint16_t _pad2; grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; -#ifdef CONFIG_X86_64 +#ifndef CONFIG_X86_32 uint32_t _pad3; /* make it 64 byte aligned */ #else uint64_t _pad3; /* make it 64 byte aligned */ diff --git a/kernel/Makefile b/kernel/Makefile index bbaf7d59c1b..bc010ee272b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -137,9 +137,10 @@ $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE ############################################################################### ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y) X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509) -X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509 -X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \ +X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += $(objtree)/signing_key.x509 +X509_CERTIFICATES-raw := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \ $(or $(realpath $(CERT)),$(CERT)))) +X509_CERTIFICATES := $(subst $(realpath $(objtree))/,,$(X509_CERTIFICATES-raw)) ifeq ($(X509_CERTIFICATES),) $(warning *** No X.509 certificates found ***) @@ -164,9 +165,9 @@ $(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list targets += $(obj)/.x509.list $(obj)/.x509.list: @echo $(X509_CERTIFICATES) >$@ +endif clean-files := x509_certificate_list .x509.list -endif ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### diff --git a/kernel/bounds.c b/kernel/bounds.c index 5253204afdc..9fd4246b04b 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -22,6 +22,6 @@ void foo(void) #ifdef CONFIG_SMP DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif - DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int)); + DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); /* End of constants */ } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8b729c278b6..bc1dcabe921 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -890,6 +890,16 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) struct cgroup *cgrp = dentry->d_fsdata; BUG_ON(!(cgroup_is_dead(cgrp))); + + /* + * XXX: cgrp->id is only used to look up css's. As cgroup + * and css's lifetimes will be decoupled, it should be made + * per-subsystem and moved to css->id so that lookups are + * successful until the target css is released. + */ + idr_remove(&cgrp->root->cgroup_idr, cgrp->id); + cgrp->id = -1; + call_rcu(&cgrp->rcu_head, cgroup_free_rcu); } else { struct cfent *cfe = __d_cfe(dentry); @@ -4268,6 +4278,7 @@ static void css_release(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); + rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL); call_rcu(&css->rcu_head, css_free_rcu_fn); } @@ -4426,14 +4437,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); root->number_of_cgroups++; - /* each css holds a ref to the cgroup's dentry and the parent css */ - for_each_root_subsys(root, ss) { - struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; - - dget(dentry); - css_get(css->parent); - } - /* hold a ref to the parent's dentry */ dget(parent->dentry); @@ -4445,6 +4448,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, if (err) goto err_destroy; + /* each css holds a ref to the cgroup's dentry and parent css */ + dget(dentry); + css_get(css->parent); + + /* mark it consumed for error path */ + css_ar[ss->subsys_id] = NULL; + if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && parent->parent) { pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", @@ -4491,6 +4501,14 @@ err_free_cgrp: return err; err_destroy: + for_each_root_subsys(root, ss) { + struct cgroup_subsys_state *css = css_ar[ss->subsys_id]; + + if (css) { + percpu_ref_cancel_init(&css->refcnt); + ss->css_free(css); + } + } cgroup_destroy_locked(cgrp); mutex_unlock(&cgroup_mutex); mutex_unlock(&dentry->d_inode->i_mutex); @@ -4652,8 +4670,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) * will be invoked to perform the rest of destruction once the * percpu refs of all css's are confirmed to be killed. */ - for_each_root_subsys(cgrp->root, ss) - kill_css(cgroup_css(cgrp, ss)); + for_each_root_subsys(cgrp->root, ss) { + struct cgroup_subsys_state *css = cgroup_css(cgrp, ss); + + if (css) + kill_css(css); + } /* * Mark @cgrp dead. This prevents further task migration and child @@ -4722,14 +4744,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp) /* delete this cgroup from parent->children */ list_del_rcu(&cgrp->sibling); - /* - * We should remove the cgroup object from idr before its grace - * period starts, so we won't be looking up a cgroup while the - * cgroup is being freed. - */ - idr_remove(&cgrp->root->cgroup_idr, cgrp->id); - cgrp->id = -1; - dput(d); set_bit(CGRP_RELEASABLE, &parent->flags); diff --git a/kernel/events/core.c b/kernel/events/core.c index 72348dc192c..f5744010a8d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1396,6 +1396,8 @@ event_sched_out(struct perf_event *event, if (event->state != PERF_EVENT_STATE_ACTIVE) return; + perf_pmu_disable(event->pmu); + event->state = PERF_EVENT_STATE_INACTIVE; if (event->pending_disable) { event->pending_disable = 0; @@ -1412,6 +1414,8 @@ event_sched_out(struct perf_event *event, ctx->nr_freq--; if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; + + perf_pmu_enable(event->pmu); } static void @@ -1652,6 +1656,7 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx) { u64 tstamp = perf_event_time(event); + int ret = 0; if (event->state <= PERF_EVENT_STATE_OFF) return 0; @@ -1674,10 +1679,13 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); + perf_pmu_disable(event->pmu); + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; - return -EAGAIN; + ret = -EAGAIN; + goto out; } event->tstamp_running += tstamp - event->tstamp_stopped; @@ -1693,7 +1701,10 @@ event_sched_in(struct perf_event *event, if (event->attr.exclusive) cpuctx->exclusive = 1; - return 0; +out: + perf_pmu_enable(event->pmu); + + return ret; } static int @@ -2743,6 +2754,8 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, if (!event_filter_match(event)) continue; + perf_pmu_disable(event->pmu); + hwc = &event->hw; if (hwc->interrupts == MAX_INTERRUPTS) { @@ -2752,7 +2765,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, } if (!event->attr.freq || !event->attr.sample_freq) - continue; + goto next; /* * stop the event and update event->count @@ -2774,6 +2787,8 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, perf_adjust_period(event, period, delta, false); event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); + next: + perf_pmu_enable(event->pmu); } perf_pmu_enable(ctx->pmu); diff --git a/kernel/fork.c b/kernel/fork.c index 728d5be9548..5721f0e3f2d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) spin_lock_init(&mm->page_table_lock); mm_init_aio(mm); mm_init_owner(mm, p); + clear_tlb_flush_pending(mm); if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; diff --git a/kernel/freezer.c b/kernel/freezer.c index b462fa19751..aa6a8aadb91 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -19,6 +19,12 @@ EXPORT_SYMBOL(system_freezing_cnt); bool pm_freezing; bool pm_nosig_freezing; +/* + * Temporary export for the deadlock workaround in ata_scsi_hotplug(). + * Remove once the hack becomes unnecessary. + */ +EXPORT_SYMBOL_GPL(pm_freezing); + /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); diff --git a/kernel/kexec.c b/kernel/kexec.c index d0d8fca5406..9c970167e40 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1680,6 +1680,7 @@ int kernel_kexec(void) { kexec_in_progress = true; kernel_restart_prepare(NULL); + migrate_to_reboot_cpu(); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); } diff --git a/kernel/power/console.c b/kernel/power/console.c index 463aa673675..eacb8bd8cab 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -81,6 +81,7 @@ void pm_vt_switch_unregister(struct device *dev) list_for_each_entry(tmp, &pm_vt_switch_list, head) { if (tmp->dev == dev) { list_del(&tmp->head); + kfree(tmp); break; } } diff --git a/kernel/reboot.c b/kernel/reboot.c index f813b347464..662c83fc16b 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -104,7 +104,7 @@ int unregister_reboot_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_reboot_notifier); -static void migrate_to_reboot_cpu(void) +void migrate_to_reboot_cpu(void) { /* The boot cpu is always logical cpu 0 */ int cpu = reboot_cpu; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 19af58f3a26..a88f4a485c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4902,6 +4902,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_asym); static void update_top_cache_domain(int cpu) { struct sched_domain *sd; + struct sched_domain *busy_sd = NULL; int id = cpu; int size = 1; @@ -4909,9 +4910,9 @@ static void update_top_cache_domain(int cpu) if (sd) { id = cpumask_first(sched_domain_span(sd)); size = cpumask_weight(sched_domain_span(sd)); - sd = sd->parent; /* sd_busy */ + busy_sd = sd->parent; /* sd_busy */ } - rcu_assign_pointer(per_cpu(sd_busy, cpu), sd); + rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd); rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); per_cpu(sd_llc_size, cpu) = size; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9030da7bcb1..c7395d97e4c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1738,6 +1738,13 @@ void task_numa_work(struct callback_head *work) (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ))) continue; + /* + * Skip inaccessible VMAs to avoid any confusion between + * PROT_NONE and NUMA hinting ptes + */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + continue; + do { start = max(start, vma->vm_start); end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7d57275fc39..1c4065575fa 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -901,6 +901,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Change rq's cpupri only if rt_rq is the top queue. + */ + if (&rq->rt != rt_rq) + return; +#endif if (rq->online && prio < prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, prio); } @@ -910,6 +917,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) { struct rq *rq = rq_of_rt_rq(rt_rq); +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Change rq's cpupri only if rt_rq is the top queue. + */ + if (&rq->rt != rt_rq) + return; +#endif if (rq->online && rt_rq->highest_prio.curr != prev_prio) cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0e9f9eaade2..72a0f81dc5a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -775,7 +775,7 @@ static int ftrace_profile_init(void) int cpu; int ret = 0; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { ret = ftrace_profile_init_cpu(cpu); if (ret) break; diff --git a/kernel/user.c b/kernel/user.c index a3a0dbfda32..c006131beb7 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,9 +51,9 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, -#ifdef CONFIG_KEYS_KERBEROS_CACHE - .krb_cache_register_sem = - __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem), +#ifdef CONFIG_PERSISTENT_KEYRINGS + .persistent_keyring_register_sem = + __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), #endif }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/mm/Kconfig b/mm/Kconfig index eb69f352401..723bbe04a0b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -543,7 +543,7 @@ config ZSWAP config MEM_SOFT_DIRTY bool "Track memory changes" - depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY + depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS select PROC_PAGE_MONITOR help This option enables memory changes tracking by introducing a diff --git a/mm/compaction.c b/mm/compaction.c index 805165bcd3d..f58bcd016f4 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -134,6 +134,10 @@ static void update_pageblock_skip(struct compact_control *cc, bool migrate_scanner) { struct zone *zone = cc->zone; + + if (cc->ignore_skip_hint) + return; + if (!page) return; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 33a5dc49281..7de1bf85f68 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = 0; goto out_unlock; } + + /* mmap_sem prevents this happening but warn if that changes */ + WARN_ON(pmd_trans_migrating(pmd)); + if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ spin_unlock(src_ptl); @@ -1243,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) return ERR_PTR(-EFAULT); + /* Full NUMA hinting faults to serialise migration in fault paths */ + if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) + goto out; + page = pmd_page(*pmd); VM_BUG_ON(!PageHead(page)); if (flags & FOLL_TOUCH) { @@ -1295,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; + /* + * If there are potential migrations, wait for completion and retry + * without disrupting NUMA hinting information. Do not relock and + * check_same as the page may no longer be mapped. + */ + if (unlikely(pmd_trans_migrating(*pmdp))) { + spin_unlock(ptl); + wait_migrate_huge_page(vma->anon_vma, pmdp); + goto out; + } + page = pmd_page(pmd); BUG_ON(is_huge_zero_page(page)); page_nid = page_to_nid(page); @@ -1323,23 +1342,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* If the page was locked, there are no parallel migrations */ if (page_locked) goto clear_pmdnuma; + } - /* - * Otherwise wait for potential migrations and retry. We do - * relock and check_same as the page may no longer be mapped. - * As the fault is being retried, do not account for it. - */ + /* Migration could have started since the pmd_trans_migrating check */ + if (!page_locked) { spin_unlock(ptl); wait_on_page_locked(page); page_nid = -1; goto out; } - /* Page is misplaced, serialise migrations and parallel THP splits */ + /* + * Page is misplaced. Page lock serialises migrations. Acquire anon_vma + * to serialises splits + */ get_page(page); spin_unlock(ptl); - if (!page_locked) - lock_page(page); anon_vma = page_lock_anon_vma_read(page); /* Confirm the PMD did not change while page_table_lock was released */ @@ -1351,6 +1369,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; } + /* Bail if we fail to protect against THP splits for any reason */ + if (unlikely(!anon_vma)) { + put_page(page); + page_nid = -1; + goto clear_pmdnuma; + } + /* * Migrate the THP to the requested node, returns with page unlocked * and pmd_numa cleared. @@ -1517,6 +1542,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ret = 1; if (!prot_numa) { entry = pmdp_get_and_clear(mm, addr, pmd); + if (pmd_numa(entry)) + entry = pmd_mknonnuma(entry); entry = pmd_modify(entry, newprot); ret = HPAGE_PMD_NR; BUG_ON(pmd_write(entry)); @@ -1531,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, */ if (!is_huge_zero_page(page) && !pmd_numa(*pmd)) { - entry = pmdp_get_and_clear(mm, addr, pmd); + entry = *pmd; entry = pmd_mknuma(entry); ret = HPAGE_PMD_NR; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b7c171602ba..db08af92c6f 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1505,10 +1505,16 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret > 0) ret = -EIO; } else { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_order(hpage), - &num_poisoned_pages); + /* overcommit hugetlb page will be freed to buddy */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } } return ret; } diff --git a/mm/memory.c b/mm/memory.c index 5d9025f3b3e..6768ce9e57d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4271,7 +4271,7 @@ void copy_user_huge_page(struct page *dst, struct page *src, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ -#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS +#if USE_SPLIT_PTE_PTLOCKS && ALLOC_SPLIT_PTLOCKS bool ptlock_alloc(struct page *page) { spinlock_t *ptl; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index eca4a312912..0cd2c4d4e27 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1197,14 +1197,16 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * break; vma = vma->vm_next; } + + if (PageHuge(page)) { + if (vma) + return alloc_huge_page_noerr(vma, address, 1); + else + return NULL; + } /* - * queue_pages_range() confirms that @page belongs to some vma, - * so vma shouldn't be NULL. + * if !vma, alloc_page_vma() will use task or system default policy */ - BUG_ON(!vma); - - if (PageHuge(page)) - return alloc_huge_page_noerr(vma, address, 1); return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); } #else @@ -1318,7 +1320,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } else - putback_lru_pages(&pagelist); + putback_movable_pages(&pagelist); up_write(&mm->mmap_sem); mpol_out: diff --git a/mm/migrate.c b/mm/migrate.c index bb940045fe8..9194375b230 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -36,6 +36,7 @@ #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> #include <linux/balloon_compaction.h> +#include <linux/mmu_notifier.h> #include <asm/tlbflush.h> @@ -316,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, */ int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, - struct buffer_head *head, enum migrate_mode mode) + struct buffer_head *head, enum migrate_mode mode, + int extra_count) { - int expected_count = 0; + int expected_count = 1 + extra_count; void **pslot; if (!mapping) { /* Anonymous page without mapping */ - if (page_count(page) != 1) + if (page_count(page) != expected_count) return -EAGAIN; return MIGRATEPAGE_SUCCESS; } @@ -333,7 +335,7 @@ int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->page_tree, page_index(page)); - expected_count = 2 + page_has_private(page); + expected_count += 1 + page_has_private(page); if (page_count(page) != expected_count || radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { spin_unlock_irq(&mapping->tree_lock); @@ -583,7 +585,7 @@ int migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode); + rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -610,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping, head = page_buffers(page); - rc = migrate_page_move_mapping(mapping, newpage, page, head, mode); + rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -1654,6 +1656,18 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) return 1; } +bool pmd_trans_migrating(pmd_t pmd) +{ + struct page *page = pmd_page(pmd); + return PageLocked(page); +} + +void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd) +{ + struct page *page = pmd_page(*pmd); + wait_on_page_locked(page); +} + /* * Attempt to migrate a misplaced page to the specified destination * node. Caller is expected to have an elevated reference count on @@ -1716,12 +1730,14 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, struct page *page, int node) { spinlock_t *ptl; - unsigned long haddr = address & HPAGE_PMD_MASK; pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; struct page *new_page = NULL; struct mem_cgroup *memcg = NULL; int page_lru = page_is_file_cache(page); + unsigned long mmun_start = address & HPAGE_PMD_MASK; + unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; + pmd_t orig_entry; /* * Rate-limit the amount of data that is being migrated to a node. @@ -1744,6 +1760,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_fail; } + if (mm_tlb_flush_pending(mm)) + flush_tlb_range(vma, mmun_start, mmun_end); + /* Prepare a page as a migration target */ __set_page_locked(new_page); SetPageSwapBacked(new_page); @@ -1755,9 +1774,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, WARN_ON(PageLRU(new_page)); /* Recheck the target PMD */ + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); ptl = pmd_lock(mm, pmd); - if (unlikely(!pmd_same(*pmd, entry))) { + if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) { +fail_putback: spin_unlock(ptl); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); /* Reverse changes made by migrate_page_copy() */ if (TestClearPageActive(new_page)) @@ -1774,7 +1796,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, putback_lru_page(page); mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); - goto out_fail; + + goto out_unlock; } /* @@ -1786,16 +1809,35 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, */ mem_cgroup_prepare_migration(page, new_page, &memcg); + orig_entry = *pmd; entry = mk_pmd(new_page, vma->vm_page_prot); - entry = pmd_mknonnuma(entry); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = pmd_mkhuge(entry); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - pmdp_clear_flush(vma, haddr, pmd); - set_pmd_at(mm, haddr, pmd, entry); - page_add_new_anon_rmap(new_page, vma, haddr); + /* + * Clear the old entry under pagetable lock and establish the new PTE. + * Any parallel GUP will either observe the old page blocking on the + * page lock, block on the page table lock or observe the new page. + * The SetPageUptodate on the new page and page_add_new_anon_rmap + * guarantee the copy is visible before the pagetable update. + */ + flush_cache_range(vma, mmun_start, mmun_end); + page_add_new_anon_rmap(new_page, vma, mmun_start); + pmdp_clear_flush(vma, mmun_start, pmd); + set_pmd_at(mm, mmun_start, pmd, entry); + flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); + + if (page_count(page) != 2) { + set_pmd_at(mm, mmun_start, pmd, orig_entry); + flush_tlb_range(vma, mmun_start, mmun_end); + update_mmu_cache_pmd(vma, address, &entry); + page_remove_rmap(new_page); + goto fail_putback; + } + page_remove_rmap(page); + /* * Finish the charge transaction under the page table lock to * prevent split_huge_page() from dividing up the charge @@ -1803,6 +1845,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, */ mem_cgroup_end_migration(memcg, page, new_page, true); spin_unlock(ptl); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(new_page); unlock_page(page); @@ -1820,10 +1863,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); out_dropref: - entry = pmd_mknonnuma(entry); - set_pmd_at(mm, haddr, pmd, entry); - update_mmu_cache_pmd(vma, address, &entry); + ptl = pmd_lock(mm, pmd); + if (pmd_same(*pmd, entry)) { + entry = pmd_mknonnuma(entry); + set_pmd_at(mm, mmun_start, pmd, entry); + update_mmu_cache_pmd(vma, address, &entry); + } + spin_unlock(ptl); +out_unlock: unlock_page(page); put_page(page); return 0; diff --git a/mm/mprotect.c b/mm/mprotect.c index 26667971c82..bb53a6591ae 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -52,17 +52,21 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, pte_t ptent; bool updated = false; - ptent = ptep_modify_prot_start(mm, addr, pte); if (!prot_numa) { + ptent = ptep_modify_prot_start(mm, addr, pte); + if (pte_numa(ptent)) + ptent = pte_mknonnuma(ptent); ptent = pte_modify(ptent, newprot); updated = true; } else { struct page *page; + ptent = *pte; page = vm_normal_page(vma, addr, oldpte); if (page) { if (!pte_numa(oldpte)) { ptent = pte_mknuma(ptent); + set_pte_at(mm, addr, pte, ptent); updated = true; } } @@ -79,7 +83,10 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (updated) pages++; - ptep_modify_prot_commit(mm, addr, pte, ptent); + + /* Only !prot_numa always clears the pte */ + if (!prot_numa) + ptep_modify_prot_commit(mm, addr, pte, ptent); } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -181,6 +188,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); + set_tlb_flush_pending(mm); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -192,6 +200,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, /* Only flush the TLB if we actually modified any entries: */ if (pages) flush_tlb_range(vma, start, end); + clear_tlb_flush_pending(mm); return pages; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 580a5f075ed..5248fe070aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1816,7 +1816,7 @@ static void zlc_clear_zones_full(struct zonelist *zonelist) static bool zone_local(struct zone *local_zone, struct zone *zone) { - return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE; + return local_zone->node == zone->node; } static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) @@ -1913,18 +1913,17 @@ zonelist_scan: * page was allocated in should have no effect on the * time the page has in memory before being reclaimed. * - * When zone_reclaim_mode is enabled, try to stay in - * local zones in the fastpath. If that fails, the - * slowpath is entered, which will do another pass - * starting with the local zones, but ultimately fall - * back to remote zones that do not partake in the - * fairness round-robin cycle of this zonelist. + * Try to stay in local zones in the fastpath. If + * that fails, the slowpath is entered, which will do + * another pass starting with the local zones, but + * ultimately fall back to remote zones that do not + * partake in the fairness round-robin cycle of this + * zonelist. */ if (alloc_flags & ALLOC_WMARK_LOW) { if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) continue; - if (zone_reclaim_mode && - !zone_local(preferred_zone, zone)) + if (!zone_local(preferred_zone, zone)) continue; } /* @@ -2390,7 +2389,7 @@ static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, * thrash fairness information for zones that are not * actually part of this zonelist's round-robin cycle. */ - if (zone_reclaim_mode && !zone_local(preferred_zone, zone)) + if (!zone_local(preferred_zone, zone)) continue; mod_zone_page_state(zone, NR_ALLOC_BATCH, high_wmark_pages(zone) - diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index cbb38545d9d..a8b91992593 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -110,9 +110,10 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { + struct mm_struct *mm = (vma)->vm_mm; pte_t pte; - pte = ptep_get_and_clear((vma)->vm_mm, address, ptep); - if (pte_accessible(pte)) + pte = ptep_get_and_clear(mm, address, ptep); + if (pte_accessible(mm, pte)) flush_tlb_page(vma, address); return pte; } @@ -191,6 +192,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + pmd_t entry = *pmdp; + if (pmd_numa(entry)) + entry = pmd_mknonnuma(entry); set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(*pmdp)); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } diff --git a/mm/rmap.c b/mm/rmap.c index 55c8b8dc9ff..068522d8502 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -600,7 +600,11 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, spinlock_t *ptl; if (unlikely(PageHuge(page))) { + /* when pud is not present, pte will be NULL */ pte = huge_pte_offset(mm, address); + if (!pte) + return NULL; + ptl = huge_pte_lockptr(page_hstate(page), mm, pte); goto check; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 762896ebfcf..47c908f1f62 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -530,6 +530,23 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, + unsigned int len) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; + + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); +} + +static const struct header_ops vlan_passthru_header_ops = { + .create = vlan_passthru_hard_header, + .rebuild = dev_rebuild_header, + .parse = eth_header_parse, +}; + static struct device_type vlan_type = { .name = "vlan", }; @@ -573,7 +590,7 @@ static int vlan_dev_init(struct net_device *dev) dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { - dev->header_ops = real_dev->header_ops; + dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { dev->header_ops = &vlan_header_ops; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a2b480a9087..b9c8a6eedf4 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -307,9 +307,9 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; - batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; - batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; - batadv_ogm_packet->header.ttl = 2; + batadv_ogm_packet->packet_type = BATADV_IV_OGM; + batadv_ogm_packet->version = BATADV_COMPAT_VERSION; + batadv_ogm_packet->ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; @@ -346,7 +346,7 @@ batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; - batadv_ogm_packet->header.ttl = BATADV_TTL; + batadv_ogm_packet->ttl = BATADV_TTL; } /* when do we schedule our own ogm to be sent */ @@ -435,7 +435,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl, + batadv_ogm_packet->tq, batadv_ogm_packet->ttl, (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? "on" : "off"), hard_iface->net_dev->name, @@ -491,7 +491,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) /* multihomed peer assumed * non-primary OGMs are only broadcasted on their interface */ - if ((directlink && (batadv_ogm_packet->header.ttl == 1)) || + if ((directlink && (batadv_ogm_packet->ttl == 1)) || (forw_packet->own && (forw_packet->if_incoming != primary_if))) { /* FIXME: what about aggregated packets ? */ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -499,7 +499,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) (forw_packet->own ? "Sending own" : "Forwarding"), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), - batadv_ogm_packet->header.ttl, + batadv_ogm_packet->ttl, forw_packet->if_incoming->net_dev->name, forw_packet->if_incoming->net_dev->dev_addr); @@ -572,7 +572,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, */ if ((!directlink) && (!(batadv_ogm_packet->flags & BATADV_DIRECTLINK)) && - (batadv_ogm_packet->header.ttl != 1) && + (batadv_ogm_packet->ttl != 1) && /* own packets originating non-primary * interfaces leave only that interface @@ -587,7 +587,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, * interface only - we still can aggregate */ if ((directlink) && - (new_bat_ogm_packet->header.ttl == 1) && + (new_bat_ogm_packet->ttl == 1) && (forw_packet->if_incoming == if_incoming) && /* packets from direct neighbors or @@ -778,7 +778,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); uint16_t tvlv_len; - if (batadv_ogm_packet->header.ttl <= 1) { + if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); return; } @@ -798,7 +798,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); - batadv_ogm_packet->header.ttl--; + batadv_ogm_packet->ttl--; memcpy(batadv_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN); /* apply hop penalty */ @@ -807,7 +807,7 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: tq: %i, ttl: %i\n", - batadv_ogm_packet->tq, batadv_ogm_packet->header.ttl); + batadv_ogm_packet->tq, batadv_ogm_packet->ttl); /* switch of primaries first hop flag when forwarding */ batadv_ogm_packet->flags &= ~BATADV_PRIMARIES_FIRST_HOP; @@ -972,8 +972,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, spin_unlock_bh(&neigh_node->bat_iv.lq_update_lock); if (dup_status == BATADV_NO_DUP) { - orig_node->last_ttl = batadv_ogm_packet->header.ttl; - neigh_node->last_ttl = batadv_ogm_packet->header.ttl; + orig_node->last_ttl = batadv_ogm_packet->ttl; + neigh_node->last_ttl = batadv_ogm_packet->ttl; } batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node); @@ -1247,7 +1247,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * packet in an aggregation. Here we expect that the padding * is always zero (or not 0x01) */ - if (batadv_ogm_packet->header.packet_type != BATADV_IV_OGM) + if (batadv_ogm_packet->packet_type != BATADV_IV_OGM) return; /* could be changed by schedule_own_packet() */ @@ -1267,8 +1267,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if_incoming->net_dev->dev_addr, batadv_ogm_packet->orig, batadv_ogm_packet->prev_sender, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, - batadv_ogm_packet->header.ttl, - batadv_ogm_packet->header.version, has_directlink_flag); + batadv_ogm_packet->ttl, + batadv_ogm_packet->version, has_directlink_flag); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { @@ -1433,7 +1433,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * seqno and similar ttl as the non-duplicate */ sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; + similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->ttl; if (is_bidirect && ((dup_status == BATADV_NO_DUP) || (sameseq && similar_ttl))) batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6c8c3934bd7..b316a4cb6f1 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -349,7 +349,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - switch (unicast_4addr_packet->u.header.packet_type) { + switch (unicast_4addr_packet->u.packet_type) { case BATADV_UNICAST: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within a UNICAST packet\n"); @@ -374,7 +374,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, break; default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } break; case BATADV_BCAST: @@ -387,7 +387,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, default: batadv_dbg(BATADV_DBG_DAT, bat_priv, "* encapsulated within an unknown packet type (0x%x)\n", - unicast_4addr_packet->u.header.packet_type); + unicast_4addr_packet->u.packet_type); } } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 271d321b3a0..6ddb6145ffb 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -355,7 +355,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES, skb->len + ETH_HLEN); - packet->header.ttl--; + packet->ttl--; batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; @@ -444,9 +444,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, goto out_err; /* Create one header to be copied to all fragments */ - frag_header.header.packet_type = BATADV_UNICAST_FRAG; - frag_header.header.version = BATADV_COMPAT_VERSION; - frag_header.header.ttl = BATADV_TTL; + frag_header.packet_type = BATADV_UNICAST_FRAG; + frag_header.version = BATADV_COMPAT_VERSION; + frag_header.ttl = BATADV_TTL; frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno)); frag_header.reserved = 0; frag_header.no = 0; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 29ae4efe354..130cc3217e2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -194,7 +194,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, goto free_skb; } - if (icmp_header->header.packet_type != BATADV_ICMP) { + if (icmp_header->packet_type != BATADV_ICMP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); len = -EINVAL; @@ -243,9 +243,9 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, icmp_header->uid = socket_client->index; - if (icmp_header->header.version != BATADV_COMPAT_VERSION) { + if (icmp_header->version != BATADV_COMPAT_VERSION) { icmp_header->msg_type = BATADV_PARAMETER_PROBLEM; - icmp_header->header.version = BATADV_COMPAT_VERSION; + icmp_header->version = BATADV_COMPAT_VERSION; batadv_socket_add_packet(socket_client, icmp_header, packet_len); goto free_skb; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index c51a5e568f0..1511f64a6ce 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -383,17 +383,17 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); + batadv_ogm_packet->version); goto err_free; } /* all receive handlers return whether they received or reused * the supplied skb. if not, we have to free the skb. */ - idx = batadv_ogm_packet->header.packet_type; + idx = batadv_ogm_packet->packet_type; ret = (*batadv_rx_handler[idx])(skb, hard_iface); if (ret == NET_RX_DROP) @@ -426,8 +426,8 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4); - BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); + BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; @@ -1119,9 +1119,9 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, uint8_t *src, skb_reserve(skb, ETH_HLEN); tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->header.packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->header.version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->header.ttl = BATADV_TTL; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; unicast_tvlv_packet->reserved = 0; unicast_tvlv_packet->tvlv_len = htons(tvlv_len); unicast_tvlv_packet->align = 0; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 351e199bc0a..511d7e1eea3 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -722,7 +722,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, { if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) return false; - if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + if (orig_node->last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; @@ -1082,9 +1082,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, coded_packet = (struct batadv_coded_packet *)skb_dest->data; skb_reset_mac_header(skb_dest); - coded_packet->header.packet_type = BATADV_CODED; - coded_packet->header.version = BATADV_COMPAT_VERSION; - coded_packet->header.ttl = packet1->header.ttl; + coded_packet->packet_type = BATADV_CODED; + coded_packet->version = BATADV_COMPAT_VERSION; + coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ memcpy(coded_packet->first_source, first_source, ETH_ALEN); @@ -1097,7 +1097,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, memcpy(coded_packet->second_source, second_source, ETH_ALEN); memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); coded_packet->second_crc = packet_id2; - coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; coded_packet->coded_len = htons(coding_len); @@ -1452,7 +1452,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb, /* We only handle unicast packets */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Try to find a coding opportunity and send the skb if one is found */ @@ -1505,7 +1505,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, /* Check for supported packet type */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; - if (packet->header.packet_type != BATADV_UNICAST) + if (packet->packet_type != BATADV_UNICAST) goto out; /* Find existing nc_path or create a new */ @@ -1623,7 +1623,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ttvn = coded_packet_tmp.second_ttvn; } else { orig_dest = coded_packet_tmp.first_orig_dest; - ttl = coded_packet_tmp.header.ttl; + ttl = coded_packet_tmp.ttl; ttvn = coded_packet_tmp.first_ttvn; } @@ -1648,9 +1648,9 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, /* Create decoded unicast packet */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.packet_type = BATADV_UNICAST; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - unicast_packet->header.ttl = ttl; + unicast_packet->packet_type = BATADV_UNICAST; + unicast_packet->version = BATADV_COMPAT_VERSION; + unicast_packet->ttl = ttl; memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); unicast_packet->ttvn = ttvn; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 207459b6296..2dd8f242255 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -155,6 +155,7 @@ enum batadv_tvlv_type { BATADV_TVLV_ROAM = 0x05, }; +#pragma pack(2) /* the destination hardware field in the ARP frame is used to * transport the claim type and the group id */ @@ -163,24 +164,20 @@ struct batadv_bla_claim_dst { uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ }; - -struct batadv_header { - uint8_t packet_type; - uint8_t version; /* batman version field */ - uint8_t ttl; - /* the parent struct has to add a byte after the header to make - * everything 4 bytes aligned again - */ -}; +#pragma pack() /** * struct batadv_ogm_packet - ogm (routing protocol) packet - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @flags: contains routing relevant flags - see enum batadv_iv_flags * @tvlv_len: length of tvlv data following the ogm header */ struct batadv_ogm_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t flags; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -196,29 +193,51 @@ struct batadv_ogm_packet { #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) /** - * batadv_icmp_header - common ICMP header - * @header: common batman header + * batadv_icmp_header - common members among all the ICMP packets + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @msg_type: ICMP packet type * @dst: address of the destination node * @orig: address of the source node * @uid: local ICMP socket identifier + * @align: not used - useful for alignment purposes only + * + * This structure is used for ICMP packets parsing only and it is never sent + * over the wire. The alignment field at the end is there to ensure that + * members are padded the same way as they are in real packets. */ struct batadv_icmp_header { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; uint8_t uid; + uint8_t align[3]; }; /** * batadv_icmp_packet - ICMP packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @reserved: not used - useful for alignment * @seqno: ICMP sequence number */ struct batadv_icmp_packet { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t reserved; __be16 seqno; }; @@ -227,13 +246,25 @@ struct batadv_icmp_packet { /** * batadv_icmp_packet_rr - ICMP RouteRecord packet - * @icmph: common ICMP header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier * @rr_cur: number of entries the rr array * @seqno: ICMP sequence number * @rr: route record array */ struct batadv_icmp_packet_rr { - struct batadv_icmp_header icmph; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ + uint8_t dst[ETH_ALEN]; + uint8_t orig[ETH_ALEN]; + uint8_t uid; uint8_t rr_cur; __be16 seqno; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; @@ -253,8 +284,18 @@ struct batadv_icmp_packet_rr { */ #pragma pack(2) +/** + * struct batadv_unicast_packet - unicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @ttvn: translation table version number + * @dest: originator destination of the unicast packet + */ struct batadv_unicast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; + uint8_t ttl; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; /* "4 bytes boundary + 2 bytes" long to make the payload after the @@ -280,7 +321,9 @@ struct batadv_unicast_4addr_packet { /** * struct batadv_frag_packet - fragmented packet - * @header: common batman packet header with type, compatversion, and ttl + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence @@ -289,7 +332,9 @@ struct batadv_unicast_4addr_packet { * @total_size: size of the merged packet */ struct batadv_frag_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; #if defined(__BIG_ENDIAN_BITFIELD) uint8_t no:4; uint8_t reserved:4; @@ -305,8 +350,19 @@ struct batadv_frag_packet { __be16 total_size; }; +/** + * struct batadv_bcast_packet - broadcast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @reserved: reserved byte for alignment + * @seqno: sequence identification + * @orig: originator of the broadcast packet + */ struct batadv_bcast_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; @@ -315,11 +371,11 @@ struct batadv_bcast_packet { */ }; -#pragma pack() - /** * struct batadv_coded_packet - network coded packet - * @header: common batman packet header and ttl of first included packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: Align following fields to 2-byte boundaries * @first_source: original source of first included packet * @first_orig_dest: original destinal of first included packet @@ -334,7 +390,9 @@ struct batadv_bcast_packet { * @coded_len: length of network coded part of the payload */ struct batadv_coded_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t first_ttvn; /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ uint8_t first_source[ETH_ALEN]; @@ -349,9 +407,13 @@ struct batadv_coded_packet { __be16 coded_len; }; +#pragma pack() + /** * struct batadv_unicast_tvlv - generic unicast packet with tvlv payload - * @header: common batman packet header + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header * @reserved: reserved field (for packet alignment) * @src: address of the source * @dst: address of the destination @@ -359,7 +421,9 @@ struct batadv_coded_packet { * @align: 2 bytes to align the header to a 4 byte boundry */ struct batadv_unicast_tvlv_packet { - struct batadv_header header; + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t reserved; uint8_t dst[ETH_ALEN]; uint8_t src[ETH_ALEN]; @@ -420,13 +484,13 @@ struct batadv_tvlv_tt_vlan_data { * struct batadv_tvlv_tt_change - translation table diff data * @flags: status indicators concerning the non-mesh client (see * batadv_tt_client_flags) - * @reserved: reserved field + * @reserved: reserved field - useful for alignment purposes only * @addr: mac address of non-mesh client that triggered this tt change * @vid: VLAN identifier */ struct batadv_tvlv_tt_change { uint8_t flags; - uint8_t reserved; + uint8_t reserved[3]; uint8_t addr[ETH_ALEN]; __be16 vid; }; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d4114d775ad..46278bfb8fd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -308,7 +308,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, memcpy(icmph->dst, icmph->orig, ETH_ALEN); memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN); icmph->msg_type = BATADV_ECHO_REPLY; - icmph->header.ttl = BATADV_TTL; + icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); if (res != NET_XMIT_DROP) @@ -338,9 +338,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; /* send TTL exceeded if packet is an echo request (traceroute) */ - if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) { + if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", - icmp_packet->icmph.orig, icmp_packet->icmph.dst); + icmp_packet->orig, icmp_packet->dst); goto out; } @@ -349,7 +349,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, goto out; /* get routing information */ - orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig); + orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto out; @@ -359,11 +359,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet = (struct batadv_icmp_packet *)skb->data; - memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN); - memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr, + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED; - icmp_packet->icmph.header.ttl = BATADV_TTL; + icmp_packet->msg_type = BATADV_TTL_EXCEEDED; + icmp_packet->ttl = BATADV_TTL; if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; @@ -434,7 +434,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, return batadv_recv_my_icmp_packet(bat_priv, skb); /* TTL exceeded */ - if (icmph->header.ttl < 2) + if (icmph->ttl < 2) return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ @@ -449,7 +449,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph = (struct batadv_icmp_header *)skb->data; /* decrement ttl */ - icmph->header.ttl--; + icmph->ttl--; /* route it */ if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) @@ -709,7 +709,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; /* TTL exceeded */ - if (unicast_packet->header.ttl < 2) { + if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); goto out; @@ -727,9 +727,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.ttl--; + unicast_packet->ttl--; - switch (unicast_packet->header.packet_type) { + switch (unicast_packet->packet_type) { case BATADV_UNICAST_4ADDR: hdr_len = sizeof(struct batadv_unicast_4addr_packet); break; @@ -970,7 +970,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); @@ -1160,7 +1160,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) goto out; - if (bcast_packet->header.ttl < 2) + if (bcast_packet->ttl < 2) goto out; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index c83be5ebaa2..fba4dcfcfac 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -161,11 +161,11 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, return false; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->version = BATADV_COMPAT_VERSION; /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->packet_type = BATADV_UNICAST; /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; + unicast_packet->ttl = BATADV_TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); /* set the destination tt version number */ @@ -221,7 +221,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, goto out; uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + uc_4addr_packet->u.packet_type = BATADV_UNICAST_4ADDR; memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); uc_4addr_packet->subtype = packet_subtype; uc_4addr_packet->reserved = 0; @@ -436,7 +436,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; - bcast_packet->header.ttl--; + bcast_packet->ttl--; skb_reset_mac_header(newskb); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 36f050876f8..a8f99d1486c 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -264,11 +264,11 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; bcast_packet = (struct batadv_bcast_packet *)skb->data; - bcast_packet->header.version = BATADV_COMPAT_VERSION; - bcast_packet->header.ttl = BATADV_TTL; + bcast_packet->version = BATADV_COMPAT_VERSION; + bcast_packet->ttl = BATADV_TTL; /* batman packet type: broadcast */ - bcast_packet->header.packet_type = BATADV_BCAST; + bcast_packet->packet_type = BATADV_BCAST; bcast_packet->reserved = 0; /* hw address of first interface is the orig mac because only @@ -328,7 +328,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node) { - struct batadv_header *batadv_header = (struct batadv_header *)skb->data; + struct batadv_bcast_packet *batadv_bcast_packet; struct batadv_priv *bat_priv = netdev_priv(soft_iface); __be16 ethertype = htons(ETH_P_BATMAN); struct vlan_ethhdr *vhdr; @@ -336,7 +336,8 @@ void batadv_interface_rx(struct net_device *soft_iface, unsigned short vid; bool is_bcast; - is_bcast = (batadv_header->packet_type == BATADV_BCAST); + batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; + is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); /* check if enough space is available for pulling, and pull */ if (!pskb_may_pull(skb, hdr_size)) @@ -345,7 +346,12 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - vid = batadv_get_vid(skb, hdr_size); + /* clean the netfilter state now that the batman-adv header has been + * removed + */ + nf_reset(skb); + + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 06506e6f900..19bc42f8b8b 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -333,7 +333,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - tt_change_node->change.reserved = 0; + memset(tt_change_node->change.reserved, 0, + sizeof(tt_change_node->change.reserved)); memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); tt_change_node->change.vid = htons(common->vid); @@ -2221,7 +2222,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, ETH_ALEN); tt_change->flags = tt_common_entry->flags; tt_change->vid = htons(tt_common_entry->vid); - tt_change->reserved = 0; + memset(tt_change->reserved, 0, + sizeof(tt_change->reserved)); tt_num_entries++; tt_change++; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6a6c8bb4fd7..7552f9e3089 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -940,8 +940,22 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, bt_cb(skb)->pkt_type = *((unsigned char *) skb->data); skb_pull(skb, 1); - if (hci_pi(sk)->channel == HCI_CHANNEL_RAW && - bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + /* No permission check is needed for user channel + * since that gets enforced when binding the socket. + * + * However check that the packet type is valid. + */ + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + + skb_queue_tail(&hdev->raw_q, skb); + queue_work(hdev->workqueue, &hdev->tx_work); + } else if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -972,14 +986,6 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } - if (hci_pi(sk)->channel == HCI_CHANNEL_USER && - bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && - bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && - bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { - err = -EINVAL; - goto drop; - } - skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4c214b2b88e..ef66365b735 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1998,7 +1998,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) u32 old; struct net_bridge_mdb_htable *mdb; - spin_lock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; @@ -2030,7 +2030,7 @@ rollback: } unlock: - spin_unlock(&br->multicast_lock); + spin_unlock_bh(&br->multicast_lock); return err; } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 3b9637fb793..8dac65552f1 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -49,53 +49,51 @@ static ssize_t store_bridge_parm(struct device *d, } -static ssize_t show_forward_delay(struct device *d, +static ssize_t forward_delay_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static ssize_t store_forward_delay(struct device *d, +static ssize_t forward_delay_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_forward_delay); } -static DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); +static DEVICE_ATTR_RW(forward_delay); -static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static ssize_t store_hello_time(struct device *d, +static ssize_t hello_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_hello_time); } -static DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); +static DEVICE_ATTR_RW(hello_time); -static ssize_t show_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", jiffies_to_clock_t(to_bridge(d)->max_age)); } -static ssize_t store_max_age(struct device *d, struct device_attribute *attr, +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_set_max_age); } -static DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, store_max_age); +static DEVICE_ATTR_RW(max_age); -static ssize_t show_ageing_time(struct device *d, +static ssize_t ageing_time_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -108,16 +106,15 @@ static int set_ageing_time(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_ageing_time(struct device *d, +static ssize_t ageing_time_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_ageing_time); } -static DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); +static DEVICE_ATTR_RW(ageing_time); -static ssize_t show_stp_state(struct device *d, +static ssize_t stp_state_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -125,7 +122,7 @@ static ssize_t show_stp_state(struct device *d, } -static ssize_t store_stp_state(struct device *d, +static ssize_t stp_state_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -147,20 +144,21 @@ static ssize_t store_stp_state(struct device *d, return len; } -static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); +static DEVICE_ATTR_RW(stp_state); -static ssize_t show_group_fwd_mask(struct device *d, - struct device_attribute *attr, char *buf) +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%#x\n", br->group_fwd_mask); } -static ssize_t store_group_fwd_mask(struct device *d, - struct device_attribute *attr, const char *buf, - size_t len) +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) { struct net_bridge *br = to_bridge(d); char *endp; @@ -180,10 +178,9 @@ static ssize_t store_group_fwd_mask(struct device *d, return len; } -static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, - store_group_fwd_mask); +static DEVICE_ATTR_RW(group_fwd_mask); -static ssize_t show_priority(struct device *d, struct device_attribute *attr, +static ssize_t priority_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -197,93 +194,91 @@ static int set_priority(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_priority(struct device *d, struct device_attribute *attr, - const char *buf, size_t len) +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_priority); } -static DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, store_priority); +static DEVICE_ATTR_RW(priority); -static ssize_t show_root_id(struct device *d, struct device_attribute *attr, +static ssize_t root_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR_RO(root_id); -static ssize_t show_bridge_id(struct device *d, struct device_attribute *attr, +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, char *buf) { return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR_RO(bridge_id); -static ssize_t show_root_port(struct device *d, struct device_attribute *attr, +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR_RO(root_port); -static ssize_t show_root_path_cost(struct device *d, +static ssize_t root_path_cost_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR_RO(root_path_cost); -static ssize_t show_topology_change(struct device *d, +static ssize_t topology_change_show(struct device *d, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR_RO(topology_change); -static ssize_t show_topology_change_detected(struct device *d, +static ssize_t topology_change_detected_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static DEVICE_ATTR(topology_change_detected, S_IRUGO, - show_topology_change_detected, NULL); +static DEVICE_ATTR_RO(topology_change_detected); -static ssize_t show_hello_timer(struct device *d, +static ssize_t hello_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR_RO(hello_timer); -static ssize_t show_tcn_timer(struct device *d, struct device_attribute *attr, +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR_RO(tcn_timer); -static ssize_t show_topology_change_timer(struct device *d, +static ssize_t topology_change_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, - NULL); +static DEVICE_ATTR_RO(topology_change_timer); -static ssize_t show_gc_timer(struct device *d, struct device_attribute *attr, +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR_RO(gc_timer); -static ssize_t show_group_addr(struct device *d, +static ssize_t group_addr_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -293,7 +288,7 @@ static ssize_t show_group_addr(struct device *d, br->group_addr[4], br->group_addr[5]); } -static ssize_t store_group_addr(struct device *d, +static ssize_t group_addr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -324,10 +319,9 @@ static ssize_t store_group_addr(struct device *d, return len; } -static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR, - show_group_addr, store_group_addr); +static DEVICE_ATTR_RW(group_addr); -static ssize_t store_flush(struct device *d, +static ssize_t flush_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { @@ -339,26 +333,25 @@ static ssize_t store_flush(struct device *d, br_fdb_flush(br); return len; } -static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush); +static DEVICE_ATTR_WO(flush); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING -static ssize_t show_multicast_router(struct device *d, +static ssize_t multicast_router_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_router); } -static ssize_t store_multicast_router(struct device *d, +static ssize_t multicast_router_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_router); } -static DEVICE_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, - store_multicast_router); +static DEVICE_ATTR_RW(multicast_router); -static ssize_t show_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -366,18 +359,17 @@ static ssize_t show_multicast_snooping(struct device *d, return sprintf(buf, "%d\n", !br->multicast_disabled); } -static ssize_t store_multicast_snooping(struct device *d, +static ssize_t multicast_snooping_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_toggle); } -static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, - show_multicast_snooping, store_multicast_snooping); +static DEVICE_ATTR_RW(multicast_snooping); -static ssize_t show_multicast_query_use_ifaddr(struct device *d, - struct device_attribute *attr, - char *buf) +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); @@ -390,17 +382,15 @@ static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) } static ssize_t -store_multicast_query_use_ifaddr(struct device *d, +multicast_query_use_ifaddr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_use_ifaddr); } -static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, - show_multicast_query_use_ifaddr, - store_multicast_query_use_ifaddr); +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); -static ssize_t show_multicast_querier(struct device *d, +static ssize_t multicast_querier_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -408,16 +398,15 @@ static ssize_t show_multicast_querier(struct device *d, return sprintf(buf, "%d\n", br->multicast_querier); } -static ssize_t store_multicast_querier(struct device *d, +static ssize_t multicast_querier_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_querier); } -static DEVICE_ATTR(multicast_querier, S_IRUGO | S_IWUSR, - show_multicast_querier, store_multicast_querier); +static DEVICE_ATTR_RW(multicast_querier); -static ssize_t show_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -430,31 +419,29 @@ static int set_elasticity(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_hash_elasticity(struct device *d, +static ssize_t hash_elasticity_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_elasticity); } -static DEVICE_ATTR(hash_elasticity, S_IRUGO | S_IWUSR, show_hash_elasticity, - store_hash_elasticity); +static DEVICE_ATTR_RW(hash_elasticity); -static ssize_t show_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); return sprintf(buf, "%u\n", br->hash_max); } -static ssize_t store_hash_max(struct device *d, struct device_attribute *attr, +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); } -static DEVICE_ATTR(hash_max, S_IRUGO | S_IWUSR, show_hash_max, - store_hash_max); +static DEVICE_ATTR_RW(hash_max); -static ssize_t show_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -468,17 +455,15 @@ static int set_last_member_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_count(struct device *d, +static ssize_t multicast_last_member_count_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_count); } -static DEVICE_ATTR(multicast_last_member_count, S_IRUGO | S_IWUSR, - show_multicast_last_member_count, - store_multicast_last_member_count); +static DEVICE_ATTR_RW(multicast_last_member_count); -static ssize_t show_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -491,17 +476,15 @@ static int set_startup_query_count(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_count( +static ssize_t multicast_startup_query_count_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_count); } -static DEVICE_ATTR(multicast_startup_query_count, S_IRUGO | S_IWUSR, - show_multicast_startup_query_count, - store_multicast_startup_query_count); +static DEVICE_ATTR_RW(multicast_startup_query_count); -static ssize_t show_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -515,17 +498,15 @@ static int set_last_member_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_last_member_interval( +static ssize_t multicast_last_member_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_last_member_interval); } -static DEVICE_ATTR(multicast_last_member_interval, S_IRUGO | S_IWUSR, - show_multicast_last_member_interval, - store_multicast_last_member_interval); +static DEVICE_ATTR_RW(multicast_last_member_interval); -static ssize_t show_multicast_membership_interval( +static ssize_t multicast_membership_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -539,17 +520,15 @@ static int set_membership_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_membership_interval( +static ssize_t multicast_membership_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_membership_interval); } -static DEVICE_ATTR(multicast_membership_interval, S_IRUGO | S_IWUSR, - show_multicast_membership_interval, - store_multicast_membership_interval); +static DEVICE_ATTR_RW(multicast_membership_interval); -static ssize_t show_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -564,17 +543,15 @@ static int set_querier_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_querier_interval(struct device *d, +static ssize_t multicast_querier_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_querier_interval); } -static DEVICE_ATTR(multicast_querier_interval, S_IRUGO | S_IWUSR, - show_multicast_querier_interval, - store_multicast_querier_interval); +static DEVICE_ATTR_RW(multicast_querier_interval); -static ssize_t show_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -589,17 +566,15 @@ static int set_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_interval(struct device *d, +static ssize_t multicast_query_interval_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_interval); } -static DEVICE_ATTR(multicast_query_interval, S_IRUGO | S_IWUSR, - show_multicast_query_interval, - store_multicast_query_interval); +static DEVICE_ATTR_RW(multicast_query_interval); -static ssize_t show_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -614,17 +589,15 @@ static int set_query_response_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_query_response_interval( +static ssize_t multicast_query_response_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_query_response_interval); } -static DEVICE_ATTR(multicast_query_response_interval, S_IRUGO | S_IWUSR, - show_multicast_query_response_interval, - store_multicast_query_response_interval); +static DEVICE_ATTR_RW(multicast_query_response_interval); -static ssize_t show_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -639,18 +612,16 @@ static int set_startup_query_interval(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_multicast_startup_query_interval( +static ssize_t multicast_startup_query_interval_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_startup_query_interval); } -static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, - show_multicast_startup_query_interval, - store_multicast_startup_query_interval); +static DEVICE_ATTR_RW(multicast_startup_query_interval); #endif #ifdef CONFIG_BRIDGE_NETFILTER -static ssize_t show_nf_call_iptables( +static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -663,16 +634,15 @@ static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_iptables( +static ssize_t nf_call_iptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_iptables); } -static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, - show_nf_call_iptables, store_nf_call_iptables); +static DEVICE_ATTR_RW(nf_call_iptables); -static ssize_t show_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -685,16 +655,15 @@ static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_ip6tables( +static ssize_t nf_call_ip6tables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); } -static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, - show_nf_call_ip6tables, store_nf_call_ip6tables); +static DEVICE_ATTR_RW(nf_call_ip6tables); -static ssize_t show_nf_call_arptables( +static ssize_t nf_call_arptables_show( struct device *d, struct device_attribute *attr, char *buf) { struct net_bridge *br = to_bridge(d); @@ -707,17 +676,16 @@ static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) return 0; } -static ssize_t store_nf_call_arptables( +static ssize_t nf_call_arptables_store( struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, set_nf_call_arptables); } -static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, - show_nf_call_arptables, store_nf_call_arptables); +static DEVICE_ATTR_RW(nf_call_arptables); #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING -static ssize_t show_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -725,14 +693,13 @@ static ssize_t show_vlan_filtering(struct device *d, return sprintf(buf, "%d\n", br->vlan_enabled); } -static ssize_t store_vlan_filtering(struct device *d, +static ssize_t vlan_filtering_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); } -static DEVICE_ATTR(vlan_filtering, S_IRUGO | S_IWUSR, - show_vlan_filtering, store_vlan_filtering); +static DEVICE_ATTR_RW(vlan_filtering); #endif static struct attribute *bridge_attrs[] = { diff --git a/net/core/dev.c b/net/core/dev.c index 153ee2f8c33..ce01847793c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3846,6 +3846,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_gro_reset_offset(skb); gro_list_prepare(napi, skb); + NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3922,6 +3923,31 @@ normal: goto pull; } +struct packet_offload *gro_find_receive_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_receive) + continue; + return ptype; + } + return NULL; +} + +struct packet_offload *gro_find_complete_by_type(__be16 type) +{ + struct list_head *offload_head = &offload_base; + struct packet_offload *ptype; + + list_for_each_entry_rcu(ptype, offload_head, list) { + if (ptype->type != type || !ptype->callbacks.gro_complete) + continue; + return ptype; + } + return NULL; +} static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { @@ -4446,7 +4472,7 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -5783,13 +5809,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a666740051d..ea97361f0e9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1298,7 +1298,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8f971990677..30309787463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, !vlan_hw_offload_capable(netif_skb_features(skb), skb->vlan_proto)) { skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - break; + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code at the end of this + * function to try and re-queue a NULL skb. + */ + status = NETDEV_TX_OK; + goto unlock_txq; + } skb->vlan_tci = 0; } @@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (status == NETDEV_TX_OK) txq_trans_update(txq); } + unlock_txq: __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index eb96c2c2240..1d641e781f8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2121,6 +2121,91 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); + /** + * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() + * @from: source buffer + * + * Calculates the amount of linear headroom needed in the 'to' skb passed + * into skb_zerocopy(). + */ +unsigned int +skb_zerocopy_headlen(const struct sk_buff *from) +{ + unsigned int hlen = 0; + + if (!from->head_frag || + skb_headlen(from) < L1_CACHE_BYTES || + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(from); + + if (skb_has_frag_list(from)) + hlen = from->len; + + return hlen; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); + +/** + * skb_zerocopy - Zero copy skb to skb + * @to: destination buffer + * @source: source buffer + * @len: number of bytes to copy from source buffer + * @hlen: size of linear headroom in destination buffer + * + * Copies up to `len` bytes from `from` to `to` by creating references + * to the frags in the source buffer. + * + * The `hlen` as calculated by skb_zerocopy_headlen() specifies the + * headroom in the `to` buffer. + */ +void +skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + BUG_ON(!from->head_frag && !hlen); + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} +EXPORT_SYMBOL_GPL(skb_zerocopy); + void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { __wsum csum; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 4c6bdf97a65..595ddf0459d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -152,17 +152,6 @@ static const struct file_operations dccpprobe_fops = { .llseek = noop_llseek, }; -static __init int setup_jprobe(void) -{ - int ret = register_jprobe(&dccp_send_probe); - - if (ret) { - request_module("dccp"); - ret = register_jprobe(&dccp_send_probe); - } - return ret; -} - static __init int dccpprobe_init(void) { int ret = -ENOMEM; @@ -174,7 +163,13 @@ static __init int dccpprobe_init(void) if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; - ret = setup_jprobe(); + ret = register_jprobe(&dccp_send_probe); + if (ret) { + ret = request_module("dccp"); + if (!ret) + ret = register_jprobe(&dccp_send_probe); + } + if (ret) goto err1; diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 459e200c08a..a2d2456a557 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -547,7 +547,7 @@ static int lowpan_header_create(struct sk_buff *skb, hc06_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); + memcpy(hc06_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ *hc06_ptr = tmp; hc06_ptr += 4; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4b81e91c80f..f8c49ce5b28 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o ip_tunnel_core.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o @@ -19,7 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o -gre-y := gre_demux.o gre_offload.o +gre-y := gre_demux.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b8bc1a3d5cf..6268a4751e6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,9 +1391,15 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | - (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); + ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); + /* Save the IP ID check to be included later when we get to + * the transport layer so only the inner most IP ID is checked. + * This is because some GSO/TSO implementations do not + * correctly increment the IP ID for the outer hdrs. + */ + NAPI_GRO_CB(p)->flush_id = + ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0feebd5de29..9809f7b6972 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1385,6 +1385,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + ipv4_devconf_setall(in_dev); + neigh_parms_data_state_setall(in_dev->arp_parms); inet_insert_ifa(ifa); } } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5893e99e829..1863422fb7d 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -355,14 +355,7 @@ static int __init gre_init(void) goto err_gre; } - if (gre_offload_init()) { - pr_err("can't add protocol offload\n"); - goto err_gso; - } - return 0; -err_gso: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); err_gre: inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); err: @@ -371,8 +364,6 @@ err: static void __exit gre_exit(void) { - gre_offload_exit(); - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e5d43618846..746a7b10d43 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -28,6 +28,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t enc_features; int ghl = GRE_HEADER_SECTION; struct gre_base_hdr *greh; + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; __be16 protocol = skb->protocol; int tnl_hlen; @@ -58,13 +59,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } else csum = false; + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + /* setup inner skb. */ skb->protocol = greh->protocol; skb->encapsulation = 0; - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -73,8 +74,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); goto out; + } skb = segs; tnl_hlen = skb_tnl_header_len(skb); @@ -113,19 +116,182 @@ out: return segs; } +/* Compute the whole skb csum in s/w and store it, then verify GRO csum + * starting from gro_offset. + */ +static __sum16 gro_skb_checksum(struct sk_buff *skb) +{ + __sum16 sum; + + skb->csum = skb_checksum(skb, 0, skb->len, 0); + NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, + csum_partial(skb->data, skb_gro_offset(skb), 0)); + sum = csum_fold(NAPI_GRO_CB(skb)->csum); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { + if (unlikely(!sum)) + netdev_rx_csum_fault(skb->dev); + } else + skb->ip_summed = CHECKSUM_COMPLETE; + + return sum; +} + +static struct sk_buff **gre_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + const struct gre_base_hdr *greh; + unsigned int hlen, grehlen; + unsigned int off; + int flush = 1; + struct packet_offload *ptype; + __be16 type; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*greh); + greh = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out; + } + + /* Only support version 0 and K (key), C (csum) flags. Note that + * although the support for the S (seq#) flag can be added easily + * for GRO, this is problematic for GSO hence can not be enabled + * here because a GRO pkt may end up in the forwarding path, thus + * requiring GSO support to break it up correctly. + */ + if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) + goto out; + + type = greh->protocol; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) + goto out_unlock; + + grehlen = GRE_HEADER_SECTION; + + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + hlen = off + grehlen; + if (skb_gro_header_hard(skb, hlen)) { + greh = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!greh)) + goto out_unlock; + } + if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ + __sum16 csum = 0; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum = csum_fold(NAPI_GRO_CB(skb)->csum); + /* Don't trust csum error calculated/reported by h/w */ + if (skb->ip_summed == CHECKSUM_NONE || csum != 0) + csum = gro_skb_checksum(skb); + + /* GRE CSUM is the 1's complement of the 1's complement sum + * of the GRE hdr plus payload so it should add up to 0xffff + * (and 0 after csum_fold()) just like the IPv4 hdr csum. + */ + if (csum) + goto out_unlock; + } + flush = 0; + + for (p = *head; p; p = p->next) { + const struct gre_base_hdr *greh2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + /* The following checks are needed to ensure only pkts + * from the same tunnel are considered for aggregation. + * The criteria for "the same tunnel" includes: + * 1) same version (we only support version 0 here) + * 2) same protocol (we only support ETH_P_IP for now) + * 3) same set of flags + * 4) same key if the key field is present. + */ + greh2 = (struct gre_base_hdr *)(p->data + off); + + if (greh2->flags != greh->flags || + greh2->protocol != greh->protocol) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + if (greh->flags & GRE_KEY) { + /* compare keys */ + if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + } + + skb_gro_pull(skb, grehlen); + + /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ + skb_gro_postpull_rcsum(skb, greh, grehlen); + + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +int gre_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff); + struct packet_offload *ptype; + unsigned int grehlen = sizeof(*greh); + int err = -ENOENT; + __be16 type; + + type = greh->protocol; + if (greh->flags & GRE_KEY) + grehlen += GRE_HEADER_SECTION; + + if (greh->flags & GRE_CSUM) + grehlen += GRE_HEADER_SECTION; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); + + rcu_read_unlock(); + return err; +} + static const struct net_offload gre_offload = { .callbacks = { .gso_send_check = gre_gso_send_check, .gso_segment = gre_gso_segment, + .gro_receive = gre_gro_receive, + .gro_complete = gre_gro_complete, }, }; -int __init gre_offload_init(void) +static int __init gre_offload_init(void) { return inet_add_offload(&gre_offload, IPPROTO_GRE); } -void __exit gre_offload_exit(void) +static void __exit gre_offload_exit(void) { inet_del_offload(&gre_offload, IPPROTO_GRE); } + +module_init(gre_offload_init); +module_exit(gre_offload_exit); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 56a964a553d..a0f52dac894 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; @@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = jiffies_to_msecs(tmo); @@ -726,8 +737,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->ir_rmt_port; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = ireq->ir_loc_addr; r->id.idiag_dst[0] = ireq->ir_rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d7aea4c5b94..e560ef34cf4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -217,6 +217,7 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { + skb_pop_mac_header(skb); ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); return PACKET_RCVD; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 912402752f2..df184616493 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - fragheaderlen) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, - mtu-exthdrlen); + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } @@ -1151,7 +1151,8 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, mtu : 0xFFFF; if (cork->length + size > maxnonfragsize - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); + ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, + mtu - (opt ? opt->optlen : 0)); return -EMSGSIZE; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40d56073cd1..81c6910cfa9 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -39,23 +39,33 @@ config NF_CONNTRACK_PROC_COMPAT config NF_TABLES_IPV4 depends on NF_TABLES tristate "IPv4 nf_tables support" - -config NFT_REJECT_IPV4 - depends on NF_TABLES_IPV4 - tristate "nf_tables IPv4 reject support" + help + This option enables the IPv4 support for nf_tables. config NFT_CHAIN_ROUTE_IPV4 depends on NF_TABLES_IPV4 tristate "IPv4 nf_tables route chain support" + help + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. config NFT_CHAIN_NAT_IPV4 depends on NF_TABLES_IPV4 depends on NF_NAT_IPV4 && NFT_NAT tristate "IPv4 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" + help + This option enables the ARP support for nf_tables. config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19df72b7ba8..c16be9d5842 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -28,7 +28,6 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o -obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b969131ad1c..5b6e0df4ccf 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -17,10 +17,6 @@ #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/route.h> -#include <net/dst.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> @@ -28,128 +24,12 @@ #include <linux/netfilter_bridge.h> #endif +#include <net/netfilter/ipv4/nf_reject.h> + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); -/* Send RST reply */ -static void send_reset(struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; - const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; - - /* IP header checks: fragment. */ - if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; - - /* Check checksum */ - if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); - - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; - - skb_reserve(nskb, LL_MAX_HEADER); - - skb_reset_network_header(nskb); - niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); - niph->version = 4; - niph->ihl = sizeof(struct iphdr) / 4; - niph->tos = 0; - niph->id = 0; - niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; - niph->check = 0; - niph->saddr = oiph->daddr; - niph->daddr = oiph->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - memset(tcph, 0, sizeof(*tcph)); - tcph->source = oth->dest; - tcph->dest = oth->source; - tcph->doff = sizeof(struct tcphdr) / 4; - - if (oth->ack) - tcph->seq = oth->ack_seq; - else { - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + - oldskb->len - ip_hdrlen(oldskb) - - (oth->doff << 2)); - tcph->ack = 1; - } - - tcph->rst = 1; - tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, - niph->daddr, 0); - nskb->ip_summed = CHECKSUM_PARTIAL; - nskb->csum_start = (unsigned char *)tcph - nskb->head; - nskb->csum_offset = offsetof(struct tcphdr, check); - - /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set_noref(nskb, skb_dst(oldskb)); - - nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) - goto free_nskb; - - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - - /* "Never happens" */ - if (nskb->len > dst_mtu(skb_dst(nskb))) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - niph->tot_len = htons(nskb->len); - ip_send_check(niph); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - goto free_nskb; - dev_queue_xmit(nskb); - } else -#endif - ip_local_out(nskb); - - return; - - free_nskb: - kfree_skb(nskb); -} - -static inline void send_unreach(struct sk_buff *skb_in, int code) -{ - icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); -} - static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -157,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(skb, ICMP_NET_UNREACH); + nf_send_unreach(skb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(skb, ICMP_HOST_UNREACH); + nf_send_unreach(skb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(skb, ICMP_PROT_UNREACH); + nf_send_unreach(skb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(skb, ICMP_PORT_UNREACH); + nf_send_unreach(skb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(skb, ICMP_NET_ANO); + nf_send_unreach(skb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(skb, ICMP_HOST_ANO); + nf_send_unreach(skb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(skb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, par->hooknum); + nf_send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d099f9a055c..e2a40515e66 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -381,7 +381,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - skb_queue_head_init(&tp->out_of_order_queue); + __skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 97b68415986..3aa9e324799 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -425,7 +425,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); - skb_queue_head_init(&newtp->out_of_order_queue); + __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2658a27f540..771a3950d87 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -197,7 +197,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) goto out_check_final; found: - flush = NAPI_GRO_CB(p)->flush; + /* Include the IP ID check below from the inner most IP hdr */ + flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); @@ -230,7 +231,7 @@ out_check_final: pp = head; out: - NAPI_GRO_CB(skb)->flush |= flush; + NAPI_GRO_CB(skb)->flush |= (flush != 0); return pp; } @@ -280,7 +281,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d5d24ecde6a..80f649fbee6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; @@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; + } outer_hlen = skb_tnl_header_len(skb); skb = segs; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 83206de2bc7..79c62bdcd3c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -41,6 +41,14 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; + int offset; + __wsum csum; + + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) { + segs = skb_udp_tunnel_segment(skb, features); + goto out; + } mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -63,27 +71,20 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } + segs = skb_segment(skb, features); out: return segs; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6c1634507ec..31f75ea9cb6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1677,7 +1677,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1688,7 +1688,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -3476,7 +3476,12 @@ restart: &inet6_addr_lst[i], addr_lst) { unsigned long age; - if (ifp->flags & IFA_F_PERMANENT) + /* When setting preferred_lft to a value not zero or + * infinity, while valid_lft is infinity + * IFA_F_PERMANENT has a non-infinity life time. + */ + if ((ifp->flags & IFA_F_PERMANENT) && + (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); @@ -3501,7 +3506,8 @@ restart: ifp->flags |= IFA_F_DEPRECATED; } - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) + if ((ifp->valid_lft != INFINITY_LIFE_TIME) && + (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); @@ -3801,7 +3807,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - if (!(ifa->flags&IFA_F_PERMANENT)) { + if (!((ifa->flags&IFA_F_PERMANENT) && + (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5d420095190..9a809a4b3d8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -556,7 +556,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) saddr = &ipv6_hdr(skb)->daddr; - if (!ipv6_unicast_destination(skb)) + if (!ipv6_unicast_destination(skb) && + !(net->ipv6.anycast_src_echo_reply && + ipv6_anycast_destination(skb))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 6fb4162fa78..1e8683b135b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -190,7 +190,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int nlen; unsigned int hlen; unsigned int off; - int flush = 1; + u16 flush = 1; int proto; __wsum csum; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 788c01a5359..d1de9560c42 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1188,11 +1188,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->tot_len : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? + mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1217,12 +1241,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 02894216a46..1e5e2404f1a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -101,17 +101,26 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_sw_netstats sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { + unsigned int start; const struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, i); - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -823,8 +832,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index da1d9e4d62c..b50acd5e75d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -74,27 +74,6 @@ struct vti6_net { struct ip6_tnl __rcu **tnls[2]; }; -static struct net_device_stats *vti6_get_stats(struct net_device *dev) -{ - struct pcpu_sw_netstats sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *tstats = - per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -331,8 +310,10 @@ static int vti6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); skb->mark = 0; secpath_reset(skb); @@ -716,7 +697,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, - .ndo_get_stats = vti6_get_stats, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; /** diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7702f9e90a0..35750df744d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,15 +28,27 @@ config NF_CONNTRACK_IPV6 config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" + help + This option enables the IPv6 support for nf_tables. config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" + help + This option enables the "route" chain for IPv6 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, flowlabel, hop-limit and + the packet mark. config NFT_CHAIN_NAT_IPV6 depends on NF_TABLES_IPV6 depends on NF_NAT_IPV6 && NFT_NAT tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index da00a2ecde5..544b0a9da1b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,181 +23,18 @@ #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h> #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> + MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; - const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; - u8 proto; - __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } - - proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - - if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("Cannot get TCP header.\n"); - return; - } - - otcplen = oldskb->len - tcphoff; - - /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; - } - - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); - - /* No RST for RST. */ - if (otcph.rst) { - pr_debug("RST is set\n"); - return; - } - - /* Check checksum. */ - if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { - pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); - - skb_reserve(nskb, hh_len + dst->header_len); - - skb_put(nskb, sizeof(struct ipv6hdr)); - skb_reset_network_header(nskb); - ip6h = ipv6_hdr(nskb); - ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->saddr = oip6h->daddr; - ip6h->daddr = oip6h->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; - - if (otcph.ack) { - needs_ack = 0; - tcph->seq = otcph.ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; - - /* Adjust TCP checksum */ - tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, - &ipv6_hdr(nskb)->daddr, - sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip6_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - nskb->protocol = htons(ETH_P_IPV6); - ip6h->payload_len = htons(sizeof(struct tcphdr)); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - return; - dev_queue_xmit(nskb); - } else -#endif - ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, - unsigned int hooknum) -{ - if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = net->loopback_dev; - - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -} static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("%s: medium point\n", __func__); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb, par->hooknum); + nf_send_reset6(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 266f110cb6f..11dac21e658 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1923,9 +1923,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 9937b261671..3dfbcf1dcb1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -702,8 +702,10 @@ static int ipip6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -924,7 +926,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -966,8 +968,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 107b2f1d90a..6b6a2c83027 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "anycast_src_echo_reply", + .data = &init_net.ipv6.anycast_src_echo_reply, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -51,6 +58,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_table) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; + ipv6_table[1].data = &net->ipv6.anycast_src_echo_reply; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7b01b9f5846..c71b699eb55 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -715,7 +715,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; - u32 *seq; + u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; @@ -812,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } continue; found_ok_skb: + skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) @@ -844,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Partial read */ - if (used + offset < skb->len) + if (used + offset < skb_len) continue; } while (len > 0); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c17902cb5df..c3b3b26c4c4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -414,47 +414,104 @@ config NETFILTER_SYNPROXY endif # NF_CONNTRACK config NF_TABLES - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK tristate "Netfilter nf_tables support" + help + nftables is the new packet classification framework that intends to + replace the existing {ip,ip6,arp,eb}_tables infrastructure. It + provides a pseudo-state machine with an extensible instruction-set + (also known as expressions) that the userspace 'nft' utility + (http://www.netfilter.org/projects/nftables) uses to build the + rule-set. It also comes with the generic set infrastructure that + allows you to construct mappings between matchings and actions + for performance lookups. + + To compile it as a module, choose M here. config NFT_EXTHDR depends on NF_TABLES tristate "Netfilter nf_tables IPv6 exthdr module" + help + This option adds the "exthdr" expression that you can use to match + IPv6 extension headers. config NFT_META depends on NF_TABLES tristate "Netfilter nf_tables meta module" + help + This option adds the "meta" expression that you can use to match and + to set packet metainformation such as the packet mark. config NFT_CT depends on NF_TABLES depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" + help + This option adds the "meta" expression that you can use to match + connection tracking information such as the flow state. config NFT_RBTREE depends on NF_TABLES tristate "Netfilter nf_tables rbtree set module" + help + This option adds the "rbtree" set type (Red Black tree) that is used + to build interval-based sets. config NFT_HASH depends on NF_TABLES tristate "Netfilter nf_tables hash set module" + help + This option adds the "hash" set type that is used to build one-way + mappings between matchings and actions. config NFT_COUNTER depends on NF_TABLES tristate "Netfilter nf_tables counter module" + help + This option adds the "counter" expression that you can use to + include packet and byte counters in a rule. config NFT_LOG depends on NF_TABLES tristate "Netfilter nf_tables log module" + help + This option adds the "log" expression that you can use to log + packets matching some criteria. config NFT_LIMIT depends on NF_TABLES tristate "Netfilter nf_tables limit module" + help + This option adds the "limit" expression that you can use to + ratelimit rule matchings. config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK depends on NF_NAT tristate "Netfilter nf_tables nat module" + help + This option adds the "nat" expression that you can use to perform + typical Network Address Translation (NAT) packet transformations. + +config NFT_QUEUE + depends on NF_TABLES + depends on NETFILTER_XTABLES + depends on NETFILTER_NETLINK_QUEUE + tristate "Netfilter nf_tables queue module" + help + This is required if you intend to use the userspace queueing + infrastructure (also known as NFQUEUE) from nftables. + +config NFT_REJECT + depends on NF_TABLES + depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6 + default m if NETFILTER_ADVANCED=n + tristate "Netfilter nf_tables reject support" + help + This option adds the "reject" expression that you can use to + explicitly deny and notify via TCP reset/ICMP informational errors + unallowed traffic. config NFT_COMPAT depends on NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 407fc232f62..78b4e1c9c59 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -76,7 +76,8 @@ obj-$(CONFIG_NFT_META) += nft_meta.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o -#nf_tables-objs += nft_meta_target.o +obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index d5f41514f57..5882bbfd198 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -62,6 +62,7 @@ #include <net/ip_vs.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -96,6 +97,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; + /* Applications may adjust TCP seqs */ + if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && + !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) + return; + /* * The connection is not yet in the hashtable, so we update it. * CIP->VIP will remain the same, so leave the tuple in diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index 17c1bcb182c..b2d38da6782 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -36,6 +36,11 @@ int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, if (off == 0) return 0; + if (unlikely(!seqadj)) { + WARN(1, "Wrong seqadj usage, missing nfct_seqadj_ext_add()\n"); + return 0; + } + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 902fb0a6b38..7a394df0deb 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -97,7 +97,6 @@ int nf_conntrack_tstamp_pernet_init(struct net *net) void nf_conntrack_tstamp_pernet_fini(struct net *net) { nf_conntrack_tstamp_fini_sysctl(net); - nf_ct_extend_unregister(&tstamp_extend); } int nf_conntrack_tstamp_init(void) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f93b7d06f4b..1fcef1ec1dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -180,7 +180,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->res_id = 0; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags))) + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -312,6 +313,9 @@ static int nf_tables_table_enable(struct nft_table *table) int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + err = nf_register_hook(&nft_base_chain(chain)->ops); if (err < 0) goto err; @@ -321,6 +325,9 @@ static int nf_tables_table_enable(struct nft_table *table) return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + if (i-- <= 0) break; @@ -333,8 +340,10 @@ static int nf_tables_table_disable(struct nft_table *table) { struct nft_chain *chain; - list_for_each_entry(chain, &table->chains, list) - nf_unregister_hook(&nft_base_chain(chain)->ops); + list_for_each_entry(chain, &table->chains, list) { + if (chain->flags & NFT_BASE_CHAIN) + nf_unregister_hook(&nft_base_chain(chain)->ops); + } return 0; } @@ -1923,12 +1932,14 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, { struct net *net = sock_net(skb->sk); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nft_af_info *afi; + const struct nft_af_info *afi = NULL; const struct nft_table *table = NULL; - afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); - if (IS_ERR(afi)) - return PTR_ERR(afi); + if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { + afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + } if (nla[NFTA_SET_TABLE] != NULL) { table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); @@ -1973,11 +1984,14 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; list_for_each_entry(i, &ctx->table->sets, list) { - if (!sscanf(i->name, name, &n)) + int tmp; + + if (!sscanf(i->name, name, &tmp)) continue; - if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE) + if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE) continue; - set_bit(n, inuse); + + set_bit(tmp, inuse); } n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE); @@ -2094,21 +2108,25 @@ done: return skb->len; } -static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, - struct netlink_callback *cb) +static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) { const struct nft_set *set; - unsigned int idx = 0, s_idx = cb->args[0]; + unsigned int idx, s_idx = cb->args[0]; struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; if (cb->args[1]) return skb->len; list_for_each_entry(table, &ctx->afi->tables, list) { - if (cur_table && cur_table != table) - continue; + if (cur_table) { + if (cur_table != table) + continue; + cur_table = NULL; + } ctx->table = table; + idx = 0; list_for_each_entry(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; @@ -2127,6 +2145,61 @@ done: return skb->len; } +static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct nft_set *set; + unsigned int idx, s_idx = cb->args[0]; + const struct nft_af_info *afi; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + struct net *net = sock_net(skb->sk); + int cur_family = cb->args[3]; + + if (cb->args[1]) + return skb->len; + + list_for_each_entry(afi, &net->nft.af_info, list) { + if (cur_family) { + if (afi->family != cur_family) + continue; + + cur_family = 0; + } + + list_for_each_entry(table, &afi->tables, list) { + if (cur_table) { + if (cur_table != table) + continue; + + cur_table = NULL; + } + + ctx->table = table; + ctx->afi = afi; + idx = 0; + list_for_each_entry(set, &ctx->table->sets, list) { + if (idx < s_idx) + goto cont; + if (nf_tables_fill_set(skb, ctx, set, + NFT_MSG_NEWSET, + NLM_F_MULTI) < 0) { + cb->args[0] = idx; + cb->args[2] = (unsigned long) table; + cb->args[3] = afi->family; + goto done; + } +cont: + idx++; + } + if (s_idx) + s_idx = 0; + } + } + cb->args[1] = 1; +done: + return skb->len; +} + static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); @@ -2143,9 +2216,12 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - if (ctx.table == NULL) - ret = nf_tables_dump_sets_all(&ctx, skb, cb); - else + if (ctx.table == NULL) { + if (ctx.afi == NULL) + ret = nf_tables_dump_sets_all(&ctx, skb, cb); + else + ret = nf_tables_dump_sets_family(&ctx, skb, cb); + } else ret = nf_tables_dump_sets_table(&ctx, skb, cb); return ret; @@ -2158,6 +2234,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; /* Verify existance before starting dump */ @@ -2172,6 +2249,10 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, return netlink_dump_start(nlsk, skb, nlh, &c); } + /* Only accept unspec with dump */ + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2341,6 +2422,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2352,6 +2434,9 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; + if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + return -EAFNOSUPPORT; + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) return PTR_ERR(set); @@ -2370,7 +2455,9 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype); + return nft_validate_data_load(ctx, dreg, &elem->data, + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -2521,9 +2608,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) u32 portid, seq; int event, err; - nfmsg = nlmsg_data(cb->nlh); - err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX, - nft_set_elem_list_policy); + err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, + NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index cb9e685caae..e8fcc343c2b 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -164,7 +164,7 @@ next_rule: break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -172,6 +172,9 @@ next_rule: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); return data[NFT_REG_VERDICT].verdict; + } + + switch (data[NFT_REG_VERDICT].verdict) { case NFT_JUMP: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3c4b69e5fe1..a155d19a225 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1053,6 +1053,7 @@ static void __net_exit nfnl_log_net_exit(struct net *net) #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); #endif + nf_log_unset(net, &nfulnl_logger); } static struct pernet_operations nfnl_log_net_ops = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index d3cf12b8317..f072fe80351 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -29,6 +29,7 @@ #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> #include <net/sock.h> +#include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> #include <net/netns/generic.h> #include <net/netfilter/nfnetlink_queue.h> @@ -235,51 +236,6 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } -static void -nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) -{ - int i, j = 0; - int plen = 0; /* length of skb->head fragment */ - struct page *page; - unsigned int offset; - - /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } - - if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); - len -= hlen; - } else { - plen = min_t(int, skb_headlen(from), len); - if (plen) { - page = virt_to_head_page(from->head); - offset = from->data - (unsigned char *)page_address(page); - __skb_fill_page_desc(to, 0, page, offset, plen); - get_page(page); - j = 1; - len -= plen; - } - } - - to->truesize += len + plen; - to->len += len + plen; - to->data_len += len + plen; - - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { - if (!len) - break; - skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; - skb_frag_ref(to, j); - j++; - } - skb_shinfo(to)->nr_frags = j; -} - static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, bool csum_verify) @@ -329,7 +285,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, { size_t size; size_t data_len = 0, cap_len = 0; - int hlen = 0; + unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -381,14 +337,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || - skb_headlen(entskb) < L1_CACHE_BYTES || - skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) - hlen = skb_headlen(entskb); - - if (skb_has_frag_list(entskb)) - hlen = entskb->len; - hlen = min_t(int, data_len, hlen); + hlen = skb_zerocopy_headlen(entskb); + hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; @@ -538,7 +488,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - nfqnl_zcopy(skb, entskb, data_len, hlen); + skb_zerocopy(skb, entskb, data_len, hlen); } nlh->nlmsg_len = skb->len; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 8e0bb75e7c5..55c939f5371 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -31,7 +31,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr, { struct nft_exthdr *priv = nft_expr_priv(expr); struct nft_data *dest = &data[priv->dreg]; - unsigned int offset; + unsigned int offset = 0; int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8c28220a90b..1ceaaa6dfe7 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -21,12 +21,15 @@ struct nft_meta { enum nft_meta_keys key:8; - enum nft_registers dreg:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; }; -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; @@ -132,23 +135,50 @@ err: data[NFT_REG_VERDICT].verdict = NFT_BREAK; } +static void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 value = data[meta->sreg].data[0]; + + switch (meta->key) { + case NFT_META_MARK: + skb->mark = value; + break; + case NFT_META_PRIORITY: + skb->priority = value; + break; + case NFT_META_NFTRACE: + skb->nf_trace = 1; + break; + default: + WARN_ON(1); + } +} + static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { [NFTA_META_DREG] = { .type = NLA_U32 }, [NFTA_META_KEY] = { .type = NLA_U32 }, + [NFTA_META_SREG] = { .type = NLA_U32 }, }; -static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_meta_init_validate_set(uint32_t key) { - struct nft_meta *priv = nft_expr_priv(expr); - int err; - - if (tb[NFTA_META_DREG] == NULL || - tb[NFTA_META_KEY] == NULL) - return -EINVAL; + switch (key) { + case NFT_META_MARK: + case NFT_META_PRIORITY: + case NFT_META_NFTRACE: + return 0; + default: + return -EOPNOTSUPP; + } +} - priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (priv->key) { +static int nft_meta_init_validate_get(uint32_t key) +{ + switch (key) { case NFT_META_LEN: case NFT_META_PROTOCOL: case NFT_META_PRIORITY: @@ -167,26 +197,69 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: #endif - break; + return 0; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); +} + +static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + + if (tb[NFTA_META_DREG]) { + err = nft_meta_init_validate_get(priv->key); + if (err < 0) + return err; + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + return nft_validate_data_load(ctx, priv->dreg, NULL, + NFT_DATA_VALUE); + } + + err = nft_meta_init_validate_set(priv->key); if (err < 0) return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + + priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); + + return 0; } -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) + goto nla_put_failure; if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + goto nla_put_failure; + return 0; nla_put_failure: @@ -194,17 +267,44 @@ nla_put_failure: } static struct nft_expr_type nft_meta_type; -static const struct nft_expr_ops nft_meta_ops = { +static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_eval, + .eval = nft_meta_get_eval, .init = nft_meta_init, - .dump = nft_meta_dump, + .dump = nft_meta_get_dump, }; +static const struct nft_expr_ops nft_meta_set_ops = { + .type = &nft_meta_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_set_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .ops = &nft_meta_ops, + .select_ops = &nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c deleted file mode 100644 index 71177df75ff..00000000000 --- a/net/netfilter/nft_meta_target.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_meta { - enum nft_meta_keys key; -}; - -static void nft_meta_eval(const struct nft_expr *expr, - struct nft_data *nfres, - struct nft_data *data, - const struct nft_pktinfo *pkt) -{ - const struct nft_meta *meta = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - u32 val = data->data[0]; - - switch (meta->key) { - case NFT_META_MARK: - skb->mark = val; - break; - case NFT_META_PRIORITY: - skb->priority = val; - break; - case NFT_META_NFTRACE: - skb->nf_trace = val; - break; -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: - skb->secmark = val; - break; -#endif - default: - WARN_ON(1); - } -} - -static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_KEY] = { .type = NLA_U32 }, -}; - -static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[]) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - if (tb[NFTA_META_KEY] == NULL) - return -EINVAL; - - meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); - switch (meta->key) { - case NFT_META_MARK: - case NFT_META_PRIORITY: - case NFT_META_NFTRACE: -#ifdef CONFIG_NETWORK_SECMARK - case NFT_META_SECMARK: -#endif - break; - default: - return -EINVAL; - } - - return 0; -} - -static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - struct nft_meta *meta = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_META_KEY, htonl(meta->key)); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_ops meta_target __read_mostly = { - .name = "meta", - .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .owner = THIS_MODULE, - .eval = nft_meta_eval, - .init = nft_meta_init, - .dump = nft_meta_dump, - .policy = nft_meta_policy, - .maxattr = NFTA_META_MAX, -}; - -static int __init nft_meta_target_init(void) -{ - return nft_register_expr(&meta_target); -} - -static void __exit nft_meta_target_exit(void) -{ - nft_unregister_expr(&meta_target); -} - -module_init(nft_meta_target_init); -module_exit(nft_meta_target_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("meta"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c new file mode 100644 index 00000000000..cbea473d69e --- /dev/null +++ b/net/netfilter/nft_queue.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code partly funded by OISF + * (http://www.openinfosecfoundation.org/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/jhash.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_queue.h> + +static u32 jhash_initval __read_mostly; + +struct nft_queue { + u16 queuenum; + u16 queues_total; + u16 flags; + u8 family; +}; + +static void nft_queue_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue = priv->queuenum; + u32 ret; + + if (priv->queues_total > 1) { + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) { + int cpu = smp_processor_id(); + + queue = priv->queuenum + cpu % priv->queues_total; + } else { + queue = nfqueue_hash(pkt->skb, queue, + priv->queues_total, priv->family, + jhash_initval); + } + } + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + data[NFT_REG_VERDICT].verdict = ret; +} + +static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { + [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, + [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, +}; + +static int nft_queue_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + + if (tb[NFTA_QUEUE_NUM] == NULL) + return -EINVAL; + + init_hashrandom(&jhash_initval); + priv->family = ctx->afi->family; + priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); + + if (tb[NFTA_QUEUE_TOTAL] != NULL) + priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); + if (tb[NFTA_QUEUE_FLAGS] != NULL) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + } + return 0; +} + +static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) || + nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_queue_type; +static const struct nft_expr_ops nft_queue_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_eval, + .init = nft_queue_init, + .dump = nft_queue_dump, +}; + +static struct nft_expr_type nft_queue_type __read_mostly = { + .name = "queue", + .ops = &nft_queue_ops, + .policy = nft_queue_policy, + .maxattr = NFTA_QUEUE_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_queue_module_init(void) +{ + return nft_register_expr(&nft_queue_type); +} + +static void __exit nft_queue_module_exit(void) +{ + nft_unregister_expr(&nft_queue_type); +} + +module_init(nft_queue_module_init); +module_exit(nft_queue_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Leblond <eric@regit.org>"); +MODULE_ALIAS_NFT_EXPR("queue"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/netfilter/nft_reject.c index 4a5e94ac314..0d690d4101d 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/netfilter/nft_reject.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,10 +17,16 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> + +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) +#include <net/netfilter/ipv6/nf_reject.h> +#endif struct nft_reject { enum nft_reject_types type:8; u8 icmp_code; + u8 family; }; static void nft_reject_eval(const struct nft_expr *expr, @@ -27,12 +34,25 @@ static void nft_reject_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0); + if (priv->family == NFPROTO_IPV4) + nf_send_unreach(pkt->skb, priv->icmp_code); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->hooknum); +#endif break; case NFT_REJECT_TCP_RST: + if (priv->family == NFPROTO_IPV4) + nf_send_reset(pkt->skb, pkt->hooknum); +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + else if (priv->family == NFPROTO_IPV6) + nf_send_reset6(net, pkt->skb, pkt->hooknum); +#endif break; } @@ -53,6 +73,7 @@ static int nft_reject_init(const struct nft_ctx *ctx, if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; + priv->family = ctx->afi->family; priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index ed00fef5899..8f1779ff7e3 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,15 +11,13 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/jhash.h> - #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_NFQUEUE.h> +#include <net/netfilter/nf_queue.h> + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: packet forwarding to netlink"); MODULE_LICENSE("GPL"); @@ -28,7 +26,6 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -38,69 +35,16 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_QUEUE_NR(tinfo->queuenum); } -static u32 hash_v4(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - /* packets in either direction go into same queue */ - if ((__force u32)iph->saddr < (__force u32)iph->daddr) - return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); - - return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); -} - -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static u32 hash_v6(const struct sk_buff *skb) -{ - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 a, b, c; - - if ((__force u32)ip6h->saddr.s6_addr32[3] < - (__force u32)ip6h->daddr.s6_addr32[3]) { - a = (__force u32) ip6h->saddr.s6_addr32[3]; - b = (__force u32) ip6h->daddr.s6_addr32[3]; - } else { - b = (__force u32) ip6h->saddr.s6_addr32[3]; - a = (__force u32) ip6h->daddr.s6_addr32[3]; - } - - if ((__force u32)ip6h->saddr.s6_addr32[1] < - (__force u32)ip6h->daddr.s6_addr32[1]) - c = (__force u32) ip6h->saddr.s6_addr32[1]; - else - c = (__force u32) ip6h->daddr.s6_addr32[1]; - - return jhash_3words(a, b, c, jhash_initval); -} -#endif - -static u32 -nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct xt_NFQ_info_v1 *info = par->targinfo; - u32 queue = info->queuenum; - - if (par->family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb) * info->queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (par->family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb) * info->queues_total) >> 32; -#endif - - return queue; -} - static unsigned int nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v1 *info = par->targinfo; u32 queue = info->queuenum; - if (info->queues_total > 1) - queue = nfqueue_hash(skb, par); - + if (info->queues_total > 1) { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } return NF_QUEUE_NR(queue); } @@ -120,10 +64,8 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v3 *info = par->targinfo; u32 maxid; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); - rnd_inited = true; - } + init_hashrandom(&jhash_initval); + if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return -EINVAL; @@ -154,8 +96,10 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) int cpu = smp_processor_id(); queue = info->queuenum + cpu % info->queues_total; - } else - queue = nfqueue_hash(skb, par); + } else { + queue = nfqueue_hash(skb, queue, info->queues_total, + par->family, jhash_initval); + } } ret = NF_QUEUE_NR(queue); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b077b90c125..34a656d9017 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1773,6 +1773,9 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_put; + if (ring->frame_size - NL_MMAP_HDRLEN < size) + goto out_put; + skb = alloc_skb_head(gfp_mask); if (skb == NULL) goto err1; @@ -1782,6 +1785,7 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, if (ring->pg_vec == NULL) goto out_free; + /* check again under lock */ maxlen = ring->frame_size - NL_MMAP_HDRLEN; if (maxlen < size) goto out_free; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 713671ae45a..b1dcdb932a8 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -461,6 +461,26 @@ int genl_unregister_family(struct genl_family *family) EXPORT_SYMBOL(genl_unregister_family); /** + * genlmsg_new_unicast - Allocate generic netlink message for unicast + * @payload: size of the message payload + * @info: information on destination + * @flags: the type of memory to allocate + * + * Allocates a new sk_buff large enough to cover the specified payload + * plus required Netlink headers. Will check receiving socket for + * memory mapped i/o capability and use it if enabled. Will fall back + * to non-mapped skb if message size exceeds the frame size of the ring. + */ +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags) +{ + size_t len = nlmsg_total_size(genlmsg_total_size(payload)); + + return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags); +} +EXPORT_SYMBOL_GPL(genlmsg_new_unicast); + +/** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message * @portid: netlink portid the message is addressed to @@ -600,6 +620,7 @@ static int genl_family_rcv_msg(struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.dst_sk = skb->sk; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6f5e1dd3be2..df4692826ea 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -108,10 +108,9 @@ int lockdep_ovsl_is_held(void) #endif static struct vport *new_vport(const struct vport_parms *); -static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); -static int queue_userspace_packet(struct net *, int dp_ifindex, - struct sk_buff *, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock or ovs_mutex. */ @@ -133,7 +132,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex) } /* Must be called with rcu_read_lock or ovs_mutex. */ -const char *ovs_dp_name(const struct datapath *dp) +static const char *ovs_dp_name(const struct datapath *dp) { struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); @@ -234,7 +233,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(&dp->table, &key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -251,9 +250,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; - stats_counter = &stats->n_hit; - ovs_flow_used(OVS_CB(skb)->flow, skb); + ovs_flow_stats_update(OVS_CB(skb)->flow, skb); ovs_execute_actions(dp, skb); + stats_counter = &stats->n_hit; out: /* Update datapath statistics. */ @@ -277,7 +276,6 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; - int dp_ifindex; int err; if (upcall_info->portid == 0) { @@ -285,16 +283,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, goto err; } - dp_ifindex = get_dpifindex(dp); - if (!dp_ifindex) { - err = -ENODEV; - goto err; - } - if (!skb_is_gso(skb)) - err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); else - err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); + err = queue_gso_packets(dp, skb, upcall_info); if (err) goto err; @@ -310,8 +302,7 @@ err: return err; } -static int queue_gso_packets(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; @@ -320,14 +311,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *segs, *nskb; int err; - segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); + segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); /* Queue all of the segments. */ skb = segs; do { - err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); + err = queue_userspace_packet(dp, skb, upcall_info); if (err) break; @@ -380,11 +371,11 @@ static size_t key_attr_size(void) + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } -static size_t upcall_msg_size(const struct sk_buff *skb, - const struct nlattr *userdata) +static size_t upcall_msg_size(const struct nlattr *userdata, + unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ @@ -394,15 +385,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb, return size; } -static int queue_userspace_packet(struct net *net, int dp_ifindex, - struct sk_buff *skb, +static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; - int err; + struct genl_info info = { + .dst_sk = ovs_dp_get_net(dp)->genl_sock, + .snd_portid = upcall_info->portid, + }; + size_t len; + unsigned int hlen; + int err, dp_ifindex; + + dp_ifindex = get_dpifindex(dp); + if (!dp_ifindex) + return -ENODEV; if (vlan_tx_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); @@ -422,7 +422,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, goto out; } - user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); + /* Complete checksum if needed */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto out; + + /* Older versions of OVS user space enforce alignment of the last + * Netlink attribute to NLA_ALIGNTO which would require extensive + * padding logic. Only perform zerocopy if padding is not required. + */ + if (dp->user_features & OVS_DP_F_UNALIGNED) + hlen = skb_zerocopy_headlen(skb); + else + hlen = skb->len; + + len = upcall_msg_size(upcall_info->userdata, hlen); + user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; @@ -441,26 +456,24 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); - nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); + /* Only reserve room for attribute header, packet data is added + * in skb_zerocopy() */ + if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { + err = -ENOBUFS; + goto out; + } + nla->nla_len = nla_attr_size(skb->len); - skb_copy_and_csum_dev(skb, nla_data(nla)); + skb_zerocopy(user_skb, skb, skb->len, hlen); - genlmsg_end(user_skb, upcall); - err = genlmsg_unicast(net, user_skb, upcall_info->portid); + ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; + err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: kfree_skb(nskb); return err; } -static void clear_stats(struct sw_flow *flow) -{ - flow->used = 0; - flow->tcp_flags = 0; - flow->packet_count = 0; - flow->byte_count = 0; -} - static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; @@ -499,7 +512,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(false); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -635,10 +648,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const int skb_orig_len = skb->len; struct nlattr *start; struct ovs_flow_stats stats; + __be16 tcp_flags; + unsigned long used; struct ovs_header *ovs_header; struct nlattr *nla; - unsigned long used; - u8 tcp_flags; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); @@ -667,24 +680,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); - spin_lock_bh(&flow->lock); - used = flow->used; - stats.n_packets = flow->packet_count; - stats.n_bytes = flow->byte_count; - tcp_flags = (u8)ntohs(flow->tcp_flags); - spin_unlock_bh(&flow->lock); - + ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; if (stats.n_packets && - nla_put(skb, OVS_FLOW_ATTR_STATS, - sizeof(struct ovs_flow_stats), &stats)) + nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) goto nla_put_failure; - if (tcp_flags && - nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) + if ((u8)ntohs(tcp_flags) && + nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) goto nla_put_failure; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if @@ -701,8 +707,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (start) { const struct sw_flow_actions *sf_acts; - sf_acts = rcu_dereference_check(flow->sf_acts, - lockdep_ovsl_is_held()); + sf_acts = rcu_dereference_ovsl(flow->sf_acts); err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); @@ -726,39 +731,34 @@ error: return err; } -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) +static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, + struct genl_info *info) { - const struct sw_flow_actions *sf_acts; + size_t len; - sf_acts = ovsl_dereference(flow->sf_acts); + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); + return genlmsg_new_unicast(len, info, GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 portid, u32 seq, u8 cmd) + struct genl_info *info, + u8 cmd) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow); + skb = ovs_flow_cmd_alloc_info(flow, info); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, + info->snd_seq, 0, cmd); BUG_ON(retval < 0); return skb; } -static struct sw_flow *__ovs_flow_tbl_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - u32 __always_unused n_mask_hit; - - return ovs_flow_tbl_lookup(tbl, key, &n_mask_hit); -} - static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -770,6 +770,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; + bool exact_5tuple; int error; /* Extract key. */ @@ -778,7 +779,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, + error = ovs_nla_get_match(&match, &exact_5tuple, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -809,7 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Check if this is a duplicate flow */ - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; @@ -817,12 +818,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Allocate flow. */ - flow = ovs_flow_alloc(); + flow = ovs_flow_alloc(!exact_5tuple); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; } - clear_stats(flow); flow->key = masked_key; flow->unmasked_key = key; @@ -835,8 +835,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; @@ -864,15 +863,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) { - spin_lock_bh(&flow->lock); - clear_stats(flow); - spin_unlock_bh(&flow->lock); - } + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); } ovs_unlock(); @@ -910,7 +905,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -921,14 +916,13 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -965,17 +959,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - flow = __ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow); + reply = ovs_flow_cmd_alloc_info(flow, info); if (!reply) { err = -ENOMEM; goto unlock; @@ -1061,6 +1055,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1119,6 +1114,9 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, &dp_megaflow_stats)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) + goto nla_put_failure; + return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1127,17 +1125,17 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, - u32 seq, u8 cmd) +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, + struct genl_info *info, u8 cmd) { struct sk_buff *skb; int retval; - skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1165,6 +1163,24 @@ static struct datapath *lookup_datapath(struct net *net, return dp ? dp : ERR_PTR(-ENODEV); } +static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info) +{ + struct datapath *dp; + + dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); + if (!dp) + return; + + WARN(dp->user_features, "Dropping previously announced user features\n"); + dp->user_features = 0; +} + +static void ovs_dp_change(struct datapath *dp, struct nlattr **a) +{ + if (a[OVS_DP_ATTR_USER_FEATURES]) + dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1223,17 +1239,27 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + ovs_dp_change(dp, a); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); if (err == -EBUSY) err = -EEXIST; + if (err == -EEXIST) { + /* An outdated user space instance that does not understand + * the concept of user_features has attempted to create a new + * datapath and is likely to reuse it. Drop all user features. + */ + if (info->genlhdr->version < OVS_DP_VER_FEATURES) + ovs_dp_reset_user_features(skb, info); + } + goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto err_destroy_local_port; @@ -1299,8 +1325,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_DEL); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto unlock; @@ -1328,8 +1353,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto unlock; - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + ovs_dp_change(dp, info->attrs); + + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, @@ -1360,8 +1386,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_dp_cmd_build_info(dp, info->snd_portid, - info->snd_seq, OVS_DP_CMD_NEW); + reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1441,7 +1466,7 @@ struct genl_family dp_vport_genl_family = { .parallel_ops = true, }; -struct genl_multicast_group ovs_dp_vport_multicast_group = { +static struct genl_multicast_group ovs_dp_vport_multicast_group = { .name = OVS_VPORT_MCGROUP }; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4067ea41be2..6be9fbb5e9c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,6 +88,8 @@ struct datapath { /* Network namespace ref. */ struct net *net; #endif + + u32 user_features; }; /** @@ -145,6 +147,8 @@ int lockdep_ovsl_is_held(void); #define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held())) #define ovsl_dereference(p) \ rcu_dereference_protected(p, lockdep_ovsl_is_held()) +#define rcu_dereference_ovsl(p) \ + rcu_dereference_check(p, lockdep_ovsl_is_held()) static inline struct net *ovs_dp_get_net(struct datapath *dp) { @@ -178,14 +182,12 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; -extern struct genl_multicast_group ovs_dp_vport_multicast_group; void ovs_dp_process_received_packet(struct vport *, struct sk_buff *); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, const struct dp_upcall_info *); -const char *ovs_dp_name(const struct datapath *dp); struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b409f527960..16f4b46161d 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -35,6 +35,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/sctp.h> +#include <linux/smp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> @@ -60,10 +61,16 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { + struct flow_stats *stats; __be16 tcp_flags = 0; + if (!flow->stats.is_percpu) + stats = flow->stats.stat; + else + stats = this_cpu_ptr(flow->stats.cpu_stats); + if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && flow->key.ip.proto == IPPROTO_TCP && @@ -71,12 +78,87 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); + spin_lock(&stats->lock); + stats->used = jiffies; + stats->packet_count++; + stats->byte_count += skb->len; + stats->tcp_flags |= tcp_flags; + spin_unlock(&stats->lock); +} + +static void stats_read(struct flow_stats *stats, + struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + spin_lock(&stats->lock); + if (time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, + unsigned long *used, __be16 *tcp_flags) +{ + int cpu, cur_cpu; + + *used = 0; + *tcp_flags = 0; + memset(ovs_stats, 0, sizeof(*ovs_stats)); + + if (!flow->stats.is_percpu) { + stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); + } else { + cur_cpu = get_cpu(); + for_each_possible_cpu(cpu) { + struct flow_stats *stats; + + if (cpu == cur_cpu) + local_bh_disable(); + + stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + stats_read(stats, ovs_stats, used, tcp_flags); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } +} + +static void stats_reset(struct flow_stats *stats) +{ + spin_lock(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock(&stats->lock); +} + +void ovs_flow_stats_clear(struct sw_flow *flow) +{ + int cpu, cur_cpu; + + if (!flow->stats.is_percpu) { + stats_reset(flow->stats.stat); + } else { + cur_cpu = get_cpu(); + + for_each_possible_cpu(cpu) { + + if (cpu == cur_cpu) + local_bh_disable(); + + stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); + + if (cpu == cur_cpu) + local_bh_enable(); + } + put_cpu(); + } } static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1510f51dbf7..2d770e28a3a 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -19,6 +19,7 @@ #ifndef FLOW_H #define FLOW_H 1 +#include <linux/cache.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/openvswitch.h> @@ -122,8 +123,8 @@ struct sw_flow_key { } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow_key_range { - size_t start; - size_t end; + unsigned short int start; + unsigned short int end; }; struct sw_flow_mask { @@ -146,6 +147,22 @@ struct sw_flow_actions { struct nlattr actions[]; }; +struct flow_stats { + u64 packet_count; /* Number of packets matched. */ + u64 byte_count; /* Number of bytes matched. */ + unsigned long used; /* Last used time (in jiffies). */ + spinlock_t lock; /* Lock for atomic stats update. */ + __be16 tcp_flags; /* Union of seen TCP flags. */ +}; + +struct sw_flow_stats { + bool is_percpu; + union { + struct flow_stats *stat; + struct flow_stats __percpu *cpu_stats; + }; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -155,12 +172,7 @@ struct sw_flow { struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - - spinlock_t lock; /* Lock for values below. */ - unsigned long used; /* Last used time (in jiffies). */ - u64 packet_count; /* Number of packets matched. */ - u64 byte_count; /* Number of bytes matched. */ - __be16 tcp_flags; /* Union of seen TCP flags. */ + struct sw_flow_stats stats; }; struct arp_eth_header { @@ -177,7 +189,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_used(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); +void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, + unsigned long *used, __be16 *tcp_flags); +void ovs_flow_stats_clear(struct sw_flow *flow); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2bc1bc1aca3..4d000acaed0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -266,6 +266,20 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } +static bool is_all_set(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i] != 0xff) + return false; + + return true; +} + static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -487,8 +501,9 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, + u64 attrs, const struct nlattr **a, + bool is_mask) { int err; u64 orig_attrs = attrs; @@ -545,6 +560,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } + if (is_mask && exact_5tuple) { + if (match->mask->key.eth.type != htons(0xffff)) + *exact_5tuple = false; + } + if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -567,6 +587,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv4_key->ipv4_proto != 0xff || + ipv4_key->ipv4_src != htonl(0xffffffff) || + ipv4_key->ipv4_dst != htonl(0xffffffff)) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -598,6 +625,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); + + if (is_mask && exact_5tuple && *exact_5tuple) { + if (ipv6_key->ipv6_proto != 0xff || + !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || + !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) + *exact_5tuple = false; + } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -640,6 +674,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, tcp_key->tcp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_TCP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (tcp_key->tcp_src != htons(0xffff) || + tcp_key->tcp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { @@ -671,6 +710,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, udp_key->udp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_UDP); + + if (is_mask && exact_5tuple && *exact_5tuple && + (udp_key->udp_src != htons(0xffff) || + udp_key->udp_dst != htons(0xffff))) + *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { @@ -756,6 +800,7 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. */ int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -803,10 +848,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); if (err) return err; + if (exact_5tuple) + *exact_5tuple = true; + if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -844,7 +892,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); if (err) return err; } else { @@ -1128,19 +1176,11 @@ struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) return sfa; } -/* RCU callback used by ovs_nla_free_flow_actions. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. */ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) { - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); + kfree_rcu(sf_acts, rcu); } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 440151045d3..b31fbe28bc7 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,6 +45,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, + bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0e720c31607..b430d42b2d0 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -44,8 +44,6 @@ #include <net/ipv6.h> #include <net/ndisc.h> -#include "datapath.h" - #define TBL_MIN_BUCKETS 1024 #define REHASH_INTERVAL (10 * 60 * HZ) @@ -72,19 +70,42 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(void) +struct sw_flow *ovs_flow_alloc(bool percpu_stats) { struct sw_flow *flow; + int cpu; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - spin_lock_init(&flow->lock); flow->sf_acts = NULL; flow->mask = NULL; + flow->stats.is_percpu = percpu_stats; + + if (!percpu_stats) { + flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); + if (!flow->stats.stat) + goto err; + + spin_lock_init(&flow->stats.stat->lock); + } else { + flow->stats.cpu_stats = alloc_percpu(struct flow_stats); + if (!flow->stats.cpu_stats) + goto err; + + for_each_possible_cpu(cpu) { + struct flow_stats *cpu_stats; + + cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + spin_lock_init(&cpu_stats->lock); + } + } return flow; +err: + kfree(flow); + return ERR_PTR(-ENOMEM); } int ovs_flow_tbl_count(struct flow_table *table) @@ -118,6 +139,10 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { kfree((struct sf_flow_acts __force *)flow->sf_acts); + if (flow->stats.is_percpu) + free_percpu(flow->stats.cpu_stats); + else + kfree(flow->stats.stat); kmem_cache_free(flow_cache, flow); } @@ -128,13 +153,6 @@ static void rcu_free_flow_callback(struct rcu_head *rcu) flow_free(flow); } -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - - kfree(mask); -} - static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) { if (!mask) @@ -146,7 +164,7 @@ static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) if (!mask->ref_count) { list_del_rcu(&mask->list); if (deferred) - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + kfree_rcu(mask, rcu); else kfree(mask); } @@ -429,11 +447,11 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, const struct sw_flow_key *key, u32 *n_mask_hit) { - struct table_instance *ti = rcu_dereference(tbl->ti); + struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; @@ -447,6 +465,14 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, return NULL; } +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + u32 __always_unused n_mask_hit; + + return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit); +} + int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct sw_flow_mask *mask; @@ -514,11 +540,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ +/* Add 'mask' into the mask list, if it is not already there. */ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, struct sw_flow_mask *new) { diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index fbe45d5ad07..1996e34c0fd 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -55,7 +55,7 @@ struct flow_table { int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(void); +struct sw_flow *ovs_flow_alloc(bool percpu_stats); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); @@ -69,9 +69,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); -struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, +struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, const struct sw_flow_key *, u32 *n_mask_hit); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, struct sw_flow_match *match); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f5275dd29cd..208dd9a26dd 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -33,6 +33,9 @@ #include "vport.h" #include "vport-internal_dev.h" +static void ovs_vport_record_error(struct vport *, + enum vport_err_type err_type); + /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the bottom of vport.h. */ static const struct vport_ops *vport_ops_list[] = { @@ -396,7 +399,8 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * If using the vport generic stats layer indicate that an error of the given * type has occurred. */ -void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) +static void ovs_vport_record_error(struct vport *vport, + enum vport_err_type err_type) { spin_lock(&vport->stats_lock); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index bc97ef7fa2a..d7e50a17396 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -192,7 +192,6 @@ static inline struct vport *vport_from_priv(const void *priv) void ovs_vport_receive(struct vport *, struct sk_buff *, struct ovs_key_ipv4_tunnel *); -void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. */ diff --git a/net/rds/ib.c b/net/rds/ib.c index b4c8b0022fe..ba2dffeff60 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 81f94b1ae1c..d080eb4b0d2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) { struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); srose = msg->msg_name; @@ -1260,18 +1261,9 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index d3d7a0a66e2..a1a8e29e5fc 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -295,6 +295,19 @@ config NET_SCH_HHF To compile this driver as a module, choose M here: the module will be called sch_hhf. +config NET_SCH_PIE + tristate "Proportional Integral controller Enhanced (PIE) scheduler" + help + Say Y here if you want to use the Proportional Integral controller + Enhanced scheduler packet scheduling algorithm. + For more information, please see + http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + + To compile this driver as a module, choose M here: the module + will be called sch_pie. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 3442e5fbc4d..0a869a11f3e 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o +obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 6f103fd76c1..f63e1467cd3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -126,9 +126,10 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, for (i = 0; i < (hinfo->hmask + 1); i++) { head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) { module_put(a->ops->owner); - n_i++; + n_i++; + } } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -352,18 +353,16 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, } list_for_each_entry(a, actions, list) { repeat: - if (a->ops) { - ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ - if (ret != TC_ACT_PIPE) - goto exec_done; + ret = a->ops->act(skb, a, res); + if (TC_MUNGED & skb->tc_verd) { + /* copied already, allow trampling */ + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); } + if (ret == TC_ACT_REPEAT) + goto repeat; /* we need a ttl - JHS */ + if (ret != TC_ACT_PIPE) + goto exec_done; } exec_done: return ret; @@ -375,27 +374,16 @@ void tcf_action_destroy(struct list_head *actions, int bind) struct tc_action *a, *tmp; list_for_each_entry_safe(a, tmp, actions, list) { - if (a->ops) { - if (a->ops->cleanup(a, bind) == ACT_P_DELETED) - module_put(a->ops->owner); - list_del(&a->list); - kfree(a); - } else { - /*FIXME: Remove later - catch insertion bugs*/ - WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n"); - list_del(&a->list); - kfree(a); - } + if (a->ops->cleanup(a, bind) == ACT_P_DELETED) + module_put(a->ops->owner); + list_del(&a->list); + kfree(a); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - int err = -EINVAL; - - if (a->ops == NULL) - return err; return a->ops->dump(skb, a, bind, ref); } @@ -406,9 +394,6 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL) - return err; - if (nla_put_string(skb, TCA_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) @@ -684,7 +669,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) INIT_LIST_HEAD(&a->list); err = -EINVAL; a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); - if (a->ops == NULL) + if (a->ops == NULL) /* could happen in batch of actions */ goto err_free; err = -ENOENT; if (a->ops->lookup(a, index) == 0) @@ -760,7 +745,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, err = -EINVAL; kind = tb[TCA_ACT_KIND]; a->ops = tc_lookup_action(kind); - if (a->ops == NULL) + if (a->ops == NULL) /*some idjot trying to flush unknown action */ goto err_out; nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 9cc6717c5f1..8b1d65772a8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -70,16 +70,16 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, &csum_idx_gen, &csum_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_csum(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_csum(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &csum_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &csum_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_csum(pc); spin_lock_bh(&p->tcf_lock); p->tcf_action = parm->action; p->update_flags = parm->update_flags; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index dea927343bf..af5641c290f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -95,10 +95,11 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_hash_release(pc, bind, &gact_hash_info); + if (bind)/* dont override defaults */ + return 0; + tcf_hash_release(pc, bind, &gact_hash_info); + if (!ovr) return -EEXIST; - } } gact = to_gact(pc); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index e13ecbbfe8c..242636950ea 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -134,10 +134,12 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, return PTR_ERR(pc); ret = ACT_P_CREATED; } else { - if (!ovr) { - tcf_ipt_release(to_ipt(pc), bind); + if (bind)/* dont override defaults */ + return 0; + tcf_ipt_release(to_ipt(pc), bind); + + if (!ovr) return -EEXIST; - } } ipt = to_ipt(pc); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 921fea43fca..584e65503ed 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -64,15 +64,15 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, &nat_idx_gen, &nat_hash_info); if (IS_ERR(pc)) return PTR_ERR(pc); - p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { - p = to_tcf_nat(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &nat_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &nat_hash_info); + if (!ovr) return -EEXIST; - } } + p = to_tcf_nat(pc); spin_lock_bh(&p->tcf_lock); p->old_addr = parm->old_addr; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index e2520e90a10..72918934193 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -78,10 +78,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { p = to_pedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &pedit_hash_info); + tcf_hash_release(pc, bind, &pedit_hash_info); + if (bind) + return 0; + if (!ovr) return -EEXIST; - } + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 819a9a4d198..9295b86d531 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -162,10 +162,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; + return 0; } if (ovr) goto override; - return ret; + /* not replacing */ + return -EEXIST; } } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 81aebc162e5..b44491e3ec1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -135,10 +135,13 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_defact(pc); - if (!ovr) { - tcf_simp_release(d, bind); + + if (bind) + return 0; + tcf_simp_release(d, bind); + if (!ovr) return -EEXIST; - } + reset_policy(d, defdata, parm); } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index aa0a4c056f3..0fa1aad6e20 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -112,10 +112,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(pc); - if (!ovr) { - tcf_hash_release(pc, bind, &skbedit_hash_info); + if (bind) + return 0; + tcf_hash_release(pc, bind, &skbedit_hash_info); + if (!ovr) return -EEXIST; - } } spin_lock_bh(&d->tcf_lock); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 12e882ef596..d8c42b1b88e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -577,7 +577,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); nest = nla_nest_start(skb, exts->police); - if (nest == NULL) + if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) goto nla_put_failure; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c new file mode 100644 index 00000000000..fe65340c8eb --- /dev/null +++ b/net/sched/sch_pie.c @@ -0,0 +1,555 @@ +/* Copyright (C) 2013 Cisco Systems, Inc, 2013. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Author: Vijay Subramanian <vijaynsu@cisco.com> + * Author: Mythili Prabhu <mysuryan@cisco.com> + * + * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no> + * University of Oslo, Norway. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/pkt_sched.h> +#include <net/inet_ecn.h> + +#define QUEUE_THRESHOLD 10000 +#define DQCOUNT_INVALID -1 +#define MAX_PROB 0xffffffff +#define PIE_SCALE 8 + +/* parameters used */ +struct pie_params { + psched_time_t target; /* user specified target delay in pschedtime */ + u32 tupdate; /* timer frequency (in jiffies) */ + u32 limit; /* number of packets that can be enqueued */ + u32 alpha; /* alpha and beta are between -4 and 4 */ + u32 beta; /* and are used for shift relative to 1 */ + bool ecn; /* true if ecn is enabled */ + bool bytemode; /* to scale drop early prob based on pkt size */ +}; + +/* variables used */ +struct pie_vars { + u32 prob; /* probability but scaled by u32 limit. */ + psched_time_t burst_time; + psched_time_t qdelay; + psched_time_t qdelay_old; + u64 dq_count; /* measured in bytes */ + psched_time_t dq_tstamp; /* drain rate */ + u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */ + u32 qlen_old; /* in bytes */ +}; + +/* statistics gathering */ +struct pie_stats { + u32 packets_in; /* total number of packets enqueued */ + u32 dropped; /* packets dropped due to pie_action */ + u32 overlimit; /* dropped due to lack of space in queue */ + u32 maxq; /* maximum queue size */ + u32 ecn_mark; /* packets marked with ECN */ +}; + +/* private data for the Qdisc */ +struct pie_sched_data { + struct pie_params params; + struct pie_vars vars; + struct pie_stats stats; + struct timer_list adapt_timer; +}; + +static void pie_params_init(struct pie_params *params) +{ + params->alpha = 2; + params->beta = 20; + params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */ + params->limit = 1000; /* default of 1000 packets */ + params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */ + params->ecn = false; + params->bytemode = false; +} + +static void pie_vars_init(struct pie_vars *vars) +{ + vars->dq_count = DQCOUNT_INVALID; + vars->avg_dq_rate = 0; + /* default of 100 ms in pschedtime */ + vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC); +} + +static bool drop_early(struct Qdisc *sch, u32 packet_size) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 rnd; + u32 local_prob = q->vars.prob; + u32 mtu = psched_mtu(qdisc_dev(sch)); + + /* If there is still burst allowance left skip random early drop */ + if (q->vars.burst_time > 0) + return false; + + /* If current delay is less than half of target, and + * if drop prob is low already, disable early_drop + */ + if ((q->vars.qdelay < q->params.target / 2) + && (q->vars.prob < MAX_PROB / 5)) + return false; + + /* If we have fewer than 2 mtu-sized packets, disable drop_early, + * similar to min_th in RED + */ + if (sch->qstats.backlog < 2 * mtu) + return false; + + /* If bytemode is turned on, use packet size to compute new + * probablity. Smaller packets will have lower drop prob in this case + */ + if (q->params.bytemode && packet_size <= mtu) + local_prob = (local_prob / mtu) * packet_size; + else + local_prob = q->vars.prob; + + rnd = net_random(); + if (rnd < local_prob) + return true; + + return false; +} + +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + bool enqueue = false; + + if (unlikely(qdisc_qlen(sch) >= sch->limit)) { + q->stats.overlimit++; + goto out; + } + + if (!drop_early(sch, skb->len)) { + enqueue = true; + } else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) && + INET_ECN_set_ce(skb)) { + /* If packet is ecn capable, mark it if drop probability + * is lower than 10%, else drop it. + */ + q->stats.ecn_mark++; + enqueue = true; + } + + /* we can enqueue the packet */ + if (enqueue) { + q->stats.packets_in++; + if (qdisc_qlen(sch) > q->stats.maxq) + q->stats.maxq = qdisc_qlen(sch); + + return qdisc_enqueue_tail(skb, sch); + } + +out: + q->stats.dropped++; + return qdisc_drop(skb, sch); +} + +static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { + [TCA_PIE_TARGET] = {.type = NLA_U32}, + [TCA_PIE_LIMIT] = {.type = NLA_U32}, + [TCA_PIE_TUPDATE] = {.type = NLA_U32}, + [TCA_PIE_ALPHA] = {.type = NLA_U32}, + [TCA_PIE_BETA] = {.type = NLA_U32}, + [TCA_PIE_ECN] = {.type = NLA_U32}, + [TCA_PIE_BYTEMODE] = {.type = NLA_U32}, +}; + +static int pie_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_PIE_MAX + 1]; + unsigned int qlen; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + if (err < 0) + return err; + + sch_tree_lock(sch); + + /* convert from microseconds to pschedtime */ + if (tb[TCA_PIE_TARGET]) { + /* target is in us */ + u32 target = nla_get_u32(tb[TCA_PIE_TARGET]); + + /* convert to pschedtime */ + q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); + } + + /* tupdate is in jiffies */ + if (tb[TCA_PIE_TUPDATE]) + q->params.tupdate = usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])); + + if (tb[TCA_PIE_LIMIT]) { + u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]); + + q->params.limit = limit; + sch->limit = limit; + } + + if (tb[TCA_PIE_ALPHA]) + q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]); + + if (tb[TCA_PIE_BETA]) + q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]); + + if (tb[TCA_PIE_ECN]) + q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]); + + if (tb[TCA_PIE_BYTEMODE]) + q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]); + + /* Drop excess packets if new limit is lower */ + qlen = sch->q.qlen; + while (sch->q.qlen > sch->limit) { + struct sk_buff *skb = __skb_dequeue(&sch->q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); + + sch_tree_unlock(sch); + return 0; +} + +static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb) +{ + + struct pie_sched_data *q = qdisc_priv(sch); + int qlen = sch->qstats.backlog; /* current queue size in bytes */ + + /* If current queue is about 10 packets or more and dq_count is unset + * we have enough packets to calculate the drain rate. Save + * current time as dq_tstamp and start measurement cycle. + */ + if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) { + q->vars.dq_tstamp = psched_get_time(); + q->vars.dq_count = 0; + } + + /* Calculate the average drain rate from this value. If queue length + * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset + * the dq_count to -1 as we don't have enough packets to calculate the + * drain rate anymore The following if block is entered only when we + * have a substantial queue built up (QUEUE_THRESHOLD bytes or more) + * and we calculate the drain rate for the threshold here. dq_count is + * in bytes, time difference in psched_time, hence rate is in + * bytes/psched_time. + */ + if (q->vars.dq_count != DQCOUNT_INVALID) { + q->vars.dq_count += skb->len; + + if (q->vars.dq_count >= QUEUE_THRESHOLD) { + psched_time_t now = psched_get_time(); + u32 dtime = now - q->vars.dq_tstamp; + u32 count = q->vars.dq_count << PIE_SCALE; + + if (dtime == 0) + return; + + count = count / dtime; + + if (q->vars.avg_dq_rate == 0) + q->vars.avg_dq_rate = count; + else + q->vars.avg_dq_rate = + (q->vars.avg_dq_rate - + (q->vars.avg_dq_rate >> 3)) + (count >> 3); + + /* If the queue has receded below the threshold, we hold + * on to the last drain rate calculated, else we reset + * dq_count to 0 to re-enter the if block when the next + * packet is dequeued + */ + if (qlen < QUEUE_THRESHOLD) + q->vars.dq_count = DQCOUNT_INVALID; + else { + q->vars.dq_count = 0; + q->vars.dq_tstamp = psched_get_time(); + } + + if (q->vars.burst_time > 0) { + if (q->vars.burst_time > dtime) + q->vars.burst_time -= dtime; + else + q->vars.burst_time = 0; + } + } + } +} + +static void calculate_probability(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + u32 qlen = sch->qstats.backlog; /* queue size in bytes */ + psched_time_t qdelay = 0; /* in pschedtime */ + psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ + s32 delta = 0; /* determines the change in probability */ + u32 oldprob; + u32 alpha, beta; + bool update_prob = true; + + q->vars.qdelay_old = q->vars.qdelay; + + if (q->vars.avg_dq_rate > 0) + qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate; + else + qdelay = 0; + + /* If qdelay is zero and qlen is not, it means qlen is very small, less + * than dequeue_rate, so we do not update probabilty in this round + */ + if (qdelay == 0 && qlen != 0) + update_prob = false; + + /* Add ranges for alpha and beta, more aggressive for high dropping + * mode and gentle steps for light dropping mode + * In light dropping mode, take gentle steps; in medium dropping mode, + * take medium steps; in high dropping mode, take big steps. + */ + if (q->vars.prob < MAX_PROB / 100) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; + } else if (q->vars.prob < MAX_PROB / 10) { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; + } else { + alpha = + (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + beta = + (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + } + + /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ + delta += alpha * ((qdelay - q->params.target)); + delta += beta * ((qdelay - qdelay_old)); + + oldprob = q->vars.prob; + + /* to ensure we increase probability in steps of no more than 2% */ + if (delta > (s32) (MAX_PROB / (100 / 2)) && + q->vars.prob >= MAX_PROB / 10) + delta = (MAX_PROB / 100) * 2; + + /* Non-linear drop: + * Tune drop probability to increase quickly for high delays(>= 250ms) + * 250ms is derived through experiments and provides error protection + */ + + if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC))) + delta += MAX_PROB / (100 / 2); + + q->vars.prob += delta; + + if (delta > 0) { + /* prevent overflow */ + if (q->vars.prob < oldprob) { + q->vars.prob = MAX_PROB; + /* Prevent normalization error. If probability is at + * maximum value already, we normalize it here, and + * skip the check to do a non-linear drop in the next + * section. + */ + update_prob = false; + } + } else { + /* prevent underflow */ + if (q->vars.prob > oldprob) + q->vars.prob = 0; + } + + /* Non-linear drop in probability: Reduce drop probability quickly if + * delay is 0 for 2 consecutive Tupdate periods. + */ + + if ((qdelay == 0) && (qdelay_old == 0) && update_prob) + q->vars.prob = (q->vars.prob * 98) / 100; + + q->vars.qdelay = qdelay; + q->vars.qlen_old = qlen; + + /* We restart the measurement cycle if the following conditions are met + * 1. If the delay has been low for 2 consecutive Tupdate periods + * 2. Calculated drop probability is zero + * 3. We have atleast one estimate for the avg_dq_rate ie., + * is a non-zero value + */ + if ((q->vars.qdelay < q->params.target / 2) && + (q->vars.qdelay_old < q->params.target / 2) && + (q->vars.prob == 0) && + (q->vars.avg_dq_rate > 0)) + pie_vars_init(&q->vars); +} + +static void pie_timer(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + struct pie_sched_data *q = qdisc_priv(sch); + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + + spin_lock(root_lock); + calculate_probability(sch); + + /* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */ + if (q->params.tupdate) + mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); + spin_unlock(root_lock); + +} + +static int pie_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct pie_sched_data *q = qdisc_priv(sch); + + pie_params_init(&q->params); + pie_vars_init(&q->vars); + sch->limit = q->params.limit; + + setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + mod_timer(&q->adapt_timer, jiffies + HZ / 2); + + if (opt) { + int err = pie_change(sch, opt); + + if (err) + return err; + } + + return 0; +} + +static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + /* convert target from pschedtime to us */ + if (nla_put_u32(skb, TCA_PIE_TARGET, + ((u32) PSCHED_TICKS2NS(q->params.target)) / + NSEC_PER_USEC) || + nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) || + nla_put_u32(skb, TCA_PIE_TUPDATE, jiffies_to_usecs(q->params.tupdate)) || + nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) || + nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) || + nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) || + nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode)) + goto nla_put_failure; + + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -1; + +} + +static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct pie_sched_data *q = qdisc_priv(sch); + struct tc_pie_xstats st = { + .prob = q->vars.prob, + .delay = ((u32) PSCHED_TICKS2NS(q->vars.qdelay)) / + NSEC_PER_USEC, + /* unscale and return dq_rate in bytes per sec */ + .avg_dq_rate = q->vars.avg_dq_rate * + (PSCHED_TICKS_PER_SEC) >> PIE_SCALE, + .packets_in = q->stats.packets_in, + .overlimit = q->stats.overlimit, + .maxq = q->stats.maxq, + .dropped = q->stats.dropped, + .ecn_mark = q->stats.ecn_mark, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) +{ + struct sk_buff *skb; + skb = __qdisc_dequeue_head(sch, &sch->q); + + if (!skb) + return NULL; + + pie_process_dequeue(sch, skb); + return skb; +} + +static void pie_reset(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + qdisc_reset_queue(sch); + pie_vars_init(&q->vars); +} + +static void pie_destroy(struct Qdisc *sch) +{ + struct pie_sched_data *q = qdisc_priv(sch); + q->params.tupdate = 0; + del_timer_sync(&q->adapt_timer); +} + +static struct Qdisc_ops pie_qdisc_ops __read_mostly = { + .id = "pie", + .priv_size = sizeof(struct pie_sched_data), + .enqueue = pie_qdisc_enqueue, + .dequeue = pie_qdisc_dequeue, + .peek = qdisc_peek_dequeued, + .init = pie_init, + .destroy = pie_destroy, + .reset = pie_reset, + .change = pie_change, + .dump = pie_dump, + .dump_stats = pie_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init pie_module_init(void) +{ + return register_qdisc(&pie_qdisc_ops); +} + +static void __exit pie_module_exit(void) +{ + unregister_qdisc(&pie_qdisc_ops); +} + +module_init(pie_module_init); +module_exit(pie_module_exit); + +MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler"); +MODULE_AUTHOR("Vijay Subramanian"); +MODULE_AUTHOR("Mythili Prabhu"); +MODULE_LICENSE("GPL"); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 111516c3d34..9c77947c059 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -207,8 +207,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - - q->empty = 1; } /* Free the outqueue structure and any related pending chunks. @@ -331,7 +329,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - q->empty = 0; break; } } else { @@ -653,7 +650,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - q->empty = 0; q->asoc->stats.rtxchunks++; break; } @@ -1064,8 +1060,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_reset_timers(transport); - q->empty = 0; - /* Only let one DATA chunk get bundled with a * COOKIE-ECHO chunk. */ @@ -1274,29 +1268,17 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, asoc->adv_peer_ack_point); - /* See if all chunks are acked. - * Make sure the empty queue handler will get run later. - */ - q->empty = (list_empty(&q->out_chunk_list) && - list_empty(&q->retransmit)); - if (!q->empty) - goto finish; - - list_for_each_entry(transport, transport_list, transports) { - q->empty = q->empty && list_empty(&transport->transmitted); - if (!q->empty) - goto finish; - } - - pr_debug("%s: sack queue is empty\n", __func__); -finish: - return q->empty; + return sctp_outq_is_empty(q); } -/* Is the outqueue empty? */ +/* Is the outqueue empty? + * The queue is empty when we have not pending data, no in-flight data + * and nothing pending retransmissions. + */ int sctp_outq_is_empty(const struct sctp_outq *q) { - return q->empty; + return q->out_qlen == 0 && q->outstanding_bytes == 0 && + list_empty(&q->retransmit); } /******************************************************************** diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4c2a80b3c01..bf860d9e75a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -794,7 +794,7 @@ void tipc_bclink_init(void) void tipc_bclink_stop(void) { spin_lock_bh(&bc_lock); - tipc_link_stop(bcl); + tipc_link_purge_queues(bcl); spin_unlock_bh(&bc_lock); memset(bclink, 0, sizeof(*bclink)); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 07ed5cc8235..2d456ab0e84 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -541,7 +541,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, if (likely(b_ptr)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { buf->next = NULL; - tipc_recv_msg(buf, b_ptr); + tipc_rcv(buf, b_ptr); rcu_read_unlock(); return NET_RX_SUCCESS; } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 410efb1e930..4f5db9ad5bf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -161,8 +161,7 @@ extern struct tipc_bearer tipc_bearers[]; * TIPC routines available to supported media types */ -void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); - +void tipc_rcv(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bc849f1efa1..412ff41b861 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -50,6 +50,7 @@ * @dest: destination address for request messages * @domain: network domain to which links can be established * @num_nodes: number of nodes currently discovered (i.e. with an active link) + * @lock: spinlock for controlling access to requests * @buf: request message to be (repeatedly) sent * @timer: timer governing period between requests * @timer_intv: current interval between requests (in ms) @@ -59,6 +60,7 @@ struct tipc_link_req { struct tipc_media_addr dest; u32 domain; int num_nodes; + spinlock_t lock; struct sk_buff *buf; struct timer_list timer; unsigned int timer_intv; @@ -274,7 +276,9 @@ static void disc_update(struct tipc_link_req *req) */ void tipc_disc_add_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes++; + spin_unlock_bh(&req->lock); } /** @@ -283,8 +287,10 @@ void tipc_disc_add_dest(struct tipc_link_req *req) */ void tipc_disc_remove_dest(struct tipc_link_req *req) { + spin_lock_bh(&req->lock); req->num_nodes--; disc_update(req); + spin_unlock_bh(&req->lock); } /** @@ -297,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req) { int max_delay; - spin_lock_bh(&req->bearer->lock); + spin_lock_bh(&req->lock); /* Stop searching if only desired node has been found */ if (tipc_node(req->domain) && req->num_nodes) { @@ -325,7 +331,7 @@ static void disc_timeout(struct tipc_link_req *req) k_start_timer(&req->timer, req->timer_intv); exit: - spin_unlock_bh(&req->bearer->lock); + spin_unlock_bh(&req->lock); } /** @@ -356,6 +362,7 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, req->domain = dest_domain; req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; + spin_lock_init(&req->lock); k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req); k_start_timer(&req->timer, req->timer_intv); b_ptr->link_req = req; diff --git a/net/tipc/link.c b/net/tipc/link.c index 131a32a7b17..471973ff134 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012, Ericsson AB + * Copyright (c) 1996-2007, 2012-2014, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -78,8 +78,8 @@ static const char *link_unk_evt = "Unknown link event "; static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, struct sk_buff *buf); static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf); -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf); +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tolerance); static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, @@ -87,7 +87,6 @@ static int link_send_sections_long(struct tipc_port *sender, static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void link_start(struct tipc_link *l_ptr); static int link_send_long_buf(struct tipc_link *l_ptr, struct sk_buff *buf); static void tipc_link_send_sync(struct tipc_link *l); static void tipc_link_recv_sync(struct tipc_node *n, struct sk_buff *buf); @@ -278,9 +277,11 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, tipc_node_attach_link(n_ptr, l_ptr); - k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); + k_init_timer(&l_ptr->timer, (Handler)link_timeout, + (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); - tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); + + link_state_event(l_ptr, STARTING_EVT); return l_ptr; } @@ -305,19 +306,13 @@ void tipc_link_delete(struct tipc_link *l_ptr) tipc_node_lock(l_ptr->owner); tipc_link_reset(l_ptr); tipc_node_detach_link(l_ptr->owner, l_ptr); - tipc_link_stop(l_ptr); + tipc_link_purge_queues(l_ptr); list_del_init(&l_ptr->link_list); tipc_node_unlock(l_ptr->owner); k_term_timer(&l_ptr->timer); kfree(l_ptr); } -static void link_start(struct tipc_link *l_ptr) -{ - tipc_node_lock(l_ptr->owner); - link_state_event(l_ptr, STARTING_EVT); - tipc_node_unlock(l_ptr->owner); -} /** * link_schedule_port - schedule port for deferred sending @@ -403,10 +398,10 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) } /** - * tipc_link_stop - purge all inbound and outbound messages associated with link + * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link */ -void tipc_link_stop(struct tipc_link *l_ptr) +void tipc_link_purge_queues(struct tipc_link *l_ptr) { kfree_skb_list(l_ptr->oldest_deferred_in); kfree_skb_list(l_ptr->first_out); @@ -437,8 +432,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner) && - l_ptr->owner->permit_changeover) { + if (was_active_link && tipc_node_active_links(l_ptr->owner)) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; } @@ -1422,14 +1416,14 @@ static int link_recv_buf_validate(struct sk_buff *buf) } /** - * tipc_recv_msg - process TIPC messages arriving from off-node + * tipc_rcv - process TIPC packets/messages arriving from off-node * @head: pointer to message buffer chain * @tb_ptr: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. */ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) +void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); while (head) { @@ -1603,7 +1597,7 @@ deliver: continue; case CHANGEOVER_PROTOCOL: type = msg_type(msg); - if (link_recv_changeover_msg(&l_ptr, &buf)) { + if (tipc_link_tunnel_rcv(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); if (type == ORIGINAL_MSG) @@ -1837,8 +1831,6 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf) if (tipc_own_addr > msg_prevnode(msg)) l_ptr->b_ptr->net_plane = msg_net_plane(msg); - l_ptr->owner->permit_changeover = msg_redundant_link(msg); - switch (msg_type(msg)) { case RESET_MSG: @@ -1954,13 +1946,13 @@ exit: } -/* - * tipc_link_tunnel(): Send one message via a link belonging to - * another bearer. Owner node is locked. +/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to + * a different bearer. Owner node is locked. */ -static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, - u32 selector) +static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, + struct tipc_msg *tunnel_hdr, + struct tipc_msg *msg, + u32 selector) { struct tipc_link *tunnel; struct sk_buff *buf; @@ -1983,12 +1975,13 @@ static void tipc_link_tunnel(struct tipc_link *l_ptr, } - -/* - * changeover(): Send whole message queue via the remaining link - * Owner node is locked. +/* tipc_link_failover_send_queue(): A link has gone down, but a second + * link is still active. We can do failover. Tunnel the failing link's + * whole send queue via the remaining link. This way, we don't lose + * any packets, and sequence order is preserved for subsequent traffic + * sent over the remaining link. Owner node is locked. */ -void tipc_link_changeover(struct tipc_link *l_ptr) +void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { u32 msgcount = l_ptr->out_queue_size; struct sk_buff *crs = l_ptr->first_out; @@ -1999,11 +1992,6 @@ void tipc_link_changeover(struct tipc_link *l_ptr) if (!tunnel) return; - if (!l_ptr->owner->permit_changeover) { - pr_warn("%speer did not permit changeover\n", link_co_err); - return; - } - tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); @@ -2037,20 +2025,30 @@ void tipc_link_changeover(struct tipc_link *l_ptr) msgcount = msg_msgcnt(msg); while (msgcount--) { msg_set_seqno(m, msg_seqno(msg)); - tipc_link_tunnel(l_ptr, &tunnel_hdr, m, - msg_link_selector(m)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m, + msg_link_selector(m)); pos += align(msg_size(m)); m = (struct tipc_msg *)pos; } } else { - tipc_link_tunnel(l_ptr, &tunnel_hdr, msg, - msg_link_selector(msg)); + tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg, + msg_link_selector(msg)); } crs = crs->next; } } -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel) +/* tipc_link_dup_send_queue(): A second link has become active. Tunnel a + * duplicate of the first link's send queue via the new link. This way, we + * are guaranteed that currently queued packets from a socket are delivered + * before future traffic from the same socket, even if this is using the + * new link. The last arriving copy of each duplicate packet is dropped at + * the receiving end by the regular protocol check, so packet cardinality + * and sequence order is preserved per sender/receiver socket pair. + * Owner node is locked. + */ +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *tunnel) { struct sk_buff *iter; struct tipc_msg tunnel_hdr; @@ -2106,12 +2104,14 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) return eb; } -/* - * link_recv_changeover_msg(): Receive tunneled packet sent - * via other link. Node is locked. Return extracted buffer. +/* tipc_link_tunnel_rcv(): Receive a tunneled packet, sent + * via other link as result of a failover (ORIGINAL_MSG) or + * a new active link (DUPLICATE_MSG). Failover packets are + * returned to the active link for delivery upwards. + * Owner node is locked. */ -static int link_recv_changeover_msg(struct tipc_link **l_ptr, - struct sk_buff **buf) +static int tipc_link_tunnel_rcv(struct tipc_link **l_ptr, + struct sk_buff **buf) { struct sk_buff *tunnel_buf = *buf; struct tipc_link *dest_link; diff --git a/net/tipc/link.h b/net/tipc/link.h index 0636ca95be3..3b6aa65b608 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -216,15 +216,20 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct tipc_link *l_ptr); -void tipc_link_changeover(struct tipc_link *l_ptr); -void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *dest); +void tipc_link_failover_send_queue(struct tipc_link *l_ptr); +void tipc_link_dup_send_queue(struct tipc_link *l_ptr, + struct tipc_link *dest); void tipc_link_reset_fragments(struct tipc_link *l_ptr); int tipc_link_is_up(struct tipc_link *l_ptr); int tipc_link_is_active(struct tipc_link *l_ptr); -void tipc_link_stop(struct tipc_link *l_ptr); -struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_space); -struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); +void tipc_link_purge_queues(struct tipc_link *l_ptr); +struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, + int req_tlv_space, + u16 cmd); +struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, + int req_tlv_space); +struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, + int req_tlv_space); void tipc_link_reset(struct tipc_link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); void tipc_link_send_names(struct list_head *message_list, u32 dest); diff --git a/net/tipc/node.c b/net/tipc/node.c index e167d261556..efe4d41bf11 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -162,7 +162,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) pr_info("New link <%s> becomes standby\n", l_ptr->name); return; } - tipc_link_send_duplicate(active[0], l_ptr); + tipc_link_dup_send_queue(active[0], l_ptr); if (l_ptr->priority == active[0]->priority) { active[0] = l_ptr; return; @@ -225,7 +225,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) if (active[0] == l_ptr) node_select_active_links(n_ptr); if (tipc_node_is_up(n_ptr)) - tipc_link_changeover(l_ptr); + tipc_link_failover_send_queue(l_ptr); else node_lost_contact(n_ptr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index d4bb654c858..63e2e8ead2f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,6 @@ * @working_links: number of working links to node (both active and standby) * @block_setup: bit mask of conditions preventing link establishment to node * @link_cnt: number of links to node - * @permit_changeover: non-zero if node has redundant links to this system * @signature: node instance identifier * @bclink: broadcast-related info * @acked: sequence # of last outbound b'cast message acknowledged by node @@ -89,7 +88,6 @@ struct tipc_node { int link_cnt; int working_links; int block_setup; - int permit_changeover; u32 signature; struct { u32 acked; diff --git a/net/tipc/port.c b/net/tipc/port.c index 5fd4c8cec08..b742b265452 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -251,18 +251,15 @@ struct tipc_port *tipc_createport(struct sock *sk, return p_ptr; } -int tipc_deleteport(u32 ref) +int tipc_deleteport(struct tipc_port *p_ptr) { - struct tipc_port *p_ptr; struct sk_buff *buf = NULL; - tipc_withdraw(ref, 0, NULL); - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; + tipc_withdraw(p_ptr, 0, NULL); - tipc_ref_discard(ref); - tipc_port_unlock(p_ptr); + spin_lock_bh(p_ptr->lock); + tipc_ref_discard(p_ptr->ref); + spin_unlock_bh(p_ptr->lock); k_cancel_timer(&p_ptr->timer); if (p_ptr->connected) { @@ -704,47 +701,36 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) } -int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; u32 key; - int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) + if (p_ptr->connected) return -EINVAL; + key = p_ptr->ref + p_ptr->pub_count + 1; + if (key == p_ptr->ref) + return -EADDRINUSE; - if (p_ptr->connected) - goto exit; - key = ref + p_ptr->pub_count + 1; - if (key == ref) { - res = -EADDRINUSE; - goto exit; - } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; p_ptr->published = 1; - res = 0; + return 0; } -exit: - tipc_port_unlock(p_ptr); - return res; + return -EINVAL; } -int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, + struct tipc_name_seq const *seq) { - struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; if (!seq) { list_for_each_entry_safe(publ, tpubl, &p_ptr->publications, pport_list) { @@ -771,7 +757,6 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } if (list_empty(&p_ptr->publications)) p_ptr->published = 0; - tipc_port_unlock(p_ptr); return res; } diff --git a/net/tipc/port.h b/net/tipc/port.h index 91225359734..34f12bd4074 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -116,7 +116,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err); void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_deleteport(u32 portref); +int tipc_deleteport(struct tipc_port *p_ptr); int tipc_portimportance(u32 portref, unsigned int *importance); int tipc_set_portimportance(u32 portref, unsigned int importance); @@ -127,9 +127,9 @@ int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); -int tipc_publish(u32 portref, unsigned int scope, +int tipc_publish(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, +int tipc_withdraw(struct tipc_port *p_ptr, unsigned int scope, struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5efdeef06f9..c8341d1f995 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -351,7 +351,7 @@ static int release(struct socket *sock) * Delete TIPC port; this ensures no more messages are queued * (also disconnects an active connection & sends a 'FIN-' to peer) */ - res = tipc_deleteport(tport->ref); + res = tipc_deleteport(tport); /* Discard any remaining (connection-based) messages in receive queue */ __skb_queue_purge(&sk->sk_receive_queue); @@ -383,30 +383,46 @@ static int release(struct socket *sock) */ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) { + struct sock *sk = sock->sk; struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - u32 portref = tipc_sk_port(sock->sk)->ref; + struct tipc_port *tport = tipc_sk_port(sock->sk); + int res = -EINVAL; - if (unlikely(!uaddr_len)) - return tipc_withdraw(portref, 0, NULL); + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_withdraw(tport, 0, NULL); + goto exit; + } - if (uaddr_len < sizeof(struct sockaddr_tipc)) - return -EINVAL; - if (addr->family != AF_TIPC) - return -EAFNOSUPPORT; + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } if (addr->addrtype == TIPC_ADDR_NAME) addr->addr.nameseq.upper = addr->addr.nameseq.lower; - else if (addr->addrtype != TIPC_ADDR_NAMESEQ) - return -EAFNOSUPPORT; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && (addr->addr.nameseq.type != TIPC_TOP_SRV) && - (addr->addr.nameseq.type != TIPC_CFG_SRV)) - return -EACCES; + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } - return (addr->scope > 0) ? - tipc_publish(portref, addr->scope, &addr->addr.nameseq) : - tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq); + res = (addr->scope > 0) ? + tipc_publish(tport, addr->scope, &addr->addr.nameseq) : + tipc_withdraw(tport, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; } /** diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index a271c27fac7..722da616438 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -124,6 +124,10 @@ int ieee80211_radiotap_iterator_init( /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) { + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader + sizeof(uint32_t) > + (unsigned long)iterator->_max_length) + return -EINVAL; while (get_unaligned_le32(iterator->_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 65f800890d7..d3c5bd7c6b5 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -632,6 +632,16 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } #endif + if (!bss && (status == WLAN_STATUS_SUCCESS)) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + if (bss) + cfg80211_hold_bss(bss_from_pub(bss)); + } + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); @@ -649,16 +659,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, return; } - if (!bss) { - WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - if (WARN_ON(!bss)) - return; - cfg80211_hold_bss(bss_from_pub(bss)); - } + if (WARN_ON(!bss)) + return; wdev->current_bss = bss_from_pub(bss); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a7487f34e81..e205c4b56af 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1641,20 +1641,22 @@ free_dst: goto out; } -static int inline -xfrm_dst_alloc_copy(void **target, const void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); if (!*target) return -ENOMEM; } + memcpy(*target, src, size); return 0; } +#endif -static int inline -xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1665,8 +1667,8 @@ xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) #endif } -static int inline -xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 32b10f53d0b..2dcb37736d8 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,7 +82,9 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi - kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" + if [ -n "${CONFIG_ARM}" ] && [ -n "${CONFIG_PAGE_OFFSET}" ]; then + kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" + fi local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 419491d8e7d..6625699f497 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4334,8 +4334,10 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) } err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); - if (err) + if (err) { selinux_netlbl_err(skb, err, 0); + return err; + } } if (secmark_active) { @@ -5586,11 +5588,11 @@ static int selinux_setprocattr(struct task_struct *p, /* Check for ptracing, and update the task SID if ok. Otherwise, leave SID unchanged and fail. */ ptsid = 0; - task_lock(p); + rcu_read_lock(); tracer = ptrace_parent(p); if (tracer) ptsid = task_sid(tracer); - task_unlock(p); + rcu_read_unlock(); if (tracer) { error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 6e03b465e44..a2104671f51 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1937,6 +1937,8 @@ static int wait_for_avail(struct snd_pcm_substream *substream, case SNDRV_PCM_STATE_DISCONNECTED: err = -EBADFD; goto _endloop; + case SNDRV_PCM_STATE_PAUSED: + continue; } if (!tout) { snd_printd("%s write error (DMA or IRQ trouble?)\n", diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 27aa14007cb..956871d8b3d 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -3433,6 +3433,10 @@ static void check_probe_mask(struct azx *chip, int dev) * white/black-list for enable_msi */ static struct snd_pci_quirk msi_black_list[] = { + SND_PCI_QUIRK(0x103c, 0x2191, "HP", 0), /* AMD Hudson */ + SND_PCI_QUIRK(0x103c, 0x2192, "HP", 0), /* AMD Hudson */ + SND_PCI_QUIRK(0x103c, 0x21f7, "HP", 0), /* AMD Hudson */ + SND_PCI_QUIRK(0x103c, 0x21fa, "HP", 0), /* AMD Hudson */ SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */ SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */ SND_PCI_QUIRK(0x1043, 0x822d, "ASUS", 0), /* Athlon64 X2 + nvidia MCP55 */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 34de5dc2fe9..c5646941539 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4247,12 +4247,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0606, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0610, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x061f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0629, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x063e, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x063f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0640, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 8697cedccd2..1ead3c977a5 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -648,7 +648,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream, dma_params = ssc_p->dma_params[dir]; - ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_enable); + ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_disable); ssc_writel(ssc_p->ssc->regs, IDR, dma_params->mask->ssc_error); pr_debug("%s enabled SSC_SR=0x%08x\n", @@ -657,6 +657,33 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream, return 0; } +static int atmel_ssc_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) +{ + struct atmel_ssc_info *ssc_p = &ssc_info[dai->id]; + struct atmel_pcm_dma_params *dma_params; + int dir; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + dir = 0; + else + dir = 1; + + dma_params = ssc_p->dma_params[dir]; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_enable); + break; + default: + ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_disable); + break; + } + + return 0; +} #ifdef CONFIG_PM static int atmel_ssc_suspend(struct snd_soc_dai *cpu_dai) @@ -731,6 +758,7 @@ static const struct snd_soc_dai_ops atmel_ssc_dai_ops = { .startup = atmel_ssc_startup, .shutdown = atmel_ssc_shutdown, .prepare = atmel_ssc_prepare, + .trigger = atmel_ssc_trigger, .hw_params = atmel_ssc_hw_params, .set_fmt = atmel_ssc_set_dai_fmt, .set_clkdiv = atmel_ssc_set_dai_clkdiv, diff --git a/sound/soc/atmel/sam9x5_wm8731.c b/sound/soc/atmel/sam9x5_wm8731.c index 1b372283bd0..7d6a9055874 100644 --- a/sound/soc/atmel/sam9x5_wm8731.c +++ b/sound/soc/atmel/sam9x5_wm8731.c @@ -109,7 +109,7 @@ static int sam9x5_wm8731_driver_probe(struct platform_device *pdev) dai->stream_name = "WM8731 PCM"; dai->codec_dai_name = "wm8731-hifi"; dai->init = sam9x5_wm8731_init; - dai->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF + dai->dai_fmt = SND_SOC_DAIFMT_DSP_A | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM; ret = snd_soc_of_parse_card_name(card, "atmel,model"); diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 99b359e19d3..0ab2dc29647 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -1012,7 +1012,7 @@ static const struct snd_soc_dapm_route wm5110_dapm_routes[] = { { "AEC Loopback", "HPOUT3L", "OUT3L" }, { "AEC Loopback", "HPOUT3R", "OUT3R" }, { "HPOUT3L", NULL, "OUT3L" }, - { "HPOUT3R", NULL, "OUT3L" }, + { "HPOUT3R", NULL, "OUT3R" }, { "AEC Loopback", "SPKOUTL", "OUT4L" }, { "SPKOUTLN", NULL, "OUT4L" }, diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index 3938fb1c203..53bbfac6a83 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -1444,7 +1444,7 @@ static int wm8904_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_DSP_B: - aif1 |= WM8904_AIF_LRCLK_INV; + aif1 |= 0x3 | WM8904_AIF_LRCLK_INV; case SND_SOC_DAIFMT_DSP_A: aif1 |= 0x3; break; diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 543c5c2631b..0f17ed3e29f 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2439,7 +2439,20 @@ static void wm8962_configure_bclk(struct snd_soc_codec *codec) snd_soc_update_bits(codec, WM8962_CLOCKING_4, WM8962_SYSCLK_RATE_MASK, clocking4); + /* DSPCLK_DIV can be only generated correctly after enabling SYSCLK. + * So we here provisionally enable it and then disable it afterward + * if current bias_level hasn't reached SND_SOC_BIAS_ON. + */ + if (codec->dapm.bias_level != SND_SOC_BIAS_ON) + snd_soc_update_bits(codec, WM8962_CLOCKING2, + WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + dspclk = snd_soc_read(codec, WM8962_CLOCKING1); + + if (codec->dapm.bias_level != SND_SOC_BIAS_ON) + snd_soc_update_bits(codec, WM8962_CLOCKING2, + WM8962_SYSCLK_ENA_MASK, 0); + if (dspclk < 0) { dev_err(codec->dev, "Failed to read DSPCLK: %d\n", dspclk); return; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 46ec0e9744d..4fbcab63e61 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1474,13 +1474,17 @@ static int wm_adsp2_ena(struct wm_adsp *dsp) return ret; /* Wait for the RAM to start, should be near instantaneous */ - count = 0; - do { + for (count = 0; count < 10; ++count) { ret = regmap_read(dsp->regmap, dsp->base + ADSP2_STATUS1, &val); if (ret != 0) return ret; - } while (!(val & ADSP2_RAM_RDY) && ++count < 10); + + if (val & ADSP2_RAM_RDY) + break; + + msleep(1); + } if (!(val & ADSP2_RAM_RDY)) { adsp_err(dsp, "Failed to start DSP RAM\n"); diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c index 61e48852b9e..3fd76bc391d 100644 --- a/sound/soc/fsl/imx-wm8962.c +++ b/sound/soc/fsl/imx-wm8962.c @@ -130,8 +130,6 @@ static int imx_wm8962_set_bias_level(struct snd_soc_card *card, break; } - dapm->bias_level = level; - return 0; } diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 0b18f654b41..3920a5e8125 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -473,17 +473,17 @@ static struct snd_soc_dai_driver kirkwood_i2s_dai_extclk[2] = { .playback = { .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_192000 | - SNDRV_PCM_RATE_CONTINUOUS | - SNDRV_PCM_RATE_KNOT, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 5512, + .rate_max = 192000, .formats = KIRKWOOD_I2S_FORMATS, }, .capture = { .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_192000 | - SNDRV_PCM_RATE_CONTINUOUS | - SNDRV_PCM_RATE_KNOT, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 5512, + .rate_max = 192000, .formats = KIRKWOOD_I2S_FORMATS, }, .ops = &kirkwood_i2s_dai_ops, @@ -494,17 +494,17 @@ static struct snd_soc_dai_driver kirkwood_i2s_dai_extclk[2] = { .playback = { .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_192000 | - SNDRV_PCM_RATE_CONTINUOUS | - SNDRV_PCM_RATE_KNOT, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 5512, + .rate_max = 192000, .formats = KIRKWOOD_SPDIF_FORMATS, }, .capture = { .channels_min = 1, .channels_max = 2, - .rates = SNDRV_PCM_RATE_8000_192000 | - SNDRV_PCM_RATE_CONTINUOUS | - SNDRV_PCM_RATE_KNOT, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + .rate_min = 5512, + .rate_max = 192000, .formats = KIRKWOOD_SPDIF_FORMATS, }, .ops = &kirkwood_i2s_dai_ops, diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index cbc9c96ce1f..41949af3baa 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -305,6 +305,20 @@ static void dmaengine_pcm_request_chan_of(struct dmaengine_pcm *pcm, } } +static void dmaengine_pcm_release_chan(struct dmaengine_pcm *pcm) +{ + unsigned int i; + + for (i = SNDRV_PCM_STREAM_PLAYBACK; i <= SNDRV_PCM_STREAM_CAPTURE; + i++) { + if (!pcm->chan[i]) + continue; + dma_release_channel(pcm->chan[i]); + if (pcm->flags & SND_DMAENGINE_PCM_FLAG_HALF_DUPLEX) + break; + } +} + /** * snd_dmaengine_pcm_register - Register a dmaengine based PCM device * @dev: The parent device for the PCM device @@ -315,6 +329,7 @@ int snd_dmaengine_pcm_register(struct device *dev, const struct snd_dmaengine_pcm_config *config, unsigned int flags) { struct dmaengine_pcm *pcm; + int ret; pcm = kzalloc(sizeof(*pcm), GFP_KERNEL); if (!pcm) @@ -326,11 +341,20 @@ int snd_dmaengine_pcm_register(struct device *dev, dmaengine_pcm_request_chan_of(pcm, dev); if (flags & SND_DMAENGINE_PCM_FLAG_NO_RESIDUE) - return snd_soc_add_platform(dev, &pcm->platform, + ret = snd_soc_add_platform(dev, &pcm->platform, &dmaengine_no_residue_pcm_platform); else - return snd_soc_add_platform(dev, &pcm->platform, + ret = snd_soc_add_platform(dev, &pcm->platform, &dmaengine_pcm_platform); + if (ret) + goto err_free_dma; + + return 0; + +err_free_dma: + dmaengine_pcm_release_chan(pcm); + kfree(pcm); + return ret; } EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_register); @@ -345,7 +369,6 @@ void snd_dmaengine_pcm_unregister(struct device *dev) { struct snd_soc_platform *platform; struct dmaengine_pcm *pcm; - unsigned int i; platform = snd_soc_lookup_platform(dev); if (!platform) @@ -353,15 +376,8 @@ void snd_dmaengine_pcm_unregister(struct device *dev) pcm = soc_platform_to_pcm(platform); - for (i = SNDRV_PCM_STREAM_PLAYBACK; i <= SNDRV_PCM_STREAM_CAPTURE; i++) { - if (pcm->chan[i]) { - dma_release_channel(pcm->chan[i]); - if (pcm->flags & SND_DMAENGINE_PCM_FLAG_HALF_DUPLEX) - break; - } - } - snd_soc_remove_platform(platform); + dmaengine_pcm_release_chan(pcm); kfree(pcm); } EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_unregister); diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 11a90cd027f..891b9a9bcbf 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -600,12 +600,13 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream) struct snd_soc_platform *platform = rtd->platform; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; struct snd_soc_dai *codec_dai = rtd->codec_dai; - struct snd_soc_codec *codec = rtd->codec; + bool playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass); /* apply codec digital mute */ - if (!codec->active) + if ((playback && codec_dai->playback_active == 1) || + (!playback && codec_dai->capture_active == 1)) snd_soc_dai_digital_mute(codec_dai, 1, substream->stream); /* free any machine hw params */ diff --git a/sound/soc/tegra/tegra20_i2s.c b/sound/soc/tegra/tegra20_i2s.c index 364bf6a907e..8c819f81147 100644 --- a/sound/soc/tegra/tegra20_i2s.c +++ b/sound/soc/tegra/tegra20_i2s.c @@ -74,7 +74,7 @@ static int tegra20_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct tegra20_i2s *i2s = snd_soc_dai_get_drvdata(dai); - unsigned int mask, val; + unsigned int mask = 0, val = 0; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { case SND_SOC_DAIFMT_NB_NF: @@ -83,10 +83,10 @@ static int tegra20_i2s_set_fmt(struct snd_soc_dai *dai, return -EINVAL; } - mask = TEGRA20_I2S_CTRL_MASTER_ENABLE; + mask |= TEGRA20_I2S_CTRL_MASTER_ENABLE; switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: - val = TEGRA20_I2S_CTRL_MASTER_ENABLE; + val |= TEGRA20_I2S_CTRL_MASTER_ENABLE; break; case SND_SOC_DAIFMT_CBM_CFM: break; diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c index 08bc6931c7c..8c7c1028e57 100644 --- a/sound/soc/tegra/tegra20_spdif.c +++ b/sound/soc/tegra/tegra20_spdif.c @@ -67,15 +67,15 @@ static int tegra20_spdif_hw_params(struct snd_pcm_substream *substream, { struct device *dev = dai->dev; struct tegra20_spdif *spdif = snd_soc_dai_get_drvdata(dai); - unsigned int mask, val; + unsigned int mask = 0, val = 0; int ret, spdifclock; - mask = TEGRA20_SPDIF_CTRL_PACK | - TEGRA20_SPDIF_CTRL_BIT_MODE_MASK; + mask |= TEGRA20_SPDIF_CTRL_PACK | + TEGRA20_SPDIF_CTRL_BIT_MODE_MASK; switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: - val = TEGRA20_SPDIF_CTRL_PACK | - TEGRA20_SPDIF_CTRL_BIT_MODE_16BIT; + val |= TEGRA20_SPDIF_CTRL_PACK | + TEGRA20_SPDIF_CTRL_BIT_MODE_16BIT; break; default: return -EINVAL; diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c index 231a785b392..02247fee1cf 100644 --- a/sound/soc/tegra/tegra30_i2s.c +++ b/sound/soc/tegra/tegra30_i2s.c @@ -118,7 +118,7 @@ static int tegra30_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct tegra30_i2s *i2s = snd_soc_dai_get_drvdata(dai); - unsigned int mask, val; + unsigned int mask = 0, val = 0; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { case SND_SOC_DAIFMT_NB_NF: @@ -127,10 +127,10 @@ static int tegra30_i2s_set_fmt(struct snd_soc_dai *dai, return -EINVAL; } - mask = TEGRA30_I2S_CTRL_MASTER_ENABLE; + mask |= TEGRA30_I2S_CTRL_MASTER_ENABLE; switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: - val = TEGRA30_I2S_CTRL_MASTER_ENABLE; + val |= TEGRA30_I2S_CTRL_MASTER_ENABLE; break; case SND_SOC_DAIFMT_CBM_CFM: break; |