diff options
54 files changed, 916 insertions, 336 deletions
diff --git a/Documentation/edac.txt b/Documentation/edac.txt index 0b875e8da96..9ee774de57c 100644 --- a/Documentation/edac.txt +++ b/Documentation/edac.txt @@ -196,7 +196,7 @@ csrow3. The representation of the above is reflected in the directory tree in EDAC's sysfs interface. Starting in directory /sys/devices/system/edac/mc each memory controller will be represented -by its own 'mcX' directory, where 'X" is the index of the MC. +by its own 'mcX' directory, where 'X' is the index of the MC. ..../edac/mc/ @@ -207,7 +207,7 @@ by its own 'mcX' directory, where 'X" is the index of the MC. .... Under each 'mcX' directory each 'csrowX' is again represented by a -'csrowX', where 'X" is the csrow index: +'csrowX', where 'X' is the csrow index: .../mc/mc0/ @@ -232,7 +232,7 @@ EDAC control and attribute files. In 'mcX' directories are EDAC control and attribute files for -this 'X" instance of the memory controllers: +this 'X' instance of the memory controllers: Counter reset control file: @@ -343,7 +343,7 @@ Sdram memory scrubbing rate: 'csrowX' DIRECTORIES In the 'csrowX' directories are EDAC control and attribute files for -this 'X" instance of csrow: +this 'X' instance of csrow: Total Uncorrectable Errors count attribute file: diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fe95105992c..3c5e465296e 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -144,6 +144,7 @@ tcp_adv_win_scale - INTEGER Count buffering overhead as bytes/2^tcp_adv_win_scale (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), if it is <= 0. + Possible values are [-31, 31], inclusive. Default: 2 tcp_allowed_congestion_control - STRING diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index a40457d8192..c85923e56b8 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -30,6 +30,7 @@ #include <linux/irq.h> #include <linux/time.h> #include <linux/gpio.h> +#include <linux/console.h> #include <asm/mach/time.h> #include <asm/mach/irq.h> @@ -118,6 +119,10 @@ static void omap2_enter_full_retention(void) if (omap_irq_pending()) goto no_sleep; + /* Block console output in case it is on one of the OMAP UARTs */ + if (try_acquire_console_sem()) + goto no_sleep; + omap_uart_prepare_idle(0); omap_uart_prepare_idle(1); omap_uart_prepare_idle(2); @@ -131,6 +136,8 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(1); omap_uart_resume_idle(0); + release_console_sem(); + no_sleep: if (omap2_pm_debug) { unsigned long long tmp; diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 75c0cd13ad8..0ec8a04b747 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -28,6 +28,7 @@ #include <linux/clk.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/console.h> #include <plat/sram.h> #include <plat/clockdomain.h> @@ -385,6 +386,12 @@ void omap_sram_idle(void) omap3_enable_io_chain(); } + /* Block console output in case it is on one of the OMAP UARTs */ + if (per_next_state < PWRDM_POWER_ON || + core_next_state < PWRDM_POWER_ON) + if (try_acquire_console_sem()) + goto console_still_active; + /* PER */ if (per_next_state < PWRDM_POWER_ON) { omap_uart_prepare_idle(2); @@ -463,6 +470,9 @@ void omap_sram_idle(void) omap_uart_resume_idle(3); } + release_console_sem(); + +console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ if (omap3_has_io_wakeup() && (per_next_state < PWRDM_POWER_ON || diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index becf0e38ef7..d17960a1be2 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/serial_8250.h> #include <linux/pm_runtime.h> +#include <linux/console.h> #ifdef CONFIG_SERIAL_OMAP #include <plat/omap-serial.h> @@ -406,7 +407,7 @@ void omap_uart_resume_idle(int num) struct omap_uart_state *uart; list_for_each_entry(uart, &uart_list, node) { - if (num == uart->num) { + if (num == uart->num && uart->can_sleep) { omap_uart_enable_clocks(uart); /* Check for IO pad wakeup */ @@ -807,6 +808,8 @@ void __init omap_serial_init_port(int port) oh->dev_attr = uart; + acquire_console_sem(); /* in case the earlycon is on the UART */ + /* * Because of early UART probing, UART did not get idled * on init. Now that omap_device is ready, ensure full idle @@ -831,6 +834,8 @@ void __init omap_serial_init_port(int port) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; + release_console_sem(); + if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { device_init_wakeup(&od->pdev.dev, true); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 1030f842013..c17a305ecb2 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/wait.h> +#include <linux/acpi.h> #include "tpm.h" #define TPM_HEADER_SIZE 10 @@ -78,6 +79,26 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); +#ifdef CONFIG_ACPI +static int is_itpm(struct pnp_dev *dev) +{ + struct acpi_device *acpi = pnp_acpi_device(dev); + struct acpi_hardware_id *id; + + list_for_each_entry(id, &acpi->pnp.ids, list) { + if (!strcmp("INTC0102", id->id)) + return 1; + } + + return 0; +} +#else +static int is_itpm(struct pnp_dev *dev) +{ + return 0; +} +#endif + static int check_locality(struct tpm_chip *chip, int l) { if ((ioread8(chip->vendor.iobase + TPM_ACCESS(l)) & @@ -472,6 +493,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (is_itpm(to_pnp_dev(dev))) + itpm = 1; + if (itpm) dev_info(dev, "Intel iTPM workaround enabled\n"); diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index b3781399b38..ba2898b3639 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -10,16 +10,16 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o obj-$(CONFIG_EDAC_MCE) += edac_mce.o -edac_core-objs := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o -edac_core-objs += edac_module.o edac_device_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-y += edac_module.o edac_device_sysfs.o ifdef CONFIG_PCI -edac_core-objs += edac_pci.o edac_pci_sysfs.o +edac_core-y += edac_pci.o edac_pci_sysfs.o endif obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o -edac_mce_amd-objs := mce_amd.o +edac_mce_amd-y := mce_amd.o obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 8d0688f36d4..39faded3cad 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -139,7 +139,7 @@ static int __init edac_init_mce_inject(void) return 0; err_sysfs_create: - while (i-- >= 0) + while (--i >= 0) sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); kobject_del(mce_kobj); diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c index 2e847a90bad..f2b5bab5e6a 100644 --- a/drivers/isdn/icn/icn.c +++ b/drivers/isdn/icn/icn.c @@ -1627,7 +1627,7 @@ __setup("icn=", icn_setup); static int __init icn_init(void) { char *p; - char rev[10]; + char rev[20]; memset(&dev, 0, sizeof(icn_dev)); dev.memaddr = (membase & 0x0ffc000); @@ -1637,9 +1637,10 @@ static int __init icn_init(void) spin_lock_init(&dev.devlock); if ((p = strchr(revision, ':'))) { - strcpy(rev, p + 1); + strncpy(rev, p + 1, 20); p = strchr(rev, '$'); - *p = 0; + if (p) + *p = 0; } else strcpy(rev, " ??? "); printk(KERN_NOTICE "ICN-ISDN-driver Rev%smem=0x%08lx\n", rev, diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 43db398437b..4f1755bddf6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2543,10 +2543,10 @@ config PCH_GBE depends on PCI select MII ---help--- - This is a gigabit ethernet driver for Topcliff PCH. - Topcliff PCH is the platform controller hub that is used in Intel's + This is a gigabit ethernet driver for EG20T PCH. + EG20T PCH is the platform controller hub that is used in Intel's general embedded platform. - Topcliff PCH has Gigabit Ethernet interface. + EG20T PCH has Gigabit Ethernet interface. Using this interface, it is able to access system devices connected to Gigabit Ethernet. This driver enables Gigabit Ethernet function. diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 43489f89c14..53eff9ba6e9 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -155,10 +155,10 @@ static void au1000_enable_mac(struct net_device *dev, int force_reset) spin_lock_irqsave(&aup->lock, flags); if (force_reset || (!aup->mac_enabled)) { - writel(MAC_EN_CLOCK_ENABLE, &aup->enable); + writel(MAC_EN_CLOCK_ENABLE, aup->enable); au_sync_delay(2); writel((MAC_EN_RESET0 | MAC_EN_RESET1 | MAC_EN_RESET2 - | MAC_EN_CLOCK_ENABLE), &aup->enable); + | MAC_EN_CLOCK_ENABLE), aup->enable); au_sync_delay(2); aup->mac_enabled = 1; @@ -503,9 +503,9 @@ static void au1000_reset_mac_unlocked(struct net_device *dev) au1000_hard_stop(dev); - writel(MAC_EN_CLOCK_ENABLE, &aup->enable); + writel(MAC_EN_CLOCK_ENABLE, aup->enable); au_sync_delay(2); - writel(0, &aup->enable); + writel(0, aup->enable); au_sync_delay(2); aup->tx_full = 0; @@ -1119,7 +1119,7 @@ static int __devinit au1000_probe(struct platform_device *pdev) /* set a random MAC now in case platform_data doesn't provide one */ random_ether_addr(dev->dev_addr); - writel(0, &aup->enable); + writel(0, aup->enable); aup->mac_enabled = 0; pd = pdev->dev.platform_data; diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c index c3449bbc585..d887a76cd39 100644 --- a/drivers/net/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/cxgb4vf/cxgb4vf_main.c @@ -816,40 +816,48 @@ static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) } /* - * Collect up to maxaddrs worth of a netdevice's unicast addresses into an - * array of addrss pointers and return the number collected. + * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting + * at a specified offset within the list, into an array of addrss pointers and + * return the number collected. */ -static inline int collect_netdev_uc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int maxaddrs) +static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev, + const u8 **addr, + unsigned int offset, + unsigned int maxaddrs) { + unsigned int index = 0; unsigned int naddr = 0; const struct netdev_hw_addr *ha; - for_each_dev_addr(dev, ha) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } + for_each_dev_addr(dev, ha) + if (index++ >= offset) { + addr[naddr++] = ha->addr; + if (naddr >= maxaddrs) + break; + } return naddr; } /* - * Collect up to maxaddrs worth of a netdevice's multicast addresses into an - * array of addrss pointers and return the number collected. + * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting + * at a specified offset within the list, into an array of addrss pointers and + * return the number collected. */ -static inline int collect_netdev_mc_list_addrs(const struct net_device *dev, - const u8 **addr, - unsigned int maxaddrs) +static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev, + const u8 **addr, + unsigned int offset, + unsigned int maxaddrs) { + unsigned int index = 0; unsigned int naddr = 0; const struct netdev_hw_addr *ha; - netdev_for_each_mc_addr(ha, dev) { - addr[naddr++] = ha->addr; - if (naddr >= maxaddrs) - break; - } + netdev_for_each_mc_addr(ha, dev) + if (index++ >= offset) { + addr[naddr++] = ha->addr; + if (naddr >= maxaddrs) + break; + } return naddr; } @@ -862,16 +870,20 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) u64 mhash = 0; u64 uhash = 0; bool free = true; - u16 filt_idx[7]; + unsigned int offset, naddr; const u8 *addr[7]; - int ret, naddr = 0; + int ret; const struct port_info *pi = netdev_priv(dev); /* first do the secondary unicast addresses */ - naddr = collect_netdev_uc_list_addrs(dev, addr, ARRAY_SIZE(addr)); - if (naddr > 0) { + for (offset = 0; ; offset += naddr) { + naddr = collect_netdev_uc_list_addrs(dev, addr, offset, + ARRAY_SIZE(addr)); + if (naddr == 0) + break; + ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, filt_idx, &uhash, sleep); + naddr, addr, NULL, &uhash, sleep); if (ret < 0) return ret; @@ -879,12 +891,17 @@ static int set_addr_filters(const struct net_device *dev, bool sleep) } /* next set up the multicast addresses */ - naddr = collect_netdev_mc_list_addrs(dev, addr, ARRAY_SIZE(addr)); - if (naddr > 0) { + for (offset = 0; ; offset += naddr) { + naddr = collect_netdev_mc_list_addrs(dev, addr, offset, + ARRAY_SIZE(addr)); + if (naddr == 0) + break; + ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, - naddr, addr, filt_idx, &mhash, sleep); + naddr, addr, NULL, &mhash, sleep); if (ret < 0) return ret; + free = false; } return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0, diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c index e306c20dfae..19520afe1a1 100644 --- a/drivers/net/cxgb4vf/t4vf_hw.c +++ b/drivers/net/cxgb4vf/t4vf_hw.c @@ -1014,48 +1014,72 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok) { - int i, ret; + int offset, ret = 0; + unsigned nfilters = 0; + unsigned int rem = naddr; struct fw_vi_mac_cmd cmd, rpl; - struct fw_vi_mac_exact *p; - size_t len16; - if (naddr > ARRAY_SIZE(cmd.u.exact)) + if (naddr > FW_CLS_TCAM_NUM_ENTRIES) return -EINVAL; - len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, - u.exact[naddr]), 16); - memset(&cmd, 0, sizeof(cmd)); - cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) | - FW_CMD_REQUEST | - FW_CMD_WRITE | - (free ? FW_CMD_EXEC : 0) | - FW_VI_MAC_CMD_VIID(viid)); - cmd.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) | - FW_CMD_LEN16(len16)); + for (offset = 0; offset < naddr; /**/) { + unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact) + ? rem + : ARRAY_SIZE(cmd.u.exact)); + size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd, + u.exact[fw_naddr]), 16); + struct fw_vi_mac_exact *p; + int i; - for (i = 0, p = cmd.u.exact; i < naddr; i++, p++) { - p->valid_to_idx = - cpu_to_be16(FW_VI_MAC_CMD_VALID | - FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC)); - memcpy(p->macaddr, addr[i], sizeof(p->macaddr)); - } + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) | + FW_CMD_REQUEST | + FW_CMD_WRITE | + (free ? FW_CMD_EXEC : 0) | + FW_VI_MAC_CMD_VIID(viid)); + cmd.freemacs_to_len16 = + cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) | + FW_CMD_LEN16(len16)); + + for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) { + p->valid_to_idx = cpu_to_be16( + FW_VI_MAC_CMD_VALID | + FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC)); + memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr)); + } + + + ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl, + sleep_ok); + if (ret && ret != -ENOMEM) + break; - ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl, sleep_ok); - if (ret) - return ret; - - for (i = 0, p = rpl.u.exact; i < naddr; i++, p++) { - u16 index = FW_VI_MAC_CMD_IDX_GET(be16_to_cpu(p->valid_to_idx)); - - if (idx) - idx[i] = (index >= FW_CLS_TCAM_NUM_ENTRIES - ? 0xffff - : index); - if (index < FW_CLS_TCAM_NUM_ENTRIES) - ret++; - else if (hash) - *hash |= (1 << hash_mac_addr(addr[i])); + for (i = 0, p = rpl.u.exact; i < fw_naddr; i++, p++) { + u16 index = FW_VI_MAC_CMD_IDX_GET( + be16_to_cpu(p->valid_to_idx)); + + if (idx) + idx[offset+i] = + (index >= FW_CLS_TCAM_NUM_ENTRIES + ? 0xffff + : index); + if (index < FW_CLS_TCAM_NUM_ENTRIES) + nfilters++; + else if (hash) + *hash |= (1ULL << hash_mac_addr(addr[offset+i])); + } + + free = false; + offset += fw_naddr; + rem -= fw_naddr; } + + /* + * If there were no errors or we merely ran out of room in our MAC + * address arena, return the number of filters actually written. + */ + if (ret == 0 || ret == -ENOMEM) + ret = nfilters; return ret; } diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 182b2a7be8d..3d0af08483a 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -400,6 +400,7 @@ static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes) skb_arr_rq1[index] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); if (!skb_arr_rq1[index]) { + ehea_info("Unable to allocate enough skb in the array\n"); pr->rq1_skba.os_skbs = fill_wqes - i; break; } @@ -422,13 +423,20 @@ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a) struct net_device *dev = pr->port->netdev; int i; - for (i = 0; i < pr->rq1_skba.len; i++) { + if (nr_rq1a > pr->rq1_skba.len) { + ehea_error("NR_RQ1A bigger than skb array len\n"); + return; + } + + for (i = 0; i < nr_rq1a; i++) { skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb_arr_rq1[i]) + if (!skb_arr_rq1[i]) { + ehea_info("No enough memory to allocate skb array\n"); break; + } } /* Ring doorbell */ - ehea_update_rq1a(pr->qp, nr_rq1a); + ehea_update_rq1a(pr->qp, i); } static int ehea_refill_rq_def(struct ehea_port_res *pr, @@ -735,8 +743,10 @@ static int ehea_proc_rwqes(struct net_device *dev, skb = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); - if (!skb) + if (!skb) { + ehea_info("Not enough memory to allocate skb\n"); break; + } } skb_copy_to_linear_data(skb, ((char *)cqe) + 64, cqe->num_bytes_transfered - 4); diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index 472056b4744..03a1d280105 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -1,6 +1,6 @@ /* * Copyright (C) 1999 - 2010 Intel Corporation. - * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD. + * Copyright (C) 2010 OKI SEMICONDUCTOR CO., LTD. * * This code was derived from the Intel e1000e Linux driver. * @@ -2464,8 +2464,8 @@ static void __exit pch_gbe_exit_module(void) module_init(pch_gbe_init_module); module_exit(pch_gbe_exit_module); -MODULE_DESCRIPTION("OKI semiconductor PCH Gigabit ethernet Driver"); -MODULE_AUTHOR("OKI semiconductor, <masa-korg@dsn.okisemi.com>"); +MODULE_DESCRIPTION("EG20T PCH Gigabit ethernet Driver"); +MODULE_AUTHOR("OKI SEMICONDUCTOR, <toshiharu-linux@dsn.okisemi.com>"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, pch_gbe_pcidev_id); diff --git a/drivers/net/pch_gbe/pch_gbe_param.c b/drivers/net/pch_gbe/pch_gbe_param.c index 2510146fc56..ef0996a0eaa 100644 --- a/drivers/net/pch_gbe/pch_gbe_param.c +++ b/drivers/net/pch_gbe/pch_gbe_param.c @@ -434,8 +434,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) .err = "using default of " __MODULE_STRING(PCH_GBE_DEFAULT_TXD), .def = PCH_GBE_DEFAULT_TXD, - .arg = { .r = { .min = PCH_GBE_MIN_TXD } }, - .arg = { .r = { .max = PCH_GBE_MAX_TXD } } + .arg = { .r = { .min = PCH_GBE_MIN_TXD, + .max = PCH_GBE_MAX_TXD } } }; struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; tx_ring->count = TxDescriptors; @@ -450,8 +450,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) .err = "using default of " __MODULE_STRING(PCH_GBE_DEFAULT_RXD), .def = PCH_GBE_DEFAULT_RXD, - .arg = { .r = { .min = PCH_GBE_MIN_RXD } }, - .arg = { .r = { .max = PCH_GBE_MAX_RXD } } + .arg = { .r = { .min = PCH_GBE_MIN_RXD, + .max = PCH_GBE_MAX_RXD } } }; struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; rx_ring->count = RxDescriptors; diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 09cf56d0416..39659976a1a 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -2584,16 +2584,16 @@ ppp_create_interface(struct net *net, int unit, int *retp) */ dev_net_set(dev, net); - ret = -EEXIST; mutex_lock(&pn->all_ppp_mutex); if (unit < 0) { unit = unit_get(&pn->units_idr, ppp); if (unit < 0) { - *retp = unit; + ret = unit; goto out2; } } else { + ret = -EEXIST; if (unit_find(&pn->units_idr, unit)) goto out2; /* unit already exists */ /* @@ -2668,10 +2668,10 @@ static void ppp_shutdown_interface(struct ppp *ppp) ppp->closing = 1; ppp_unlock(ppp); unregister_netdev(ppp->dev); + unit_put(&pn->units_idr, ppp->file.index); } else ppp_unlock(ppp); - unit_put(&pn->units_idr, ppp->file.index); ppp->file.dead = 1; ppp->owner = NULL; wake_up_interruptible(&ppp->file.rwait); @@ -2859,8 +2859,7 @@ static void __exit ppp_cleanup(void) * by holding all_ppp_mutex */ -/* associate pointer with specified number */ -static int unit_set(struct idr *p, void *ptr, int n) +static int __unit_alloc(struct idr *p, void *ptr, int n) { int unit, err; @@ -2871,10 +2870,24 @@ again: } err = idr_get_new_above(p, ptr, n, &unit); - if (err == -EAGAIN) - goto again; + if (err < 0) { + if (err == -EAGAIN) + goto again; + return err; + } + + return unit; +} + +/* associate pointer with specified number */ +static int unit_set(struct idr *p, void *ptr, int n) +{ + int unit; - if (unit != n) { + unit = __unit_alloc(p, ptr, n); + if (unit < 0) + return unit; + else if (unit != n) { idr_remove(p, unit); return -EINVAL; } @@ -2885,19 +2898,7 @@ again: /* get new free unit number and associate pointer with it */ static int unit_get(struct idr *p, void *ptr) { - int unit, err; - -again: - if (!idr_pre_get(p, GFP_KERNEL)) { - printk(KERN_ERR "PPP: No free memory for idr\n"); - return -ENOMEM; - } - - err = idr_get_new_above(p, ptr, 0, &unit); - if (err == -EAGAIN) - goto again; - - return unit; + return __unit_alloc(p, ptr, 0); } /* put unit number back to a pool */ diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h index 05a95586f3c..055b87ab4f0 100644 --- a/drivers/net/ucc_geth.h +++ b/drivers/net/ucc_geth.h @@ -899,7 +899,8 @@ struct ucc_geth_hardware_statistics { #define UCC_GETH_UTFS_INIT 512 /* Tx virtual FIFO size */ #define UCC_GETH_UTFET_INIT 256 /* 1/2 utfs */ -#define UCC_GETH_UTFTT_INIT 512 +#define UCC_GETH_UTFTT_INIT 256 /* 1/2 utfs + due to errata */ /* Gigabit Ethernet (1000 Mbps) */ #define UCC_GETH_URFS_GIGA_INIT 4096/*2048*/ /* Rx virtual FIFO size */ diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index b154a94de03..62e9e8dc819 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2994,12 +2994,14 @@ static int hso_probe(struct usb_interface *interface, case HSO_INTF_BULK: /* It's a regular bulk interface */ - if (((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) && - !disable_net) - hso_dev = hso_create_net_device(interface, port_spec); - else + if ((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) { + if (!disable_net) + hso_dev = + hso_create_net_device(interface, port_spec); + } else { hso_dev = hso_create_bulk_serial_device(interface, port_spec); + } if (!hso_dev) goto exit; break; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index d81ad839788..cf05504d951 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -498,7 +498,6 @@ norbuff: static int x25_asy_close(struct net_device *dev) { struct x25_asy *sl = netdev_priv(dev); - int err; spin_lock(&sl->lock); if (sl->tty) @@ -507,10 +506,6 @@ static int x25_asy_close(struct net_device *dev) netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; - err = lapb_unregister(dev); - if (err != LAPB_OK) - printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n", - err); spin_unlock(&sl->lock); return 0; } @@ -595,6 +590,7 @@ static int x25_asy_open_tty(struct tty_struct *tty) static void x25_asy_close_tty(struct tty_struct *tty) { struct x25_asy *sl = tty->disc_data; + int err; /* First make sure we're connected. */ if (!sl || sl->magic != X25_ASY_MAGIC) @@ -605,6 +601,11 @@ static void x25_asy_close_tty(struct tty_struct *tty) dev_close(sl->dev); rtnl_unlock(); + err = lapb_unregister(sl->dev); + if (err != LAPB_OK) + printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n", + err); + tty->disc_data = NULL; sl->tty = NULL; x25_asy_free(sl); diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index c76ea53c20c..1a62e351ec7 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -518,7 +518,7 @@ bool ath_stoprecv(struct ath_softc *sc) bool stopped; spin_lock_bh(&sc->rx.rxbuflock); - ath9k_hw_stoppcurecv(ah); + ath9k_hw_abortpcurecv(ah); ath9k_hw_setrxfilter(ah, 0); stopped = ath9k_hw_stopdmarecv(ah); diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 980ae70ea42..a314c2c2bfb 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -647,7 +647,7 @@ init: } unlock: - if (err && (vif_id != -1)) { + if (err && (vif_id >= 0)) { vif_priv->active = false; bitmap_release_region(&ar->vif_bitmap, vif_id, 0); ar->vifs--; diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c index 9a55338d957..09e2dfd7b17 100644 --- a/drivers/net/wireless/b43/sdio.c +++ b/drivers/net/wireless/b43/sdio.c @@ -163,6 +163,7 @@ static int b43_sdio_probe(struct sdio_func *func, err_free_ssb: kfree(sdio); err_disable_func: + sdio_claim_host(func); sdio_disable_func(func); err_release_host: sdio_release_host(func); diff --git a/drivers/ssb/b43_pci_bridge.c b/drivers/ssb/b43_pci_bridge.c index ef9c6a04ad8..744d3f6e470 100644 --- a/drivers/ssb/b43_pci_bridge.c +++ b/drivers/ssb/b43_pci_bridge.c @@ -24,6 +24,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) }, + { PCI_DEVICE(PCI_VENDOR_ID_BCM_GVC, 0x4318) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) }, { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4321) }, diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4b4da5b86ff..f442668a1e5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -129,8 +129,9 @@ static void handle_tx(struct vhost_net *net) size_t hdr_size; struct socket *sock; - sock = rcu_dereference_check(vq->private_data, - lockdep_is_held(&vq->mutex)); + /* TODO: check that we are running from vhost_worker? + * Not sure it's worth it, it's straight-forward enough. */ + sock = rcu_dereference_check(vq->private_data, 1); if (!sock) return; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7845d1f7d1d..b50bc4bd5c5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, static struct bio *compressed_bio_alloc(struct block_device *bdev, u64 first_byte, gfp_t gfp_flags) { - struct bio *bio; int nr_vecs; nr_vecs = bio_get_nr_vecs(bdev); - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_size = 0; - bio->bi_bdev = bdev; - bio->bi_sector = first_byte >> 9; - } - return bio; + return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); } static int check_compressed_csum(struct inode *inode, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8db9234f6b4..af52f6d7a4d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -808,9 +808,9 @@ struct btrfs_block_group_cache { int extents_thresh; int free_extents; int total_bitmaps; - int ro:1; - int dirty:1; - int iref:1; + unsigned int ro:1; + unsigned int dirty:1; + unsigned int iref:1; int disk_cache_state; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fb827d0d718..c547cca26a2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include <linux/freezer.h> #include <linux/crc32c.h> #include <linux/slab.h> +#include <linux/migrate.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); + WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); + found_start = btrfs_header_bytenr(eb); if (found_start != start) { WARN_ON(1); @@ -693,6 +696,29 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } +static int btree_migratepage(struct address_space *mapping, + struct page *newpage, struct page *page) +{ + /* + * we can't safely write a btree page from here, + * we haven't done the locking hook + */ + if (PageDirty(page)) + return -EAGAIN; + /* + * Buffers may be managed in a filesystem specific way. + * We must have no buffers or drop them. + */ + if (page_has_private(page) && + !try_to_release_page(page, GFP_KERNEL)) + return -EAGAIN; +#ifdef CONFIG_MIGRATION + return migrate_page(mapping, newpage, page); +#else + return -ENOSYS; +#endif +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -707,8 +733,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } redirty_page_for_writepage(wbc, page); - eb = btrfs_find_tree_block(root, page_offset(page), - PAGE_CACHE_SIZE); + eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); WARN_ON(!eb); was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); @@ -799,6 +824,9 @@ static const struct address_space_operations btree_aops = { .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, +#ifdef CONFIG_MIGRATION + .migratepage = btree_migratepage, +#endif }; int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, @@ -1538,10 +1566,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); - struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), - GFP_NOFS); + struct btrfs_root *tree_root = btrfs_sb(sb); + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 951ef09b82f..6f044447359 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -232,9 +232,85 @@ fail: return ERR_PTR(ret); } +static int btrfs_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *inode = child->d_inode; + struct inode *dir = parent->d_inode; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_inode_ref *iref; + struct btrfs_root_ref *rref; + struct extent_buffer *leaf; + unsigned long name_ptr; + struct btrfs_key key; + int name_len; + int ret; + + if (!dir || !inode) + return -EINVAL; + + if (!S_ISDIR(dir->i_mode)) + return -EINVAL; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = BTRFS_I(inode)->root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + root = root->fs_info->tree_root; + } else { + key.objectid = inode->i_ino; + key.offset = dir->i_ino; + key.type = BTRFS_INODE_REF_KEY; + } + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } else if (ret > 0) { + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + path->slots[0]--; + } else { + btrfs_free_path(path); + return -ENOENT; + } + } + leaf = path->nodes[0]; + + if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + rref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + name_ptr = (unsigned long)(rref + 1); + name_len = btrfs_root_ref_name_len(leaf, rref); + } else { + iref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_ref); + name_ptr = (unsigned long)(iref + 1); + name_len = btrfs_inode_ref_name_len(leaf, iref); + } + + read_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_free_path(path); + + /* + * have to add the null termination to make sure that reconnect_path + * gets the right len for strlen + */ + name[name_len] = '\0'; + + return 0; +} + const struct export_operations btrfs_export_ops = { .encode_fh = btrfs_encode_fh, .fh_to_dentry = btrfs_fh_to_dentry, .fh_to_parent = btrfs_fh_to_parent, .get_parent = btrfs_get_parent, + .get_name = btrfs_get_name, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0c097f3aec4..bcd59c7dfb5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3412,7 +3412,7 @@ again: * our reservation. */ if (unused <= space_info->total_bytes) { - unused -= space_info->total_bytes; + unused = space_info->total_bytes - unused; if (unused >= num_bytes) { if (!reserved) space_info->bytes_reserved += orig_bytes; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eac10e3260a..3e86b9f3650 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) bio_put(bio); } -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) { struct bio *bio; @@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, else nr = bio_get_nr_vecs(bdev); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; @@ -2901,21 +2901,53 @@ out: int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { - int ret; + int ret = 0; u64 off = start; u64 max = start + len; u32 flags = 0; + u32 found_type; + u64 last; u64 disko = 0; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_state *cached_state = NULL; + struct btrfs_path *path; + struct btrfs_file_extent_item *item; int end = 0; u64 em_start = 0, em_len = 0; unsigned long emflags; - ret = 0; + int hole = 0; if (len == 0) return -EINVAL; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, + path, inode->i_ino, -1, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + WARN_ON(!ret); + path->slots[0]--; + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + + /* No extents, just return */ + if (found_key.objectid != inode->i_ino || + found_type != BTRFS_EXTENT_DATA_KEY) { + btrfs_free_path(path); + return 0; + } + last = found_key.offset; + btrfs_free_path(path); + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, &cached_state, GFP_NOFS); em = get_extent(inode, NULL, 0, off, max - off, 0); @@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ret = PTR_ERR(em); goto out; } + while (!end) { + hole = 0; off = em->start + em->len; if (off >= max) end = 1; + if (em->block_start == EXTENT_MAP_HOLE) { + hole = 1; + goto next; + } + em_start = em->start; em_len = em->len; @@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (em->block_start == EXTENT_MAP_LAST_BYTE) { end = 1; flags |= FIEMAP_EXTENT_LAST; - } else if (em->block_start == EXTENT_MAP_HOLE) { - flags |= FIEMAP_EXTENT_UNWRITTEN; } else if (em->block_start == EXTENT_MAP_INLINE) { flags |= (FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED); @@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; +next: emflags = em->flags; free_extent_map(em); em = NULL; - if (!end) { em = get_extent(inode, NULL, 0, off, max - off, 0); if (!em) @@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } emflags = em->flags; } + if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { flags |= FIEMAP_EXTENT_LAST; end = 1; } - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); - if (ret) - goto out_free; + if (em_start == last) { + flags |= FIEMAP_EXTENT_LAST; + end = 1; + } + + if (!hole) { + ret = fiemap_fill_next_extent(fieinfo, em_start, disko, + em_len, flags); + if (ret) + goto out_free; + } } out_free: free_extent_map(em); @@ -3836,8 +3881,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) spin_lock(&tree->buffer_lock); eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (!eb) - goto out; + if (!eb) { + spin_unlock(&tree->buffer_lock); + return ret; + } if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { ret = 0; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1c6d4f342ef..4183c8178f0 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, unsigned long op); +struct bio * +btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e354c33df08..c1faded5fca 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1047,8 +1047,14 @@ out: if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + num_written = PTR_ERR(trans); + goto done; + } + mutex_lock(&inode->i_mutex); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + mutex_unlock(&inode->i_mutex); if (ret == 0) { ret = btrfs_sync_log(trans, root); if (ret == 0) @@ -1067,6 +1073,7 @@ out: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } } +done: current->backing_dev_info = NULL; return num_written ? num_written : err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 558cac2dfa5..8039390bd6a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + inode->i_generation = BTRFS_I(inode)->generation; btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) @@ -4622,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode, - int backref, u64 index) + struct inode *dir, struct dentry *dentry, + struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, backref, index); + int err = btrfs_add_link(trans, dir, inode, + dentry->d_name.name, dentry->d_name.len, + backref, index); if (!err) { d_instantiate(dentry, inode); return 0; @@ -4668,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -4682,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4730,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, - objectid, BTRFS_I(dir)->block_group, mode, - &index); + dentry->d_name.len, dir->i_ino, objectid, + BTRFS_I(dir)->block_group, mode, &index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -4745,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -4787,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return -EPERM; btrfs_inc_nlink(inode); + inode->i_ctime = CURRENT_TIME; err = btrfs_set_inode_index(dir, &index); if (err) @@ -4805,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_set_trans_block_group(trans, dir); ihold(inode); - err = btrfs_add_nondir(trans, dentry, inode, 1, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); if (err) { drop_inode = 1; } else { + struct dentry *parent = dget_parent(dentry); btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + btrfs_log_new_name(trans, inode, NULL, parent); + dput(parent); } nr = trans->blocks_used; @@ -4853,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFDIR | mode, &index); if (IS_ERR(inode)) { @@ -4877,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - err = btrfs_add_link(trans, dentry->d_parent->d_inode, - inode, dentry->d_name.name, - dentry->d_name.len, 0, index); + err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail; @@ -5535,13 +5534,21 @@ struct btrfs_dio_private { u64 bytes; u32 *csums; void *private; + + /* number of bios pending for this dio */ + atomic_t pending_bios; + + /* IO errors */ + int errors; + + struct bio *orig_bio; }; static void btrfs_endio_direct_read(struct bio *bio, int err) { + struct btrfs_dio_private *dip = bio->bi_private; struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; - struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; u64 start; @@ -5595,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered = NULL; struct extent_state *cached_state = NULL; + u64 ordered_offset = dip->logical_offset; + u64 ordered_bytes = dip->bytes; int ret; if (err) goto out_done; - - ret = btrfs_dec_test_ordered_pending(inode, &ordered, - dip->logical_offset, dip->bytes); +again: + ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, + &ordered_offset, + ordered_bytes); if (!ret) - goto out_done; + goto out_test; BUG_ON(!ordered); @@ -5663,8 +5673,20 @@ out_unlock: out: btrfs_delalloc_release_metadata(inode, ordered->len); btrfs_end_transaction(trans, root); + ordered_offset = ordered->file_offset + ordered->len; btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered); + +out_test: + /* + * our bio might span multiple ordered extents. If we haven't + * completed the accounting for the whole dio, go back and try again + */ + if (ordered_offset < dip->logical_offset + dip->bytes) { + ordered_bytes = dip->logical_offset + dip->bytes - + ordered_offset; + goto again; + } out_done: bio->bi_private = dip->private; @@ -5684,6 +5706,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, return 0; } +static void btrfs_end_dio_bio(struct bio *bio, int err) +{ + struct btrfs_dio_private *dip = bio->bi_private; + + if (err) { + printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " + "disk_bytenr %lu len %u err no %d\n", + dip->inode->i_ino, bio->bi_rw, bio->bi_sector, + bio->bi_size, err); + dip->errors = 1; + + /* + * before atomic variable goto zero, we must make sure + * dip->errors is perceived to be set. + */ + smp_mb__before_atomic_dec(); + } + + /* if there are more bios still pending for this dio, just exit */ + if (!atomic_dec_and_test(&dip->pending_bios)) + goto out; + + if (dip->errors) + bio_io_error(dip->orig_bio); + else { + set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); + bio_endio(dip->orig_bio, 0); + } +out: + bio_put(bio); +} + +static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, + u64 first_sector, gfp_t gfp_flags) +{ + int nr_vecs = bio_get_nr_vecs(bdev); + return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); +} + +static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, + int rw, u64 file_offset, int skip_sum, + u32 *csums) +{ + int write = rw & REQ_WRITE; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + bio_get(bio); + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto err; + + if (write && !skip_sum) { + ret = btrfs_wq_submit_bio(root->fs_info, + inode, rw, bio, 0, 0, + file_offset, + __btrfs_submit_bio_start_direct_io, + __btrfs_submit_bio_done); + goto err; + } else if (!skip_sum) + btrfs_lookup_bio_sums_dio(root, inode, bio, + file_offset, csums); + + ret = btrfs_map_bio(root, rw, bio, 0, 1); +err: + bio_put(bio); + return ret; +} + +static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, + int skip_sum) +{ + struct inode *inode = dip->inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + struct bio *bio; + struct bio *orig_bio = dip->orig_bio; + struct bio_vec *bvec = orig_bio->bi_io_vec; + u64 start_sector = orig_bio->bi_sector; + u64 file_offset = dip->logical_offset; + u64 submit_len = 0; + u64 map_length; + int nr_pages = 0; + u32 *csums = dip->csums; + int ret = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); + if (!bio) + return -ENOMEM; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + atomic_inc(&dip->pending_bios); + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + return -EIO; + } + + while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { + if (unlikely(map_length < submit_len + bvec->bv_len || + bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset) < bvec->bv_len)) { + /* + * inc the count before we submit the bio so + * we know the end IO handler won't happen before + * we inc the count. Otherwise, the dip might get freed + * before we're done setting it up + */ + atomic_inc(&dip->pending_bios); + ret = __btrfs_submit_dio_bio(bio, inode, rw, + file_offset, skip_sum, + csums); + if (ret) { + bio_put(bio); + atomic_dec(&dip->pending_bios); + goto out_err; + } + + if (!skip_sum) + csums = csums + nr_pages; + start_sector += submit_len >> 9; + file_offset += submit_len; + + submit_len = 0; + nr_pages = 0; + + bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, + start_sector, GFP_NOFS); + if (!bio) + goto out_err; + bio->bi_private = dip; + bio->bi_end_io = btrfs_end_dio_bio; + + map_length = orig_bio->bi_size; + ret = btrfs_map_block(map_tree, READ, start_sector << 9, + &map_length, NULL, 0); + if (ret) { + bio_put(bio); + goto out_err; + } + } else { + submit_len += bvec->bv_len; + nr_pages ++; + bvec++; + } + } + + ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, + csums); + if (!ret) + return 0; + + bio_put(bio); +out_err: + dip->errors = 1; + /* + * before atomic variable goto zero, we must + * make sure dip->errors is perceived to be set. + */ + smp_mb__before_atomic_dec(); + if (atomic_dec_and_test(&dip->pending_bios)) + bio_io_error(dip->orig_bio); + + /* bio_end_io() will handle error, so we needn't return it */ + return 0; +} + static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, loff_t file_offset) { @@ -5723,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, dip->disk_bytenr = (u64)bio->bi_sector << 9; bio->bi_private = dip; + dip->errors = 0; + dip->orig_bio = bio; + atomic_set(&dip->pending_bios, 0); if (write) bio->bi_end_io = btrfs_endio_direct_write; else bio->bi_end_io = btrfs_endio_direct_read; - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - if (ret) - goto out_err; - - if (write && !skip_sum) { - ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, 0, 0, - dip->logical_offset, - __btrfs_submit_bio_start_direct_io, - __btrfs_submit_bio_done); - if (ret) - goto out_err; + ret = btrfs_submit_direct_hook(rw, dip, skip_sum); + if (!ret) return; - } else if (!skip_sum) - btrfs_lookup_bio_sums_dio(root, inode, bio, - dip->logical_offset, dip->csums); - - ret = btrfs_map_bio(root, rw, bio, 0, 1); - if (ret) - goto out_err; - return; -out_err: - kfree(dip->csums); - kfree(dip); free_ordered: /* * If this is a write, we need to clean up the reserved space and kill @@ -6607,8 +6781,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BUG_ON(ret); if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + struct dentry *parent = dget_parent(new_dentry); + btrfs_log_new_name(trans, old_inode, old_dir, parent); + dput(parent); btrfs_end_log_trans(root); } out_fail: @@ -6758,8 +6933,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, - dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, objectid, + dentry->d_name.len, dir->i_ino, objectid, BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, &index); err = PTR_ERR(inode); @@ -6773,7 +6947,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, } btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { @@ -6844,6 +7018,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 i_size; int ret = 0; bool own_trans = true; @@ -6885,11 +7060,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, (actual_len > inode->i_size) && (cur_offset > inode->i_size)) { if (cur_offset > actual_len) - i_size_write(inode, actual_len); + i_size = actual_len; else - i_size_write(inode, cur_offset); - i_size_write(inode, cur_offset); - btrfs_ordered_update_i_size(inode, cur_offset, NULL); + i_size = cur_offset; + i_size_write(inode, i_size); + btrfs_ordered_update_i_size(inode, i_size, NULL); } ret = btrfs_update_inode(trans, root, inode); @@ -6943,6 +7118,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, alloc_end); + if (ret) + goto out; + if (alloc_start > inode->i_size) { ret = btrfs_cont_expand(inode, alloc_start); if (ret) @@ -7139,6 +7318,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = btrfs_getattr, .permission = btrfs_permission, .setxattr = btrfs_setxattr, .getxattr = btrfs_getxattr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 463d91b4dd3..f1c9bb4079e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; struct btrfs_root *new_root; - struct inode *dir = dentry->d_parent->d_inode; + struct dentry *parent = dget_parent(dentry); + struct inode *dir; int ret; int err; u64 objectid; @@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 0, &objectid); - if (ret) + if (ret) { + dput(parent); return ret; + } + + dir = parent->d_inode; + /* * 1 - inode item * 2 - refs @@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root, * 2 - dir items */ trans = btrfs_start_transaction(root, 6); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { + dput(parent); return PTR_ERR(trans); + } leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); @@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: + dput(parent); if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); @@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid) { struct inode *inode; + struct dentry *parent; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; int ret; @@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, btrfs_orphan_cleanup(pending_snapshot->snap); - inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); + parent = dget_parent(dentry); + inode = btrfs_lookup_dentry(parent->d_inode, dentry); + dput(parent); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto fail; @@ -1669,12 +1681,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, olen = len = src->i_size - off; /* if we extend to eof, continue to block boundary */ if (off + len == src->i_size) - len = ((src->i_size + bs-1) & ~(bs-1)) - - off; + len = ALIGN(src->i_size, bs) - off; /* verify the end result is block aligned */ - if ((off & (bs-1)) || - ((off + len) & (bs-1))) + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) goto out_unlock; /* do any pending delalloc/csum calc on src, one way or @@ -1874,8 +1885,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * but shouldn't round up the file size */ endoff = new_key.offset + datal; - if (endoff > off+olen) - endoff = off+olen; + if (endoff > destoff+olen) + endoff = destoff+olen; if (endoff > inode->i_size) btrfs_i_size_write(inode, endoff); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f4621f6deca..ae7737e352c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode, /* * this is used to account for finished IO across a given range + * of the file. The IO may span ordered extents. If + * a given ordered_extent is completely done, 1 is returned, otherwise + * 0. + * + * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used + * to make sure this function only returns 1 once for a given ordered extent. + * + * file_offset is updated to one byte past the range that is recorded as + * complete. This allows you to walk forward in the file. + */ +int btrfs_dec_test_first_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent **cached, + u64 *file_offset, u64 io_size) +{ + struct btrfs_ordered_inode_tree *tree; + struct rb_node *node; + struct btrfs_ordered_extent *entry = NULL; + int ret; + u64 dec_end; + u64 dec_start; + u64 to_dec; + + tree = &BTRFS_I(inode)->ordered_tree; + spin_lock(&tree->lock); + node = tree_search(tree, *file_offset); + if (!node) { + ret = 1; + goto out; + } + + entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); + if (!offset_in_entry(entry, *file_offset)) { + ret = 1; + goto out; + } + + dec_start = max(*file_offset, entry->file_offset); + dec_end = min(*file_offset + io_size, entry->file_offset + + entry->len); + *file_offset = dec_end; + if (dec_start > dec_end) { + printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", + (unsigned long long)dec_start, + (unsigned long long)dec_end); + } + to_dec = dec_end - dec_start; + if (to_dec > entry->bytes_left) { + printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", + (unsigned long long)entry->bytes_left, + (unsigned long long)to_dec); + } + entry->bytes_left -= to_dec; + if (entry->bytes_left == 0) + ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); + else + ret = 1; +out: + if (!ret && cached && entry) { + *cached = entry; + atomic_inc(&entry->refs); + } + spin_unlock(&tree->lock); + return ret == 0; +} + +/* + * this is used to account for finished IO across a given range * of the file. The IO should not span ordered extents. If * a given ordered_extent is completely done, 1 is returned, otherwise * 0. diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ac365492a3..61dca83119d 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, u64 file_offset, u64 io_size); +int btrfs_dec_test_first_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent **cached, + u64 *file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8299a25ffc8..dbb51ea7a13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) case Opt_space_cache: printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_clear_cache: printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); @@ -562,12 +563,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) static int btrfs_test_super(struct super_block *s, void *data) { - struct btrfs_fs_devices *test_fs_devices = data; + struct btrfs_root *test_root = data; struct btrfs_root *root = btrfs_sb(s); - return root->fs_info->fs_devices == test_fs_devices; + /* + * If this super block is going away, return false as it + * can't match as an existing super block. + */ + if (!atomic_read(&s->s_active)) + return 0; + return root->fs_info->fs_devices == test_root->fs_info->fs_devices; +} + +static int btrfs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + + return set_anon_super(s, data); } + /* * Find a superblock for the given device / mount point. * @@ -581,6 +596,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; + struct btrfs_root *tree_root = NULL; + struct btrfs_fs_info *fs_info = NULL; fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; @@ -608,8 +625,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, goto error_close_devices; } + /* + * Setup a dummy root and fs_info for test/set super. This is because + * we don't actually fill this stuff out until open_ctree, but we need + * it for searching for existing supers, so this lets us do that and + * then open_ctree will properly initialize everything later. + */ + fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); + tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + if (!fs_info || !tree_root) { + error = -ENOMEM; + goto error_close_devices; + } + fs_info->tree_root = tree_root; + fs_info->fs_devices = fs_devices; + tree_root->fs_info = fs_info; + bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); if (IS_ERR(s)) goto error_s; @@ -675,6 +708,8 @@ error_s: error = PTR_ERR(s); error_close_devices: btrfs_close_devices(fs_devices); + kfree(fs_info); + kfree(tree_root); error_free_subvol_name: kfree(subvol_name); return ERR_PTR(error); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1fffbc017bd..f50e931fc21 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct inode *parent_inode; + struct dentry *parent; struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; @@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, trans->block_rsv = &pending->block_rsv; dentry = pending->dentry; - parent_inode = dentry->d_parent->d_inode; + parent = dget_parent(dentry); + parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root); @@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_inode->i_ino, index, dentry->d_name.name, dentry->d_name.len); BUG_ON(ret); + dput(parent); key.offset = (u64)-1; pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a29f19384a2..054744ac571 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_root *root; + struct dentry *old_parent = NULL; /* * for regular files, if its inode is already on disk, we don't @@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; inode = parent->d_inode; } + dput(old_parent); out: return ret; } @@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; + struct dentry *old_parent = NULL; int ret = 0; u64 last_committed = root->fs_info->last_trans_committed; @@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (IS_ROOT(parent)) break; - parent = parent->d_parent; + parent = dget_parent(parent); + dput(old_parent); + old_parent = parent; } ret = 0; end_trans: + dput(old_parent); if (ret < 0) { BUG_ON(ret != -ENOSPC); root->fs_info->last_trans_log_full_commit = trans->transid; @@ -3039,8 +3047,13 @@ end_no_trans: int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry) { - return btrfs_log_inode_parent(trans, root, dentry->d_inode, - dentry->d_parent, 0); + struct dentry *parent = dget_parent(dentry); + int ret; + + ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); + dput(parent); + + return ret; } /* diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58a9b9998b4..f606baf9ba7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; + struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); @@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, qd->qd_qb.qb_value = qp->qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_warn = qp->qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); qd->qd_qb.qb_limit = qp->qu_limit; } } @@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, fdq->d_version = FS_DQUOT_VERSION; fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; fdq->d_id = id; - fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); - fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); - fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; + fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; gfs2_glock_dq_uninit(&q_gh); out: @@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, /* If nothing has changed, this is a no-op */ if ((fdq->d_fieldmask & FS_DQ_BSOFT) && - (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) fdq->d_fieldmask ^= FS_DQ_BSOFT; if ((fdq->d_fieldmask & FS_DQ_BHARD) && - (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) fdq->d_fieldmask ^= FS_DQ_BHARD; if (fdq->d_fieldmask == 0) goto out_i; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d369b533dc2..cb845c16ad7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2047,6 +2047,7 @@ #define PCI_DEVICE_ID_AFAVLAB_P030 0x2182 #define PCI_SUBDEVICE_ID_AFAVLAB_P061 0x2150 +#define PCI_VENDOR_ID_BCM_GVC 0x14a4 #define PCI_VENDOR_ID_BROADCOM 0x14e4 #define PCI_DEVICE_ID_TIGON3_5752 0x1600 #define PCI_DEVICE_ID_TIGON3_5752M 0x1601 diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 90c9e2872f2..18e5c3f6758 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); +extern struct sock *unix_get_socket(struct file *filp); #define UNIX_HASH_SIZE 256 @@ -56,6 +57,7 @@ struct unix_sock { spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; + unsigned char recursion_level; struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 5bf0020b924..b1c17730767 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -8,7 +8,6 @@ * * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ -#include <linux/kernel.h> #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/module.h> @@ -39,7 +38,6 @@ int debug_locks_off(void) { if (__debug_locks_off()) { if (!debug_locks_silent) { - oops_in_progress = 1; console_verbose(); return 1; } diff --git a/net/ceph/Makefile b/net/ceph/Makefile index aab1cabb803..5f19415ec9c 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -1,9 +1,6 @@ # # Makefile for CEPH filesystem. # - -ifneq ($(KERNELRELEASE),) - obj-$(CONFIG_CEPH_LIB) += libceph.o libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ @@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ ceph_fs.o ceph_strings.o ceph_hash.o \ pagevec.o -else -#Otherwise we were called directly from the command -# line; invoke the kernel build system. - -KERNELDIR ?= /lib/modules/$(shell uname -r)/build -PWD := $(shell pwd) - -default: all - -all: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules - -modules_install: - $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install - -clean: - $(MAKE) -C $(KERNELDIR) M=$(PWD) clean - -endif diff --git a/net/dccp/input.c b/net/dccp/input.c index 265985370fa..e424a09e83f 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + ackno != DCCP_PKT_WITHOUT_ACK_SEQ && + after48(ackno, dp->dccps_gar)) dp->dccps_gar = ackno; } else { unsigned long now = jiffies; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a76b78de679..6f97268ed85 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if (r_len > sizeof(struct linkinfo_dn)) r_len = sizeof(struct linkinfo_dn); + memset(&link, 0, sizeof(link)); + switch(sock->state) { case SS_CONNECTING: link.idn_linkstate = LL_CONNECTING; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index f8c1ae4b41f..13992e1d272 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <net/sock.h> #include <net/inet_common.h> #include <linux/stat.h> @@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, #endif #ifdef CONFIG_ECONET_AUNUDP struct msghdr udpmsg; - struct iovec iov[msg->msg_iovlen+1]; + struct iovec iov[2]; struct aunhdr ah; struct sockaddr_in udpdest; __kernel_size_t size; - int i; mm_segment_t oldfs; + char *userbuf; #endif /* @@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL) { - struct econet_sock *eo = ec_sk(sk); - - addr.station = eo->station; - addr.net = eo->net; - port = eo->port; - cb = eo->cb; - } else { - if (msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; - } + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } } - if (len + 15 > dev->mtu) { - mutex_unlock(&econet_mutex); - return -EMSGSIZE; - } - if (dev->type == ARPHRD_ECONET) { /* Real hardware Econet. We're not worthy etc. */ #ifdef CONFIG_ECONET_NATIVE unsigned short proto = 0; int res; + if (len + 15 > dev->mtu) { + mutex_unlock(&econet_mutex); + return -EMSGSIZE; + } + dev_hold(dev); skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), @@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, eb = (struct ec_cb *)&skb->cb; - /* BUG: saddr may be NULL */ eb->cookie = saddr->cookie; eb->sec = *saddr; eb->sent = ec_tx_done; @@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, return -ENETDOWN; /* No socket - can't send */ } + if (len > 32768) { + err = -E2BIG; + goto error; + } + /* Make up a UDP datagram and hand it off to some higher intellect. */ memset(&udpdest, 0, sizeof(udpdest)); @@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); - /* - * XXX: that is b0rken. We can't mix userland and kernel pointers - * in iovec, since on a lot of platforms copy_from_user() will - * *not* work with the kernel and userland ones at the same time, - * regardless of what we do with set_fs(). And we are talking about - * econet-over-ethernet here, so "it's only ARM anyway" doesn't - * apply. Any suggestions on fixing that code? -- AV - */ iov[0].iov_base = (void *)&ah; iov[0].iov_len = size; - for (i = 0; i < msg->msg_iovlen; i++) { - void __user *base = msg->msg_iov[i].iov_base; - size_t iov_len = msg->msg_iov[i].iov_len; - /* Check it now since we switch to KERNEL_DS later. */ - if (!access_ok(VERIFY_READ, base, iov_len)) { - mutex_unlock(&econet_mutex); - return -EFAULT; - } - iov[i+1].iov_base = base; - iov[i+1].iov_len = iov_len; - size += iov_len; + + userbuf = vmalloc(len); + if (userbuf == NULL) { + err = -ENOMEM; + goto error; } + iov[1].iov_base = userbuf; + iov[1].iov_len = len; + err = memcpy_fromiovec(userbuf, msg->msg_iov, len); + if (err) + goto error_free_buf; + /* Get a skbuff (no data, just holds our cb information) */ if ((skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) { - mutex_unlock(&econet_mutex); - return err; - } + &err)) == NULL) + goto error_free_buf; eb = (struct ec_cb *)&skb->cb; @@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_name = (void *)&udpdest; udpmsg.msg_namelen = sizeof(udpdest); udpmsg.msg_iov = &iov[0]; - udpmsg.msg_iovlen = msg->msg_iovlen + 1; + udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; udpmsg.msg_flags=0; @@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); + +error_free_buf: + vfree(userbuf); #else err = -EPROTOTYPE; #endif + error: mutex_unlock(&econet_mutex); return err; @@ -671,6 +661,9 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) err = 0; switch (cmd) { case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + edev = dev->ec_ptr; if (edev == NULL) { /* Magic up a new one. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b46..3c0369a3a66 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) } } } - sk_add_bind_node(child, &tb->owners); - inet_csk(child)->icsk_bind_hash = tb; + inet_bind_hash(child, tb, port); spin_unlock(&head->lock); return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e91911d7aae..1b4ec21497a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,6 +26,8 @@ static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; +static int tcp_adv_win_scale_min = -31; +static int tcp_adv_win_scale_max = 31; /* Update system visible IP port range */ static void set_local_port_range(int range[2]) @@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, }, { .procname = "tcp_tw_reuse", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 08141996948..f15c36a706e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < 64 || val > MAX_TCP_WINDOW) { + if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { err = -EINVAL; break; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69ccbc1dde9..e13da6de1fc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2043,7 +2043,9 @@ get_req: } get_sk: sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { + if (!net_eq(sock_net(sk), net)) + continue; + if (sk->sk_family == st->family) { cur = sk; goto out; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3c95304a081..2268e679812 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +#define MAX_RECURSION_LEVEL 4 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + unsigned char max_level = 0; + int unix_sock_count = 0; + + for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + + if (sk) { + unix_sock_count++; + max_level = max(max_level, + unix_sk(sk)->recursion_level); + } + } + if (unlikely(max_level > MAX_RECURSION_LEVEL)) + return -ETOOMANYREFS; /* * Need to duplicate file references for the sake of garbage @@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count-1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - return 0; + if (unix_sock_count) { + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); + } + return max_level; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; err = unix_scm_to_skb(siocb->scm, skb, true); - if (err) + if (err < 0) goto out_free; + max_level = err + 1; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1514,6 +1534,8 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); @@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, int sent = 0; struct scm_cookie tmp_scm; bool fds_sent = false; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); - if (err) { + if (err < 0) { kfree_skb(skb); goto out_err; } + max_level = err + 1; fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); @@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, size); sent += size; @@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { + unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index c8df6fda0b1..f89f83bf828 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; @@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u) } static bool gc_in_progress = false; +#define UNIX_INFLIGHT_TRIGGER_GC 16000 void wait_for_unix_gc(void) { + /* + * If number of inflight sockets is insane, + * force a garbage collect right now. + */ + if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } |