aboutsummaryrefslogtreecommitdiff
path: root/drivers/scsi/qla2xxx/qla_attr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/qla2xxx/qla_attr.c')
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c1259
1 files changed, 1081 insertions, 178 deletions
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index ee9d4015243..16fe5196e6d 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1,13 +1,15 @@
/*
* QLogic Fibre Channel HBA Driver
- * Copyright (c) 2003-2008 QLogic Corporation
+ * Copyright (c) 2003-2014 QLogic Corporation
*
* See LICENSE.qla2xxx for copyright and licensing details.
*/
#include "qla_def.h"
+#include "qla_target.h"
#include <linux/kthread.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/delay.h>
static int qla24xx_vport_disable(struct fc_vport *, bool);
@@ -15,23 +17,40 @@ static int qla24xx_vport_disable(struct fc_vport *, bool);
/* SYSFS attributes --------------------------------------------------------- */
static ssize_t
-qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
+ int rval = 0;
- if (ha->fw_dump_reading == 0)
+ if (!(ha->fw_dump_reading || ha->mctp_dump_reading))
return 0;
- return memory_read_from_buffer(buf, count, &off, ha->fw_dump,
+ if (IS_P3P_TYPE(ha)) {
+ if (off < ha->md_template_size) {
+ rval = memory_read_from_buffer(buf, count,
+ &off, ha->md_tmplt_hdr, ha->md_template_size);
+ return rval;
+ }
+ off -= ha->md_template_size;
+ rval = memory_read_from_buffer(buf, count,
+ &off, ha->md_dump, ha->md_dump_size);
+ return rval;
+ } else if (ha->mctp_dumped && ha->mctp_dump_reading)
+ return memory_read_from_buffer(buf, count, &off, ha->mctp_dump,
+ MCTP_DUMP_SIZE);
+ else if (ha->fw_dump_reading)
+ return memory_read_from_buffer(buf, count, &off, ha->fw_dump,
ha->fw_dump_len);
+ else
+ return 0;
}
static ssize_t
-qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -49,9 +68,13 @@ qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
if (!ha->fw_dump_reading)
break;
- qla_printk(KERN_INFO, ha,
+ ql_log(ql_log_info, vha, 0x705d,
"Firmware dump cleared on (%ld).\n", vha->host_no);
+ if (IS_P3P_TYPE(ha)) {
+ qla82xx_md_free(vha);
+ qla82xx_md_prep(vha);
+ }
ha->fw_dump_reading = 0;
ha->fw_dumped = 0;
break;
@@ -59,7 +82,7 @@ qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
if (ha->fw_dumped && !ha->fw_dump_reading) {
ha->fw_dump_reading = 1;
- qla_printk(KERN_INFO, ha,
+ ql_log(ql_log_info, vha, 0x705e,
"Raw firmware dump ready for read on (%ld).\n",
vha->host_no);
}
@@ -68,10 +91,49 @@ qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
qla2x00_alloc_fw_dump(vha);
break;
case 3:
- qla2x00_system_error(vha);
+ if (IS_QLA82XX(ha)) {
+ qla82xx_idc_lock(ha);
+ qla82xx_set_reset_owner(vha);
+ qla82xx_idc_unlock(ha);
+ } else if (IS_QLA8044(ha)) {
+ qla8044_idc_lock(ha);
+ qla82xx_set_reset_owner(vha);
+ qla8044_idc_unlock(ha);
+ } else
+ qla2x00_system_error(vha);
+ break;
+ case 4:
+ if (IS_P3P_TYPE(ha)) {
+ if (ha->md_tmplt_hdr)
+ ql_dbg(ql_dbg_user, vha, 0x705b,
+ "MiniDump supported with this firmware.\n");
+ else
+ ql_dbg(ql_dbg_user, vha, 0x709d,
+ "MiniDump not supported with this firmware.\n");
+ }
+ break;
+ case 5:
+ if (IS_P3P_TYPE(ha))
+ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ break;
+ case 6:
+ if (!ha->mctp_dump_reading)
+ break;
+ ql_log(ql_log_info, vha, 0x70c1,
+ "MCTP dump cleared on (%ld).\n", vha->host_no);
+ ha->mctp_dump_reading = 0;
+ ha->mctp_dumped = 0;
+ break;
+ case 7:
+ if (ha->mctp_dumped && !ha->mctp_dump_reading) {
+ ha->mctp_dump_reading = 1;
+ ql_log(ql_log_info, vha, 0x70c2,
+ "Raw mctp dump ready for read on (%ld).\n",
+ vha->host_no);
+ }
break;
}
- return (count);
+ return count;
}
static struct bin_attribute sysfs_fw_dump_attr = {
@@ -85,7 +147,93 @@ static struct bin_attribute sysfs_fw_dump_attr = {
};
static ssize_t
-qla2x00_sysfs_read_nvram(struct kobject *kobj,
+qla2x00_sysfs_read_fw_dump_template(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+ struct qla_hw_data *ha = vha->hw;
+
+ if (!ha->fw_dump_template || !ha->fw_dump_template_len)
+ return 0;
+
+ ql_dbg(ql_dbg_user, vha, 0x70e2,
+ "chunk <- off=%llx count=%zx\n", off, count);
+ return memory_read_from_buffer(buf, count, &off,
+ ha->fw_dump_template, ha->fw_dump_template_len);
+}
+
+static ssize_t
+qla2x00_sysfs_write_fw_dump_template(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+ struct qla_hw_data *ha = vha->hw;
+ uint32_t size;
+
+ if (off == 0) {
+ if (ha->fw_dump)
+ vfree(ha->fw_dump);
+ if (ha->fw_dump_template)
+ vfree(ha->fw_dump_template);
+
+ ha->fw_dump = NULL;
+ ha->fw_dump_len = 0;
+ ha->fw_dump_template = NULL;
+ ha->fw_dump_template_len = 0;
+
+ size = qla27xx_fwdt_template_size(buf);
+ ql_dbg(ql_dbg_user, vha, 0x70d1,
+ "-> allocating fwdt (%x bytes)...\n", size);
+ ha->fw_dump_template = vmalloc(size);
+ if (!ha->fw_dump_template) {
+ ql_log(ql_log_warn, vha, 0x70d2,
+ "Failed allocate fwdt (%x bytes).\n", size);
+ return -ENOMEM;
+ }
+ ha->fw_dump_template_len = size;
+ }
+
+ if (off + count > ha->fw_dump_template_len) {
+ count = ha->fw_dump_template_len - off;
+ ql_dbg(ql_dbg_user, vha, 0x70d3,
+ "chunk -> truncating to %zx bytes.\n", count);
+ }
+
+ ql_dbg(ql_dbg_user, vha, 0x70d4,
+ "chunk -> off=%llx count=%zx\n", off, count);
+ memcpy(ha->fw_dump_template + off, buf, count);
+
+ if (off + count == ha->fw_dump_template_len) {
+ size = qla27xx_fwdt_calculate_dump_size(vha);
+ ql_dbg(ql_dbg_user, vha, 0x70d5,
+ "-> allocating fwdump (%x bytes)...\n", size);
+ ha->fw_dump = vmalloc(size);
+ if (!ha->fw_dump) {
+ ql_log(ql_log_warn, vha, 0x70d6,
+ "Failed allocate fwdump (%x bytes).\n", size);
+ return -ENOMEM;
+ }
+ ha->fw_dump_len = size;
+ }
+
+ return count;
+}
+static struct bin_attribute sysfs_fw_dump_template_attr = {
+ .attr = {
+ .name = "fw_dump_template",
+ .mode = S_IRUSR | S_IWUSR,
+ },
+ .size = 0,
+ .read = qla2x00_sysfs_read_fw_dump_template,
+ .write = qla2x00_sysfs_write_fw_dump_template,
+};
+
+static ssize_t
+qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -96,13 +244,15 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj,
if (!capable(CAP_SYS_ADMIN))
return 0;
- /* Read NVRAM data from cache. */
+ if (IS_NOCACHE_VPD_TYPE(ha))
+ ha->isp_ops->read_optrom(vha, ha->nvram, ha->flt_region_nvram << 2,
+ ha->nvram_size);
return memory_read_from_buffer(buf, count, &off, ha->nvram,
ha->nvram_size);
}
static ssize_t
-qla2x00_sysfs_write_nvram(struct kobject *kobj,
+qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -111,8 +261,9 @@ qla2x00_sysfs_write_nvram(struct kobject *kobj,
struct qla_hw_data *ha = vha->hw;
uint16_t cnt;
- if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->nvram_size)
- return 0;
+ if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->nvram_size ||
+ !ha->isp_ops->write_nvram)
+ return -EINVAL;
/* Checksum NVRAM. */
if (IS_FWI2_CAPABLE(ha)) {
@@ -137,14 +288,25 @@ qla2x00_sysfs_write_nvram(struct kobject *kobj,
*iter = chksum;
}
+ if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x705f,
+ "HBA not online, failing NVRAM update.\n");
+ return -EAGAIN;
+ }
+
/* Write NVRAM. */
ha->isp_ops->write_nvram(vha, (uint8_t *)buf, ha->nvram_base, count);
ha->isp_ops->read_nvram(vha, (uint8_t *)ha->nvram, ha->nvram_base,
count);
+ ql_dbg(ql_dbg_user, vha, 0x7060,
+ "Setting ISP_ABORT_NEEDED\n");
+ /* NVRAM settings take effect immediately. */
set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ qla2x00_wait_for_chip_reset(vha);
- return (count);
+ return count;
}
static struct bin_attribute sysfs_nvram_attr = {
@@ -158,23 +320,28 @@ static struct bin_attribute sysfs_nvram_attr = {
};
static ssize_t
-qla2x00_sysfs_read_optrom(struct kobject *kobj,
+qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
+ ssize_t rval = 0;
if (ha->optrom_state != QLA_SREADING)
return 0;
- return memory_read_from_buffer(buf, count, &off, ha->optrom_buffer,
- ha->optrom_region_size);
+ mutex_lock(&ha->optrom_mutex);
+ rval = memory_read_from_buffer(buf, count, &off, ha->optrom_buffer,
+ ha->optrom_region_size);
+ mutex_unlock(&ha->optrom_mutex);
+
+ return rval;
}
static ssize_t
-qla2x00_sysfs_write_optrom(struct kobject *kobj,
+qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -189,7 +356,9 @@ qla2x00_sysfs_write_optrom(struct kobject *kobj,
if (off + count > ha->optrom_region_size)
count = ha->optrom_region_size - off;
+ mutex_lock(&ha->optrom_mutex);
memcpy(&ha->optrom_buffer[off], buf, count);
+ mutex_unlock(&ha->optrom_mutex);
return count;
}
@@ -205,44 +374,51 @@ static struct bin_attribute sysfs_optrom_attr = {
};
static ssize_t
-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
+qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
-
uint32_t start = 0;
uint32_t size = ha->optrom_size;
int val, valid;
+ ssize_t rval = count;
if (off)
- return 0;
+ return -EINVAL;
+
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return -EAGAIN;
if (sscanf(buf, "%d:%x:%x", &val, &start, &size) < 1)
return -EINVAL;
if (start > ha->optrom_size)
return -EINVAL;
+ mutex_lock(&ha->optrom_mutex);
switch (val) {
case 0:
if (ha->optrom_state != QLA_SREADING &&
- ha->optrom_state != QLA_SWRITING)
- break;
-
+ ha->optrom_state != QLA_SWRITING) {
+ rval = -EINVAL;
+ goto out;
+ }
ha->optrom_state = QLA_SWAITING;
- DEBUG2(qla_printk(KERN_INFO, ha,
+ ql_dbg(ql_dbg_user, vha, 0x7061,
"Freeing flash region allocation -- 0x%x bytes.\n",
- ha->optrom_region_size));
+ ha->optrom_region_size);
vfree(ha->optrom_buffer);
ha->optrom_buffer = NULL;
break;
case 1:
- if (ha->optrom_state != QLA_SWAITING)
- break;
+ if (ha->optrom_state != QLA_SWAITING) {
+ rval = -EINVAL;
+ goto out;
+ }
ha->optrom_region_start = start;
ha->optrom_region_size = start + size > ha->optrom_size ?
@@ -251,25 +427,35 @@ qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
ha->optrom_state = QLA_SREADING;
ha->optrom_buffer = vmalloc(ha->optrom_region_size);
if (ha->optrom_buffer == NULL) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x7062,
"Unable to allocate memory for optrom retrieval "
"(%x).\n", ha->optrom_region_size);
ha->optrom_state = QLA_SWAITING;
- return count;
+ rval = -ENOMEM;
+ goto out;
}
- DEBUG2(qla_printk(KERN_INFO, ha,
+ if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x7063,
+ "HBA not online, failing NVRAM update.\n");
+ rval = -EAGAIN;
+ goto out;
+ }
+
+ ql_dbg(ql_dbg_user, vha, 0x7064,
"Reading flash region -- 0x%x/0x%x.\n",
- ha->optrom_region_start, ha->optrom_region_size));
+ ha->optrom_region_start, ha->optrom_region_size);
memset(ha->optrom_buffer, 0, ha->optrom_region_size);
ha->isp_ops->read_optrom(vha, ha->optrom_buffer,
ha->optrom_region_start, ha->optrom_region_size);
break;
case 2:
- if (ha->optrom_state != QLA_SWAITING)
- break;
+ if (ha->optrom_state != QLA_SWAITING) {
+ rval = -EINVAL;
+ goto out;
+ }
/*
* We need to be more restrictive on which FLASH regions are
@@ -297,12 +483,14 @@ qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
else if (start == (ha->flt_region_boot * 4) ||
start == (ha->flt_region_fw * 4))
valid = 1;
- else if (IS_QLA25XX(ha) || IS_QLA81XX(ha))
- valid = 1;
+ else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)
+ || IS_CNA_CAPABLE(ha) || IS_QLA2031(ha))
+ valid = 1;
if (!valid) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x7065,
"Invalid start region 0x%x/0x%x.\n", start, size);
- return -EINVAL;
+ rval = -EINVAL;
+ goto out;
}
ha->optrom_region_start = start;
@@ -312,35 +500,48 @@ qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
ha->optrom_state = QLA_SWRITING;
ha->optrom_buffer = vmalloc(ha->optrom_region_size);
if (ha->optrom_buffer == NULL) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x7066,
"Unable to allocate memory for optrom update "
- "(%x).\n", ha->optrom_region_size);
+ "(%x)\n", ha->optrom_region_size);
ha->optrom_state = QLA_SWAITING;
- return count;
+ rval = -ENOMEM;
+ goto out;
}
- DEBUG2(qla_printk(KERN_INFO, ha,
+ ql_dbg(ql_dbg_user, vha, 0x7067,
"Staging flash region write -- 0x%x/0x%x.\n",
- ha->optrom_region_start, ha->optrom_region_size));
+ ha->optrom_region_start, ha->optrom_region_size);
memset(ha->optrom_buffer, 0, ha->optrom_region_size);
break;
case 3:
- if (ha->optrom_state != QLA_SWRITING)
- break;
+ if (ha->optrom_state != QLA_SWRITING) {
+ rval = -EINVAL;
+ goto out;
+ }
+
+ if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x7068,
+ "HBA not online, failing flash update.\n");
+ rval = -EAGAIN;
+ goto out;
+ }
- DEBUG2(qla_printk(KERN_INFO, ha,
+ ql_dbg(ql_dbg_user, vha, 0x7069,
"Writing flash region -- 0x%x/0x%x.\n",
- ha->optrom_region_start, ha->optrom_region_size));
+ ha->optrom_region_start, ha->optrom_region_size);
ha->isp_ops->write_optrom(vha, ha->optrom_buffer,
ha->optrom_region_start, ha->optrom_region_size);
break;
default:
- count = -EINVAL;
+ rval = -EINVAL;
}
- return count;
+
+out:
+ mutex_unlock(&ha->optrom_mutex);
+ return rval;
}
static struct bin_attribute sysfs_optrom_ctl_attr = {
@@ -353,7 +554,7 @@ static struct bin_attribute sysfs_optrom_ctl_attr = {
};
static ssize_t
-qla2x00_sysfs_read_vpd(struct kobject *kobj,
+qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -361,29 +562,58 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj,
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ return -EAGAIN;
+
if (!capable(CAP_SYS_ADMIN))
- return 0;
+ return -EINVAL;
- /* Read NVRAM data from cache. */
+ if (IS_NOCACHE_VPD_TYPE(ha))
+ ha->isp_ops->read_optrom(vha, ha->vpd, ha->flt_region_vpd << 2,
+ ha->vpd_size);
return memory_read_from_buffer(buf, count, &off, ha->vpd, ha->vpd_size);
}
static ssize_t
-qla2x00_sysfs_write_vpd(struct kobject *kobj,
+qla2x00_sysfs_write_vpd(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
struct device, kobj)));
struct qla_hw_data *ha = vha->hw;
+ uint8_t *tmp_data;
- if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->vpd_size)
+ if (unlikely(pci_channel_offline(ha->pdev)))
return 0;
+ if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->vpd_size ||
+ !ha->isp_ops->write_nvram)
+ return 0;
+
+ if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x706a,
+ "HBA not online, failing VPD update.\n");
+ return -EAGAIN;
+ }
+
/* Write NVRAM. */
ha->isp_ops->write_nvram(vha, (uint8_t *)buf, ha->vpd_base, count);
ha->isp_ops->read_nvram(vha, (uint8_t *)ha->vpd, ha->vpd_base, count);
+ /* Update flash version information for 4Gb & above. */
+ if (!IS_FWI2_CAPABLE(ha))
+ return -EINVAL;
+
+ tmp_data = vmalloc(256);
+ if (!tmp_data) {
+ ql_log(ql_log_warn, vha, 0x706b,
+ "Unable to allocate memory for VPD information update.\n");
+ return -ENOMEM;
+ }
+ ha->isp_ops->get_flash_version(vha, tmp_data);
+ vfree(tmp_data);
+
return count;
}
@@ -398,7 +628,7 @@ static struct bin_attribute sysfs_vpd_attr = {
};
static ssize_t
-qla2x00_sysfs_read_sfp(struct kobject *kobj,
+qla2x00_sysfs_read_sfp(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
{
@@ -417,7 +647,7 @@ qla2x00_sysfs_read_sfp(struct kobject *kobj,
ha->sfp_data = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL,
&ha->sfp_data_dma);
if (!ha->sfp_data) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x706c,
"Unable to allocate memory for SFP read-data.\n");
return 0;
}
@@ -433,14 +663,14 @@ do_read:
offset = 0;
}
- rval = qla2x00_read_sfp(vha, ha->sfp_data_dma, addr, offset,
- SFP_BLOCK_SIZE);
+ rval = qla2x00_read_sfp(vha, ha->sfp_data_dma, ha->sfp_data,
+ addr, offset, SFP_BLOCK_SIZE, BIT_1);
if (rval != QLA_SUCCESS) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x706d,
"Unable to read SFP data (%x/%x/%x).\n", rval,
addr, offset);
- count = 0;
- break;
+
+ return -EIO;
}
memcpy(buf, ha->sfp_data, SFP_BLOCK_SIZE);
buf += SFP_BLOCK_SIZE;
@@ -458,17 +688,258 @@ static struct bin_attribute sysfs_sfp_attr = {
.read = qla2x00_sysfs_read_sfp,
};
+static ssize_t
+qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+ struct qla_hw_data *ha = vha->hw;
+ struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+ int type;
+ uint32_t idc_control;
+ uint8_t *tmp_data = NULL;
+ if (off != 0)
+ return -EINVAL;
+
+ type = simple_strtol(buf, NULL, 10);
+ switch (type) {
+ case 0x2025c:
+ ql_log(ql_log_info, vha, 0x706e,
+ "Issuing ISP reset.\n");
+
+ scsi_block_requests(vha->host);
+ if (IS_QLA82XX(ha)) {
+ ha->flags.isp82xx_no_md_cap = 1;
+ qla82xx_idc_lock(ha);
+ qla82xx_set_reset_owner(vha);
+ qla82xx_idc_unlock(ha);
+ } else if (IS_QLA8044(ha)) {
+ qla8044_idc_lock(ha);
+ idc_control = qla8044_rd_reg(ha,
+ QLA8044_IDC_DRV_CTRL);
+ qla8044_wr_reg(ha, QLA8044_IDC_DRV_CTRL,
+ (idc_control | GRACEFUL_RESET_BIT1));
+ qla82xx_set_reset_owner(vha);
+ qla8044_idc_unlock(ha);
+ } else {
+ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ }
+ qla2x00_wait_for_chip_reset(vha);
+ scsi_unblock_requests(vha->host);
+ break;
+ case 0x2025d:
+ if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha))
+ return -EPERM;
+
+ ql_log(ql_log_info, vha, 0x706f,
+ "Issuing MPI reset.\n");
+
+ if (IS_QLA83XX(ha)) {
+ uint32_t idc_control;
+
+ qla83xx_idc_lock(vha, 0);
+ __qla83xx_get_idc_control(vha, &idc_control);
+ idc_control |= QLA83XX_IDC_GRACEFUL_RESET;
+ __qla83xx_set_idc_control(vha, idc_control);
+ qla83xx_wr_reg(vha, QLA83XX_IDC_DEV_STATE,
+ QLA8XXX_DEV_NEED_RESET);
+ qla83xx_idc_audit(vha, IDC_AUDIT_TIMESTAMP);
+ qla83xx_idc_unlock(vha, 0);
+ break;
+ } else {
+ /* Make sure FC side is not in reset */
+ qla2x00_wait_for_hba_online(vha);
+
+ /* Issue MPI reset */
+ scsi_block_requests(vha->host);
+ if (qla81xx_restart_mpi_firmware(vha) != QLA_SUCCESS)
+ ql_log(ql_log_warn, vha, 0x7070,
+ "MPI reset failed.\n");
+ scsi_unblock_requests(vha->host);
+ break;
+ }
+ case 0x2025e:
+ if (!IS_P3P_TYPE(ha) || vha != base_vha) {
+ ql_log(ql_log_info, vha, 0x7071,
+ "FCoE ctx reset no supported.\n");
+ return -EPERM;
+ }
+
+ ql_log(ql_log_info, vha, 0x7072,
+ "Issuing FCoE ctx reset.\n");
+ set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ qla2x00_wait_for_fcoe_ctx_reset(vha);
+ break;
+ case 0x2025f:
+ if (!IS_QLA8031(ha))
+ return -EPERM;
+ ql_log(ql_log_info, vha, 0x70bc,
+ "Disabling Reset by IDC control\n");
+ qla83xx_idc_lock(vha, 0);
+ __qla83xx_get_idc_control(vha, &idc_control);
+ idc_control |= QLA83XX_IDC_RESET_DISABLED;
+ __qla83xx_set_idc_control(vha, idc_control);
+ qla83xx_idc_unlock(vha, 0);
+ break;
+ case 0x20260:
+ if (!IS_QLA8031(ha))
+ return -EPERM;
+ ql_log(ql_log_info, vha, 0x70bd,
+ "Enabling Reset by IDC control\n");
+ qla83xx_idc_lock(vha, 0);
+ __qla83xx_get_idc_control(vha, &idc_control);
+ idc_control &= ~QLA83XX_IDC_RESET_DISABLED;
+ __qla83xx_set_idc_control(vha, idc_control);
+ qla83xx_idc_unlock(vha, 0);
+ break;
+ case 0x20261:
+ ql_dbg(ql_dbg_user, vha, 0x70e0,
+ "Updating cache versions without reset ");
+
+ tmp_data = vmalloc(256);
+ if (!tmp_data) {
+ ql_log(ql_log_warn, vha, 0x70e1,
+ "Unable to allocate memory for VPD information update.\n");
+ return -ENOMEM;
+ }
+ ha->isp_ops->get_flash_version(vha, tmp_data);
+ vfree(tmp_data);
+ break;
+ }
+ return count;
+}
+
+static struct bin_attribute sysfs_reset_attr = {
+ .attr = {
+ .name = "reset",
+ .mode = S_IWUSR,
+ },
+ .size = 0,
+ .write = qla2x00_sysfs_write_reset,
+};
+
+static ssize_t
+qla2x00_sysfs_read_xgmac_stats(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+ struct qla_hw_data *ha = vha->hw;
+ int rval;
+ uint16_t actual_size;
+
+ if (!capable(CAP_SYS_ADMIN) || off != 0 || count > XGMAC_DATA_SIZE)
+ return 0;
+
+ if (ha->xgmac_data)
+ goto do_read;
+
+ ha->xgmac_data = dma_alloc_coherent(&ha->pdev->dev, XGMAC_DATA_SIZE,
+ &ha->xgmac_data_dma, GFP_KERNEL);
+ if (!ha->xgmac_data) {
+ ql_log(ql_log_warn, vha, 0x7076,
+ "Unable to allocate memory for XGMAC read-data.\n");
+ return 0;
+ }
+
+do_read:
+ actual_size = 0;
+ memset(ha->xgmac_data, 0, XGMAC_DATA_SIZE);
+
+ rval = qla2x00_get_xgmac_stats(vha, ha->xgmac_data_dma,
+ XGMAC_DATA_SIZE, &actual_size);
+ if (rval != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x7077,
+ "Unable to read XGMAC data (%x).\n", rval);
+ count = 0;
+ }
+
+ count = actual_size > count ? count: actual_size;
+ memcpy(buf, ha->xgmac_data, count);
+
+ return count;
+}
+
+static struct bin_attribute sysfs_xgmac_stats_attr = {
+ .attr = {
+ .name = "xgmac_stats",
+ .mode = S_IRUSR,
+ },
+ .size = 0,
+ .read = qla2x00_sysfs_read_xgmac_stats,
+};
+
+static ssize_t
+qla2x00_sysfs_read_dcbx_tlv(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ struct scsi_qla_host *vha = shost_priv(dev_to_shost(container_of(kobj,
+ struct device, kobj)));
+ struct qla_hw_data *ha = vha->hw;
+ int rval;
+ uint16_t actual_size;
+
+ if (!capable(CAP_SYS_ADMIN) || off != 0 || count > DCBX_TLV_DATA_SIZE)
+ return 0;
+
+ if (ha->dcbx_tlv)
+ goto do_read;
+
+ ha->dcbx_tlv = dma_alloc_coherent(&ha->pdev->dev, DCBX_TLV_DATA_SIZE,
+ &ha->dcbx_tlv_dma, GFP_KERNEL);
+ if (!ha->dcbx_tlv) {
+ ql_log(ql_log_warn, vha, 0x7078,
+ "Unable to allocate memory for DCBX TLV read-data.\n");
+ return -ENOMEM;
+ }
+
+do_read:
+ actual_size = 0;
+ memset(ha->dcbx_tlv, 0, DCBX_TLV_DATA_SIZE);
+
+ rval = qla2x00_get_dcbx_params(vha, ha->dcbx_tlv_dma,
+ DCBX_TLV_DATA_SIZE);
+ if (rval != QLA_SUCCESS) {
+ ql_log(ql_log_warn, vha, 0x7079,
+ "Unable to read DCBX TLV (%x).\n", rval);
+ return -EIO;
+ }
+
+ memcpy(buf, ha->dcbx_tlv, count);
+
+ return count;
+}
+
+static struct bin_attribute sysfs_dcbx_tlv_attr = {
+ .attr = {
+ .name = "dcbx_tlv",
+ .mode = S_IRUSR,
+ },
+ .size = 0,
+ .read = qla2x00_sysfs_read_dcbx_tlv,
+};
+
static struct sysfs_entry {
char *name;
struct bin_attribute *attr;
int is4GBp_only;
} bin_file_entries[] = {
{ "fw_dump", &sysfs_fw_dump_attr, },
+ { "fw_dump_template", &sysfs_fw_dump_template_attr, 0x27 },
{ "nvram", &sysfs_nvram_attr, },
{ "optrom", &sysfs_optrom_attr, },
{ "optrom_ctl", &sysfs_optrom_ctl_attr, },
{ "vpd", &sysfs_vpd_attr, 1 },
{ "sfp", &sysfs_sfp_attr, 1 },
+ { "reset", &sysfs_reset_attr, },
+ { "xgmac_stats", &sysfs_xgmac_stats_attr, 3 },
+ { "dcbx_tlv", &sysfs_dcbx_tlv_attr, 3 },
{ NULL },
};
@@ -482,18 +953,28 @@ qla2x00_alloc_sysfs_attr(scsi_qla_host_t *vha)
for (iter = bin_file_entries; iter->name; iter++) {
if (iter->is4GBp_only && !IS_FWI2_CAPABLE(vha->hw))
continue;
+ if (iter->is4GBp_only == 2 && !IS_QLA25XX(vha->hw))
+ continue;
+ if (iter->is4GBp_only == 3 && !(IS_CNA_CAPABLE(vha->hw)))
+ continue;
+ if (iter->is4GBp_only == 0x27 && !IS_QLA27XX(vha->hw))
+ continue;
ret = sysfs_create_bin_file(&host->shost_gendev.kobj,
iter->attr);
if (ret)
- qla_printk(KERN_INFO, vha->hw,
- "Unable to create sysfs %s binary attribute "
- "(%d).\n", iter->name, ret);
+ ql_log(ql_log_warn, vha, 0x00f3,
+ "Unable to create sysfs %s binary attribute (%d).\n",
+ iter->name, ret);
+ else
+ ql_dbg(ql_dbg_init, vha, 0x00f4,
+ "Successfully created sysfs %s binary attribure.\n",
+ iter->name);
}
}
void
-qla2x00_free_sysfs_attr(scsi_qla_host_t *vha)
+qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon)
{
struct Scsi_Host *host = vha->host;
struct sysfs_entry *iter;
@@ -502,12 +983,16 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *vha)
for (iter = bin_file_entries; iter->name; iter++) {
if (iter->is4GBp_only && !IS_FWI2_CAPABLE(ha))
continue;
+ if (iter->is4GBp_only == 2 && !IS_QLA25XX(ha))
+ continue;
+ if (iter->is4GBp_only == 3 && !(IS_CNA_CAPABLE(vha->hw)))
+ continue;
sysfs_remove_bin_file(&host->shost_gendev.kobj,
iter->attr);
}
- if (ha->beacon_blink_led == 1)
+ if (stop_beacon && ha->beacon_blink_led == 1)
ha->isp_ops->beacon_off(vha);
}
@@ -517,7 +1002,7 @@ static ssize_t
qla2x00_drvr_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%s\n", qla2x00_version_str);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", qla2x00_version_str);
}
static ssize_t
@@ -528,7 +1013,7 @@ qla2x00_fw_version_show(struct device *dev,
struct qla_hw_data *ha = vha->hw;
char fw_str[128];
- return snprintf(buf, PAGE_SIZE, "%s\n",
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
ha->isp_ops->fw_version_str(vha, fw_str));
}
@@ -540,13 +1025,16 @@ qla2x00_serial_num_show(struct device *dev, struct device_attribute *attr,
struct qla_hw_data *ha = vha->hw;
uint32_t sn;
- if (IS_FWI2_CAPABLE(ha)) {
- qla2xxx_get_vpd_field(vha, "SN", buf, PAGE_SIZE);
- return snprintf(buf, PAGE_SIZE, "%s\n", buf);
+ if (IS_QLAFX00(vha->hw)) {
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ vha->hw->mr.serial_num);
+ } else if (IS_FWI2_CAPABLE(ha)) {
+ qla2xxx_get_vpd_field(vha, "SN", buf, PAGE_SIZE - 1);
+ return strlen(strcat(buf, "\n"));
}
sn = ((ha->serial0 & 0x1f) << 16) | (ha->serial2 << 8) | ha->serial1;
- return snprintf(buf, PAGE_SIZE, "%c%05d\n", 'A' + sn / 100000,
+ return scnprintf(buf, PAGE_SIZE, "%c%05d\n", 'A' + sn / 100000,
sn % 100000);
}
@@ -555,7 +1043,7 @@ qla2x00_isp_name_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
- return snprintf(buf, PAGE_SIZE, "ISP%04X\n", vha->hw->pdev->device);
+ return scnprintf(buf, PAGE_SIZE, "ISP%04X\n", vha->hw->pdev->device);
}
static ssize_t
@@ -564,7 +1052,12 @@ qla2x00_isp_id_show(struct device *dev, struct device_attribute *attr,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%04x %04x %04x %04x\n",
+
+ if (IS_QLAFX00(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ vha->hw->mr.hw_version);
+
+ return scnprintf(buf, PAGE_SIZE, "%04x %04x %04x %04x\n",
ha->product_id[0], ha->product_id[1], ha->product_id[2],
ha->product_id[3]);
}
@@ -574,7 +1067,8 @@ qla2x00_model_name_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
- return snprintf(buf, PAGE_SIZE, "%s\n", vha->hw->model_number);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", vha->hw->model_number);
}
static ssize_t
@@ -582,7 +1076,7 @@ qla2x00_model_desc_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
- return snprintf(buf, PAGE_SIZE, "%s\n",
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
vha->hw->model_desc ? vha->hw->model_desc : "");
}
@@ -593,7 +1087,7 @@ qla2x00_pci_info_show(struct device *dev, struct device_attribute *attr,
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
char pci_info[30];
- return snprintf(buf, PAGE_SIZE, "%s\n",
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
vha->hw->isp_ops->pci_info_str(vha, pci_info));
}
@@ -606,31 +1100,31 @@ qla2x00_link_state_show(struct device *dev, struct device_attribute *attr,
int len = 0;
if (atomic_read(&vha->loop_state) == LOOP_DOWN ||
- atomic_read(&vha->loop_state) == LOOP_DEAD)
- len = snprintf(buf, PAGE_SIZE, "Link Down\n");
+ atomic_read(&vha->loop_state) == LOOP_DEAD ||
+ vha->device_flags & DFLG_NO_CABLE)
+ len = scnprintf(buf, PAGE_SIZE, "Link Down\n");
else if (atomic_read(&vha->loop_state) != LOOP_READY ||
- test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) ||
- test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags))
- len = snprintf(buf, PAGE_SIZE, "Unknown Link State\n");
+ qla2x00_reset_active(vha))
+ len = scnprintf(buf, PAGE_SIZE, "Unknown Link State\n");
else {
- len = snprintf(buf, PAGE_SIZE, "Link Up - ");
+ len = scnprintf(buf, PAGE_SIZE, "Link Up - ");
switch (ha->current_topology) {
case ISP_CFG_NL:
- len += snprintf(buf + len, PAGE_SIZE-len, "Loop\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Loop\n");
break;
case ISP_CFG_FL:
- len += snprintf(buf + len, PAGE_SIZE-len, "FL_Port\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "FL_Port\n");
break;
case ISP_CFG_N:
- len += snprintf(buf + len, PAGE_SIZE-len,
+ len += scnprintf(buf + len, PAGE_SIZE-len,
"N_Port to N_Port\n");
break;
case ISP_CFG_F:
- len += snprintf(buf + len, PAGE_SIZE-len, "F_Port\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "F_Port\n");
break;
default:
- len += snprintf(buf + len, PAGE_SIZE-len, "Loop\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Loop\n");
break;
}
}
@@ -646,10 +1140,10 @@ qla2x00_zio_show(struct device *dev, struct device_attribute *attr,
switch (vha->hw->zio_mode) {
case QLA_ZIO_MODE_6:
- len += snprintf(buf + len, PAGE_SIZE-len, "Mode 6\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Mode 6\n");
break;
case QLA_ZIO_DISABLED:
- len += snprintf(buf + len, PAGE_SIZE-len, "Disabled\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Disabled\n");
break;
}
return len;
@@ -689,7 +1183,7 @@ qla2x00_zio_timer_show(struct device *dev, struct device_attribute *attr,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
- return snprintf(buf, PAGE_SIZE, "%d us\n", vha->hw->zio_timer * 100);
+ return scnprintf(buf, PAGE_SIZE, "%d us\n", vha->hw->zio_timer * 100);
}
static ssize_t
@@ -719,9 +1213,9 @@ qla2x00_beacon_show(struct device *dev, struct device_attribute *attr,
int len = 0;
if (vha->hw->beacon_blink_led)
- len += snprintf(buf + len, PAGE_SIZE-len, "Enabled\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Enabled\n");
else
- len += snprintf(buf + len, PAGE_SIZE-len, "Disabled\n");
+ len += scnprintf(buf + len, PAGE_SIZE-len, "Disabled\n");
return len;
}
@@ -738,7 +1232,7 @@ qla2x00_beacon_store(struct device *dev, struct device_attribute *attr,
return -EPERM;
if (test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) {
- qla_printk(KERN_WARNING, ha,
+ ql_log(ql_log_warn, vha, 0x707a,
"Abort ISP active -- ignoring beacon request.\n");
return -EBUSY;
}
@@ -763,7 +1257,7 @@ qla2x00_optrom_bios_version_show(struct device *dev,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->bios_revision[1],
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->bios_revision[1],
ha->bios_revision[0]);
}
@@ -773,7 +1267,7 @@ qla2x00_optrom_efi_version_show(struct device *dev,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->efi_revision[1],
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->efi_revision[1],
ha->efi_revision[0]);
}
@@ -783,7 +1277,7 @@ qla2x00_optrom_fcode_version_show(struct device *dev,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->fcode_revision[1],
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d\n", ha->fcode_revision[1],
ha->fcode_revision[0]);
}
@@ -793,19 +1287,55 @@ qla2x00_optrom_fw_version_show(struct device *dev,
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%d.%02d.%02d %d\n",
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d.%02d %d\n",
ha->fw_revision[0], ha->fw_revision[1], ha->fw_revision[2],
ha->fw_revision[3]);
}
static ssize_t
+qla2x00_optrom_gold_fw_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ struct qla_hw_data *ha = vha->hw;
+
+ if (!IS_QLA81XX(ha) && !IS_QLA83XX(ha) && !IS_QLA27XX(ha))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d.%02d (%d)\n",
+ ha->gold_fw_version[0], ha->gold_fw_version[1],
+ ha->gold_fw_version[2], ha->gold_fw_version[3]);
+}
+
+static ssize_t
qla2x00_total_isp_aborts_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ vha->qla_stats.total_isp_aborts);
+}
+
+static ssize_t
+qla24xx_84xx_fw_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int rval = QLA_SUCCESS;
+ uint16_t status[2] = {0, 0};
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- return snprintf(buf, PAGE_SIZE, "%d\n",
- ha->qla_stats.total_isp_aborts);
+
+ if (!IS_QLA84XX(ha))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ if (ha->cs84xx->op_fw_version == 0)
+ rval = qla84xx_verify_chip(vha, status);
+
+ if ((rval == QLA_SUCCESS) && (status[0] == 0))
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (uint32_t)ha->cs84xx->op_fw_version);
+
+ return scnprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t
@@ -815,12 +1345,193 @@ qla2x00_mpi_version_show(struct device *dev, struct device_attribute *attr,
scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
struct qla_hw_data *ha = vha->hw;
- if (!IS_QLA81XX(ha))
- return snprintf(buf, PAGE_SIZE, "\n");
+ if (!IS_QLA81XX(ha) && !IS_QLA8031(ha) && !IS_QLA8044(ha))
+ return scnprintf(buf, PAGE_SIZE, "\n");
- return snprintf(buf, PAGE_SIZE, "%02x.%02x.%02x.%02x (%x)\n",
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d.%02d (%x)\n",
ha->mpi_version[0], ha->mpi_version[1], ha->mpi_version[2],
- ha->mpi_version[3], ha->mpi_capabilities);
+ ha->mpi_capabilities);
+}
+
+static ssize_t
+qla2x00_phy_version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ struct qla_hw_data *ha = vha->hw;
+
+ if (!IS_QLA81XX(ha) && !IS_QLA8031(ha))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%d.%02d.%02d\n",
+ ha->phy_version[0], ha->phy_version[1], ha->phy_version[2]);
+}
+
+static ssize_t
+qla2x00_flash_block_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ struct qla_hw_data *ha = vha->hw;
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", ha->fdt_block_size);
+}
+
+static ssize_t
+qla2x00_vlan_id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ if (!IS_CNA_CAPABLE(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", vha->fcoe_vlan_id);
+}
+
+static ssize_t
+qla2x00_vn_port_mac_address_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ if (!IS_CNA_CAPABLE(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%pMR\n", vha->fcoe_vn_port_mac);
+}
+
+static ssize_t
+qla2x00_fabric_param_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", vha->hw->switch_cap);
+}
+
+static ssize_t
+qla2x00_thermal_temp_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ uint16_t temp = 0;
+
+ if (qla2x00_reset_active(vha)) {
+ ql_log(ql_log_warn, vha, 0x70dc, "ISP reset active.\n");
+ goto done;
+ }
+
+ if (vha->hw->flags.eeh_busy) {
+ ql_log(ql_log_warn, vha, 0x70dd, "PCI EEH busy.\n");
+ goto done;
+ }
+
+ if (qla2x00_get_thermal_temp(vha, &temp) == QLA_SUCCESS)
+ return scnprintf(buf, PAGE_SIZE, "%d\n", temp);
+
+done:
+ return scnprintf(buf, PAGE_SIZE, "\n");
+}
+
+static ssize_t
+qla2x00_fw_state_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ int rval = QLA_FUNCTION_FAILED;
+ uint16_t state[5];
+ uint32_t pstate;
+
+ if (IS_QLAFX00(vha->hw)) {
+ pstate = qlafx00_fw_state_show(dev, attr, buf);
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", pstate);
+ }
+
+ if (qla2x00_reset_active(vha))
+ ql_log(ql_log_warn, vha, 0x707c,
+ "ISP reset active.\n");
+ else if (!vha->hw->flags.eeh_busy)
+ rval = qla2x00_get_firmware_state(vha, state);
+ if (rval != QLA_SUCCESS)
+ memset(state, -1, sizeof(state));
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x 0x%x 0x%x 0x%x 0x%x\n", state[0],
+ state[1], state[2], state[3], state[4]);
+}
+
+static ssize_t
+qla2x00_diag_requests_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ if (!IS_BIDI_CAPABLE(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", vha->bidi_stats.io_count);
+}
+
+static ssize_t
+qla2x00_diag_megabytes_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ if (!IS_BIDI_CAPABLE(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n",
+ vha->bidi_stats.transfer_bytes >> 20);
+}
+
+static ssize_t
+qla2x00_fw_dump_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ struct qla_hw_data *ha = vha->hw;
+ uint32_t size;
+
+ if (!ha->fw_dumped)
+ size = 0;
+ else if (IS_P3P_TYPE(ha))
+ size = ha->md_template_size + ha->md_dump_size;
+ else
+ size = ha->fw_dump_len;
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", size);
+}
+
+static ssize_t
+qla2x00_allow_cna_fw_dump_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+
+ if (!IS_P3P_TYPE(vha->hw))
+ return scnprintf(buf, PAGE_SIZE, "\n");
+ else
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ vha->hw->allow_cna_fw_dump ? "true" : "false");
+}
+
+static ssize_t
+qla2x00_allow_cna_fw_dump_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+ int val = 0;
+
+ if (!IS_P3P_TYPE(vha->hw))
+ return -EINVAL;
+
+ if (sscanf(buf, "%d", &val) != 1)
+ return -EINVAL;
+
+ vha->hw->allow_cna_fw_dump = val != 0;
+
+ return strlen(buf);
}
static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
@@ -845,9 +1556,28 @@ static DEVICE_ATTR(optrom_fcode_version, S_IRUGO,
qla2x00_optrom_fcode_version_show, NULL);
static DEVICE_ATTR(optrom_fw_version, S_IRUGO, qla2x00_optrom_fw_version_show,
NULL);
+static DEVICE_ATTR(optrom_gold_fw_version, S_IRUGO,
+ qla2x00_optrom_gold_fw_version_show, NULL);
+static DEVICE_ATTR(84xx_fw_version, S_IRUGO, qla24xx_84xx_fw_version_show,
+ NULL);
static DEVICE_ATTR(total_isp_aborts, S_IRUGO, qla2x00_total_isp_aborts_show,
NULL);
static DEVICE_ATTR(mpi_version, S_IRUGO, qla2x00_mpi_version_show, NULL);
+static DEVICE_ATTR(phy_version, S_IRUGO, qla2x00_phy_version_show, NULL);
+static DEVICE_ATTR(flash_block_size, S_IRUGO, qla2x00_flash_block_size_show,
+ NULL);
+static DEVICE_ATTR(vlan_id, S_IRUGO, qla2x00_vlan_id_show, NULL);
+static DEVICE_ATTR(vn_port_mac_address, S_IRUGO,
+ qla2x00_vn_port_mac_address_show, NULL);
+static DEVICE_ATTR(fabric_param, S_IRUGO, qla2x00_fabric_param_show, NULL);
+static DEVICE_ATTR(fw_state, S_IRUGO, qla2x00_fw_state_show, NULL);
+static DEVICE_ATTR(thermal_temp, S_IRUGO, qla2x00_thermal_temp_show, NULL);
+static DEVICE_ATTR(diag_requests, S_IRUGO, qla2x00_diag_requests_show, NULL);
+static DEVICE_ATTR(diag_megabytes, S_IRUGO, qla2x00_diag_megabytes_show, NULL);
+static DEVICE_ATTR(fw_dump_size, S_IRUGO, qla2x00_fw_dump_size_show, NULL);
+static DEVICE_ATTR(allow_cna_fw_dump, S_IRUGO | S_IWUSR,
+ qla2x00_allow_cna_fw_dump_show,
+ qla2x00_allow_cna_fw_dump_store);
struct device_attribute *qla2x00_host_attrs[] = {
&dev_attr_driver_version,
@@ -866,8 +1596,21 @@ struct device_attribute *qla2x00_host_attrs[] = {
&dev_attr_optrom_efi_version,
&dev_attr_optrom_fcode_version,
&dev_attr_optrom_fw_version,
+ &dev_attr_84xx_fw_version,
&dev_attr_total_isp_aborts,
&dev_attr_mpi_version,
+ &dev_attr_phy_version,
+ &dev_attr_flash_block_size,
+ &dev_attr_vlan_id,
+ &dev_attr_vn_port_mac_address,
+ &dev_attr_fabric_param,
+ &dev_attr_fw_state,
+ &dev_attr_optrom_gold_fw_version,
+ &dev_attr_thermal_temp,
+ &dev_attr_diag_requests,
+ &dev_attr_diag_megabytes,
+ &dev_attr_fw_dump_size,
+ &dev_attr_allow_cna_fw_dump,
NULL,
};
@@ -889,6 +1632,11 @@ qla2x00_get_host_speed(struct Scsi_Host *shost)
(shost_priv(shost)))->hw;
u32 speed = FC_PORTSPEED_UNKNOWN;
+ if (IS_QLAFX00(ha)) {
+ qlafx00_get_host_speed(shost);
+ return;
+ }
+
switch (ha->link_data_rate) {
case PORT_SPEED_1GB:
speed = FC_PORTSPEED_1GBIT;
@@ -905,6 +1653,12 @@ qla2x00_get_host_speed(struct Scsi_Host *shost)
case PORT_SPEED_10GB:
speed = FC_PORTSPEED_10GBIT;
break;
+ case PORT_SPEED_16GB:
+ speed = FC_PORTSPEED_16GBIT;
+ break;
+ case PORT_SPEED_32GB:
+ speed = FC_PORTSPEED_32GBIT;
+ break;
}
fc_host_speed(shost) = speed;
}
@@ -1008,20 +1762,31 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
{
struct Scsi_Host *host = rport_to_shost(rport);
fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+ unsigned long flags;
if (!fcport)
return;
- qla2x00_abort_fcport_cmds(fcport);
+ /* Now that the rport has been deleted, set the fcport state to
+ FCS_DEVICE_DEAD */
+ qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
/*
* Transport has effectively 'deleted' the rport, clear
* all local references.
*/
- spin_lock_irq(host->host_lock);
- fcport->rport = NULL;
+ spin_lock_irqsave(host->host_lock, flags);
+ fcport->rport = fcport->drport = NULL;
*((fc_port_t **)rport->dd_data) = NULL;
- spin_unlock_irq(host->host_lock);
+ spin_unlock_irqrestore(host->host_lock, flags);
+
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+ return;
+
+ if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
+ qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ return;
+ }
}
static void
@@ -1032,18 +1797,25 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
if (!fcport)
return;
+ if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+ return;
+
+ if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
+ qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ return;
+ }
/*
* At this point all fcport's software-states are cleared. Perform any
* final cleanup of firmware resources (PCBs and XCBs).
*/
if (fcport->loop_id != FC_NO_LOOP_ID) {
- fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
- fcport->loop_id, fcport->d_id.b.domain,
- fcport->d_id.b.area, fcport->d_id.b.al_pa);
- fcport->loop_id = FC_NO_LOOP_ID;
+ if (IS_FWI2_CAPABLE(fcport->vha->hw))
+ fcport->vha->hw->isp_ops->fabric_logout(fcport->vha,
+ fcport->loop_id, fcport->d_id.b.domain,
+ fcport->d_id.b.area, fcport->d_id.b.al_pa);
+ else
+ qla2x00_port_logout(fcport->vha, fcport);
}
-
- qla2x00_abort_fcport_cmds(fcport);
}
static int
@@ -1051,6 +1823,9 @@ qla2x00_issue_lip(struct Scsi_Host *shost)
{
scsi_qla_host_t *vha = shost_priv(shost);
+ if (IS_QLAFX00(vha->hw))
+ return 0;
+
qla2x00_loop_reset(vha);
return 0;
}
@@ -1066,13 +1841,25 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
dma_addr_t stats_dma;
struct fc_host_statistics *pfc_host_stat;
- pfc_host_stat = &ha->fc_host_stat;
+ pfc_host_stat = &vha->fc_host_stat;
memset(pfc_host_stat, -1, sizeof(struct fc_host_statistics));
+ if (IS_QLAFX00(vha->hw))
+ goto done;
+
+ if (test_bit(UNLOADING, &vha->dpc_flags))
+ goto done;
+
+ if (unlikely(pci_channel_offline(ha->pdev)))
+ goto done;
+
+ if (qla2x00_reset_active(vha))
+ goto done;
+
stats = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &stats_dma);
if (stats == NULL) {
- DEBUG2_3_11(printk("%s(%ld): Failed to allocate memory.\n",
- __func__, base_vha->host_no));
+ ql_log(ql_log_warn, vha, 0x707d,
+ "Failed to allocate memory for stats.\n");
goto done;
}
memset(stats, 0, DMA_POOL_SIZE);
@@ -1081,9 +1868,7 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
if (IS_FWI2_CAPABLE(ha)) {
rval = qla24xx_get_isp_stats(base_vha, stats, stats_dma);
} else if (atomic_read(&base_vha->loop_state) == LOOP_READY &&
- !test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) &&
- !test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) &&
- !ha->dpc_active) {
+ !ha->dpc_active) {
/* Must be in a 'READY' state for statistics retrieval. */
rval = qla2x00_get_link_status(base_vha, base_vha->loop_id,
stats, stats_dma);
@@ -1102,11 +1887,21 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost)
pfc_host_stat->lip_count = stats->lip_cnt;
pfc_host_stat->tx_frames = stats->tx_frames;
pfc_host_stat->rx_frames = stats->rx_frames;
- pfc_host_stat->dumped_frames = stats->dumped_frames;
+ pfc_host_stat->dumped_frames = stats->discarded_frames;
pfc_host_stat->nos_count = stats->nos_rcvd;
+ pfc_host_stat->error_frames =
+ stats->dropped_frames + stats->discarded_frames;
+ pfc_host_stat->rx_words = vha->qla_stats.input_bytes;
+ pfc_host_stat->tx_words = vha->qla_stats.output_bytes;
}
- pfc_host_stat->fcp_input_megabytes = ha->qla_stats.input_bytes >> 20;
- pfc_host_stat->fcp_output_megabytes = ha->qla_stats.output_bytes >> 20;
+ pfc_host_stat->fcp_control_requests = vha->qla_stats.control_requests;
+ pfc_host_stat->fcp_input_requests = vha->qla_stats.input_requests;
+ pfc_host_stat->fcp_output_requests = vha->qla_stats.output_requests;
+ pfc_host_stat->fcp_input_megabytes = vha->qla_stats.input_bytes >> 20;
+ pfc_host_stat->fcp_output_megabytes = vha->qla_stats.output_bytes >> 20;
+ pfc_host_stat->seconds_since_last_reset =
+ get_jiffies_64() - vha->qla_stats.jiffies_at_last_reset;
+ do_div(pfc_host_stat->seconds_since_last_reset, HZ);
done_free:
dma_pool_free(ha->s_dma_pool, stats, stats_dma);
@@ -1115,6 +1910,16 @@ done:
}
static void
+qla2x00_reset_host_stats(struct Scsi_Host *shost)
+{
+ scsi_qla_host_t *vha = shost_priv(shost);
+
+ memset(&vha->fc_host_stat, 0, sizeof(vha->fc_host_stat));
+
+ vha->qla_stats.jiffies_at_last_reset = get_jiffies_64();
+}
+
+static void
qla2x00_get_host_symbolic_name(struct Scsi_Host *shost)
{
scsi_qla_host_t *vha = shost_priv(shost);
@@ -1134,14 +1939,14 @@ static void
qla2x00_get_host_fabric_name(struct Scsi_Host *shost)
{
scsi_qla_host_t *vha = shost_priv(shost);
- u64 node_name;
+ uint8_t node_name[WWN_SIZE] = { 0xFF, 0xFF, 0xFF, 0xFF, \
+ 0xFF, 0xFF, 0xFF, 0xFF};
+ u64 fabric_name = wwn_to_u64(node_name);
if (vha->device_flags & SWITCH_FOUND)
- node_name = wwn_to_u64(vha->fabric_node_name);
- else
- node_name = wwn_to_u64(vha->node_name);
+ fabric_name = wwn_to_u64(vha->fabric_node_name);
- fc_host_fabric_name(shost) = node_name;
+ fc_host_fabric_name(shost) = fabric_name;
}
static void
@@ -1150,35 +1955,55 @@ qla2x00_get_host_port_state(struct Scsi_Host *shost)
scsi_qla_host_t *vha = shost_priv(shost);
struct scsi_qla_host *base_vha = pci_get_drvdata(vha->hw->pdev);
- if (!base_vha->flags.online)
+ if (!base_vha->flags.online) {
fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
- else if (atomic_read(&base_vha->loop_state) == LOOP_TIMEOUT)
- fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
- else
+ return;
+ }
+
+ switch (atomic_read(&base_vha->loop_state)) {
+ case LOOP_UPDATE:
+ fc_host_port_state(shost) = FC_PORTSTATE_DIAGNOSTICS;
+ break;
+ case LOOP_DOWN:
+ if (test_bit(LOOP_RESYNC_NEEDED, &base_vha->dpc_flags))
+ fc_host_port_state(shost) = FC_PORTSTATE_DIAGNOSTICS;
+ else
+ fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+ break;
+ case LOOP_DEAD:
+ fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+ break;
+ case LOOP_READY:
fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+ break;
+ default:
+ fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
+ break;
+ }
}
static int
qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
{
int ret = 0;
- int cnt = 0;
- uint8_t qos = QLA_DEFAULT_QUE_QOS;
+ uint8_t qos = 0;
scsi_qla_host_t *base_vha = shost_priv(fc_vport->shost);
scsi_qla_host_t *vha = NULL;
struct qla_hw_data *ha = base_vha->hw;
+ uint16_t options = 0;
+ int cnt;
+ struct req_que *req = ha->req_q_map[0];
ret = qla24xx_vport_create_req_sanity_check(fc_vport);
if (ret) {
- DEBUG15(printk("qla24xx_vport_create_req_sanity_check failed, "
- "status %x\n", ret));
+ ql_log(ql_log_warn, vha, 0x707e,
+ "Vport sanity check failed, status %x\n", ret);
return (ret);
}
vha = qla24xx_create_vhost(fc_vport);
if (vha == NULL) {
- DEBUG15(printk ("qla24xx_create_vhost failed, vha = %p\n",
- vha));
+ ql_log(ql_log_warn, vha, 0x707f, "Vport create host failed.\n");
return FC_VPORT_FAILED;
}
if (disable) {
@@ -1188,8 +2013,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
atomic_set(&vha->vp_state, VP_FAILED);
/* ready to create vport */
- qla_printk(KERN_INFO, vha->hw, "VP entry id %d assigned.\n",
- vha->vp_idx);
+ ql_log(ql_log_info, vha, 0x7080,
+ "VP entry id %d assigned.\n", vha->vp_idx);
/* initialized vport states */
atomic_set(&vha->loop_state, LOOP_DOWN);
@@ -1199,20 +2024,49 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
if (atomic_read(&base_vha->loop_state) == LOOP_DOWN ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
/* Don't retry or attempt login of this virtual port */
- DEBUG15(printk ("scsi(%ld): pport loop_state is not UP.\n",
- base_vha->host_no));
+ ql_dbg(ql_dbg_user, vha, 0x7081,
+ "Vport loop state is not UP.\n");
atomic_set(&vha->loop_state, LOOP_DEAD);
if (!disable)
fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
}
- if (scsi_add_host(vha->host, &fc_vport->dev)) {
- DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
- vha->host_no, vha->vp_idx));
+ if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
+ if (ha->fw_attributes & BIT_4) {
+ int prot = 0, guard;
+ vha->flags.difdix_supported = 1;
+ ql_dbg(ql_dbg_user, vha, 0x7082,
+ "Registered for DIF/DIX type 1 and 3 protection.\n");
+ if (ql2xenabledif == 1)
+ prot = SHOST_DIX_TYPE0_PROTECTION;
+ scsi_host_set_prot(vha->host,
+ prot | SHOST_DIF_TYPE1_PROTECTION
+ | SHOST_DIF_TYPE2_PROTECTION
+ | SHOST_DIF_TYPE3_PROTECTION
+ | SHOST_DIX_TYPE1_PROTECTION
+ | SHOST_DIX_TYPE2_PROTECTION
+ | SHOST_DIX_TYPE3_PROTECTION);
+
+ guard = SHOST_DIX_GUARD_CRC;
+
+ if (IS_PI_IPGUARD_CAPABLE(ha) &&
+ (ql2xenabledif > 1 || IS_PI_DIFB_DIX0_CAPABLE(ha)))
+ guard |= SHOST_DIX_GUARD_IP;
+
+ scsi_host_set_guard(vha->host, guard);
+ } else
+ vha->flags.difdix_supported = 0;
+ }
+
+ if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
+ &ha->pdev->dev)) {
+ ql_dbg(ql_dbg_user, vha, 0x7083,
+ "scsi_add_host failure for VP[%d].\n", vha->vp_idx);
goto vport_create_failed_2;
}
/* initialize attributes */
+ fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count;
fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name);
fc_host_port_name(vha->host) = wwn_to_u64(vha->port_name);
fc_host_supported_classes(vha->host) =
@@ -1220,25 +2074,50 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
fc_host_supported_speeds(vha->host) =
fc_host_supported_speeds(base_vha->host);
+ qlt_vport_create(vha, ha);
qla24xx_vport_disable(fc_vport, disable);
- /* Create a queue pair for the vport */
- if (ha->mqenable) {
- if (ha->npiv_info) {
- for (; cnt < ha->nvram_npiv_size; cnt++) {
- if (ha->npiv_info[cnt].port_name ==
- vha->port_name &&
- ha->npiv_info[cnt].node_name ==
- vha->node_name) {
- qos = ha->npiv_info[cnt].q_qos;
- break;
- }
- }
+ if (ha->flags.cpu_affinity_enabled) {
+ req = ha->req_q_map[1];
+ ql_dbg(ql_dbg_multiq, vha, 0xc000,
+ "Request queue %p attached with "
+ "VP[%d], cpu affinity =%d\n",
+ req, vha->vp_idx, ha->flags.cpu_affinity_enabled);
+ goto vport_queue;
+ } else if (ql2xmaxqueues == 1 || !ha->npiv_info)
+ goto vport_queue;
+ /* Create a request queue in QoS mode for the vport */
+ for (cnt = 0; cnt < ha->nvram_npiv_size; cnt++) {
+ if (memcmp(ha->npiv_info[cnt].port_name, vha->port_name, 8) == 0
+ && memcmp(ha->npiv_info[cnt].node_name, vha->node_name,
+ 8) == 0) {
+ qos = ha->npiv_info[cnt].q_qos;
+ break;
+ }
+ }
+
+ if (qos) {
+ ret = qla25xx_create_req_que(ha, options, vha->vp_idx, 0, 0,
+ qos);
+ if (!ret)
+ ql_log(ql_log_warn, vha, 0x7084,
+ "Can't create request queue for VP[%d]\n",
+ vha->vp_idx);
+ else {
+ ql_dbg(ql_dbg_multiq, vha, 0xc001,
+ "Request Que:%d Q0s: %d) created for VP[%d]\n",
+ ret, qos, vha->vp_idx);
+ ql_dbg(ql_dbg_user, vha, 0x7085,
+ "Request Que:%d Q0s: %d) created for VP[%d]\n",
+ ret, qos, vha->vp_idx);
+ req = ha->req_q_map[ret];
}
- qla25xx_create_queues(vha, qos);
}
+vport_queue:
+ vha->req = req;
return 0;
+
vport_create_failed_2:
qla24xx_disable_vp(vha);
qla24xx_deallocate_vp_id(vha);
@@ -1250,7 +2129,6 @@ static int
qla24xx_vport_delete(struct fc_vport *fc_vport)
{
scsi_qla_host_t *vha = fc_vport->dd_data;
- fc_port_t *fcport, *tfcport;
struct qla_hw_data *ha = vha->hw;
uint16_t id = vha->vp_idx;
@@ -1260,33 +2138,40 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
qla24xx_disable_vp(vha);
+ vha->flags.delete_progress = 1;
+
+ qlt_remove_target(ha, vha);
+
fc_remove_host(vha->host);
scsi_remove_host(vha->host);
- list_for_each_entry_safe(fcport, tfcport, &vha->vp_fcports, list) {
- list_del(&fcport->list);
- kfree(fcport);
- fcport = NULL;
- }
-
+ /* Allow timer to run to drain queued items, when removing vp */
qla24xx_deallocate_vp_id(vha);
if (vha->timer_active) {
qla2x00_vp_stop_timer(vha);
- DEBUG15(printk ("scsi(%ld): timer for the vport[%d] = %p "
- "has stopped\n",
- vha->host_no, vha->vp_idx, vha));
- }
-
- if (ha->mqenable) {
- if (qla25xx_delete_queues(vha, 0) != QLA_SUCCESS)
- qla_printk(KERN_WARNING, ha,
- "Queue delete failed.\n");
+ ql_dbg(ql_dbg_user, vha, 0x7086,
+ "Timer for the VP[%d] has stopped\n", vha->vp_idx);
}
+ BUG_ON(atomic_read(&vha->vref_count));
+
+ qla2x00_free_fcports(vha);
+
+ mutex_lock(&ha->vport_lock);
+ ha->cur_vport_count--;
+ clear_bit(vha->vp_idx, ha->vp_idx_map);
+ mutex_unlock(&ha->vport_lock);
+
+ if (vha->req->id && !ha->flags.cpu_affinity_enabled) {
+ if (qla25xx_delete_req_que(vha, vha->req) != QLA_SUCCESS)
+ ql_log(ql_log_warn, vha, 0x7087,
+ "Queue delete failed.\n");
+ }
+
+ ql_log(ql_log_info, vha, 0x7088, "VP[%d] deleted.\n", id);
scsi_host_put(vha->host);
- qla_printk(KERN_INFO, ha, "vport %d deleted\n", id);
return 0;
}
@@ -1342,10 +2227,13 @@ struct fc_function_template qla2xxx_transport_functions = {
.dev_loss_tmo_callbk = qla2x00_dev_loss_tmo_callbk,
.terminate_rport_io = qla2x00_terminate_rport_io,
.get_fc_host_stats = qla2x00_get_fc_host_stats,
+ .reset_fc_host_stats = qla2x00_reset_host_stats,
.vport_create = qla24xx_vport_create,
.vport_disable = qla24xx_vport_disable,
.vport_delete = qla24xx_vport_delete,
+ .bsg_request = qla24xx_bsg_request,
+ .bsg_timeout = qla24xx_bsg_timeout,
};
struct fc_function_template qla2xxx_transport_vport_functions = {
@@ -1386,6 +2274,10 @@ struct fc_function_template qla2xxx_transport_vport_functions = {
.dev_loss_tmo_callbk = qla2x00_dev_loss_tmo_callbk,
.terminate_rport_io = qla2x00_terminate_rport_io,
.get_fc_host_stats = qla2x00_get_fc_host_stats,
+ .reset_fc_host_stats = qla2x00_reset_host_stats,
+
+ .bsg_request = qla24xx_bsg_request,
+ .bsg_timeout = qla24xx_bsg_timeout,
};
void
@@ -1394,14 +2286,19 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha)
struct qla_hw_data *ha = vha->hw;
u32 speed = FC_PORTSPEED_UNKNOWN;
+ fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count;
fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name);
fc_host_port_name(vha->host) = wwn_to_u64(vha->port_name);
- fc_host_supported_classes(vha->host) = FC_COS_CLASS3;
+ fc_host_supported_classes(vha->host) = ha->tgt.enable_class_2 ?
+ (FC_COS_CLASS2|FC_COS_CLASS3) : FC_COS_CLASS3;
fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports;
fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count;
- if (IS_QLA81XX(ha))
+ if (IS_CNA_CAPABLE(ha))
speed = FC_PORTSPEED_10GBIT;
+ else if (IS_QLA2031(ha))
+ speed = FC_PORTSPEED_16GBIT | FC_PORTSPEED_8GBIT |
+ FC_PORTSPEED_4GBIT;
else if (IS_QLA25XX(ha))
speed = FC_PORTSPEED_8GBIT | FC_PORTSPEED_4GBIT |
FC_PORTSPEED_2GBIT | FC_PORTSPEED_1GBIT;
@@ -1410,6 +2307,12 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha)
FC_PORTSPEED_1GBIT;
else if (IS_QLA23XX(ha))
speed = FC_PORTSPEED_2GBIT | FC_PORTSPEED_1GBIT;
+ else if (IS_QLAFX00(ha))
+ speed = FC_PORTSPEED_8GBIT | FC_PORTSPEED_4GBIT |
+ FC_PORTSPEED_2GBIT | FC_PORTSPEED_1GBIT;
+ else if (IS_QLA27XX(ha))
+ speed = FC_PORTSPEED_32GBIT | FC_PORTSPEED_16GBIT |
+ FC_PORTSPEED_8GBIT;
else
speed = FC_PORTSPEED_1GBIT;
fc_host_supported_speeds(vha->host) = speed;
a href='/linux/diff/fs/hpfs/dir.c?id2=b9d12085f2f531fdea67f0361564e0812696227c'>fs/hpfs/dir.c6
-rw-r--r--fs/hpfs/dnode.c44
-rw-r--r--fs/hpfs/ea.c6
-rw-r--r--fs/hpfs/file.c8
-rw-r--r--fs/hpfs/hpfs_fn.h5
-rw-r--r--fs/hpfs/inode.c3
-rw-r--r--fs/hpfs/map.c17
-rw-r--r--fs/hpfs/name.c11
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c55
-rw-r--r--fs/hugetlbfs/inode.c28
-rw-r--r--fs/inode.c12
-rw-r--r--fs/ioprio.c241
-rw-r--r--fs/jbd/revoke.c12
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--fs/jffs2/background.c12
-rw-r--r--fs/jffs2/file.c8
-rw-r--r--fs/jfs/acl.c16
-rw-r--r--fs/jfs/file.c10
-rw-r--r--fs/jfs/inode.c8
-rw-r--r--fs/jfs/jfs_dmap.c9
-rw-r--r--fs/jfs/jfs_inode.c16
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/super.c77
-rw-r--r--fs/kernfs/dir.c10
-rw-r--r--fs/kernfs/file.c111
-rw-r--r--fs/kernfs/inode.c14
-rw-r--r--fs/kernfs/kernfs-internal.h5
-rw-r--r--fs/kernfs/mount.c52
-rw-r--r--fs/libfs.c34
-rw-r--r--fs/lockd/clnt4xdr.c2
-rw-r--r--fs/lockd/clntxdr.c2
-rw-r--r--fs/lockd/svc.c8
-rw-r--r--fs/lockd/svcsubs.c3
-rw-r--r--fs/lockd/xdr.c2
-rw-r--r--fs/locks.c126
-rw-r--r--fs/logfs/file.c8
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/minix/file.c8
-rw-r--r--fs/mpage.c84
-rw-r--r--fs/namei.c20
-rw-r--r--fs/ncpfs/getopt.c13
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c38
-rw-r--r--fs/nfs/dir.c12
-rw-r--r--fs/nfs/direct.c437
-rw-r--r--fs/nfs/file.c69
-rw-r--r--fs/nfs/filelayout/Makefile5
-rw-r--r--fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c)203
-rw-r--r--fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h)2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c)10
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/inode.c104
-rw-r--r--fs/nfs/internal.h40
-rw-r--r--fs/nfs/nfs2xdr.c14
-rw-r--r--fs/nfs/nfs3acl.c43
-rw-r--r--fs/nfs/nfs3proc.c25
-rw-r--r--fs/nfs/nfs3xdr.c16
-rw-r--r--fs/nfs/nfs4_fs.h6
-rw-r--r--fs/nfs/nfs4file.c13
-rw-r--r--fs/nfs/nfs4namespace.c102
-rw-r--r--fs/nfs/nfs4proc.c60
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/nfs4sysctl.c6
-rw-r--r--fs/nfs/nfs4trace.h8
-rw-r--r--fs/nfs/nfs4xdr.c19
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c24
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c649
-rw-r--r--fs/nfs/pnfs.c168
-rw-r--r--fs/nfs/pnfs.h32
-rw-r--r--fs/nfs/proc.c21
-rw-r--r--fs/nfs/read.c414
-rw-r--r--fs/nfs/super.c27
-rw-r--r--fs/nfs/sysctl.c6
-rw-r--r--fs/nfs/write.c925
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/auth.c5
-rw-r--r--fs/nfsd/export.c88
-rw-r--r--fs/nfsd/export.h110
-rw-r--r--fs/nfsd/fault_inject.c15
-rw-r--r--fs/nfsd/idmap.h4
-rw-r--r--fs/nfsd/nfs2acl.c12
-rw-r--r--fs/nfsd/nfs3acl.c6
-rw-r--r--fs/nfsd/nfs3xdr.c27
-rw-r--r--fs/nfsd/nfs4acl.c31
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4idmap.c42
-rw-r--r--fs/nfsd/nfs4proc.c189
-rw-r--r--fs/nfsd/nfs4state.c387
-rw-r--r--fs/nfsd/nfs4xdr.c1937
-rw-r--r--fs/nfsd/nfscache.c17
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsd.h17
-rw-r--r--fs/nfsd/nfsfh.c25
-rw-r--r--fs/nfsd/nfsfh.h59
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/nfsd/nfsxdr.c15
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/stats.c1
-rw-r--r--fs/nfsd/stats.h43
-rw-r--r--fs/nfsd/vfs.c155
-rw-r--r--fs/nfsd/vfs.h10
-rw-r--r--fs/nfsd/xdr4.h23
-rw-r--r--fs/nilfs2/file.c8
-rw-r--r--fs/nilfs2/inode.c9
-rw-r--r--fs/notify/fanotify/fanotify_user.c47
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/notify/mark.c2
-rw-r--r--fs/ntfs/attrib.c1
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c10
-rw-r--r--fs/ntfs/super.c4
-rw-r--r--fs/ntfs/sysctl.c6
-rw-r--r--fs/ocfs2/alloc.c6
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c33
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h5
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c14
-rw-r--r--fs/ocfs2/dlm/dlmlock.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c71
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c13
-rw-r--r--fs/ocfs2/dlm/dlmthread.c13
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c18
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/file.c140
-rw-r--r--fs/ocfs2/ioctl.c81
-rw-r--r--fs/ocfs2/journal.c17
-rw-r--r--fs/ocfs2/namei.c145
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/refcounttree.c15
-rw-r--r--fs/ocfs2/resize.c10
-rw-r--r--fs/ocfs2/stackglue.c2
-rw-r--r--fs/ocfs2/super.c16
-rw-r--r--fs/ocfs2/uptodate.c2
-rw-r--r--fs/omfs/file.c8
-rw-r--r--fs/open.c26
-rw-r--r--fs/pipe.c145
-rw-r--r--fs/posix_acl.c6
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/proc/task_mmu.c30
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/pstore/platform.c19
-rw-r--r--fs/pstore/ram_core.c36
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/quota/quota.c14
-rw-r--r--fs/ramfs/file-mmu.c10
-rw-r--r--fs/ramfs/file-nommu.c10
-rw-r--r--fs/read_write.c108
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/reiserfs/bitmap.c272
-rw-r--r--fs/reiserfs/dir.c156
-rw-r--r--fs/reiserfs/do_balan.c2449
-rw-r--r--fs/reiserfs/file.c100
-rw-r--r--fs/reiserfs/fix_node.c1008
-rw-r--r--fs/reiserfs/hashes.c15
-rw-r--r--fs/reiserfs/ibalance.c271
-rw-r--r--fs/reiserfs/inode.c1216
-rw-r--r--fs/reiserfs/ioctl.c27
-rw-r--r--fs/reiserfs/item_ops.c108
-rw-r--r--fs/reiserfs/journal.c1339
-rw-r--r--fs/reiserfs/lbalance.c501
-rw-r--r--fs/reiserfs/namei.c513
-rw-r--r--fs/reiserfs/objectid.c101
-rw-r--r--fs/reiserfs/prints.c176
-rw-r--r--fs/reiserfs/reiserfs.h1921
-rw-r--r--fs/reiserfs/resize.c75
-rw-r--r--fs/reiserfs/stree.c892
-rw-r--r--fs/reiserfs/super.c552
-rw-r--r--fs/reiserfs/tail_conversion.c161
-rw-r--r--fs/reiserfs/xattr.c70
-rw-r--r--fs/reiserfs/xattr.h3
-rw-r--r--fs/reiserfs/xattr_acl.c38
-rw-r--r--fs/romfs/mmap-nommu.c4
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/splice.c201
-rw-r--r--fs/squashfs/squashfs.h2
-rw-r--r--fs/super.c21
-rw-r--r--fs/sysfs/file.c95
-rw-r--r--fs/sysfs/group.c10
-rw-r--r--fs/sysfs/mount.c7
-rw-r--r--fs/sysv/file.c8
-rw-r--r--fs/ubifs/budget.c1
-rw-r--r--fs/ubifs/debug.c4
-rw-r--r--fs/ubifs/file.c31
-rw-r--r--fs/ubifs/io.c18
-rw-r--r--fs/ubifs/lpt_commit.c4
-rw-r--r--fs/ubifs/shrinker.c1
-rw-r--r--fs/ubifs/super.c7
-rw-r--r--fs/ubifs/tnc.c5
-rw-r--r--fs/ubifs/tnc_commit.c4
-rw-r--r--fs/ubifs/ubifs.h11
-rw-r--r--fs/udf/file.c19
-rw-r--r--fs/udf/inode.c10
-rw-r--r--fs/ufs/balloc.c36
-rw-r--r--fs/ufs/file.c8
-rw-r--r--fs/ufs/ialloc.c17
-rw-r--r--fs/ufs/super.c28
-rw-r--r--fs/ufs/ufs.h1
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_ag.h36
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/xfs_aops.c123
-rw-r--r--fs/xfs/xfs_attr.c365
-rw-r--r--fs/xfs/xfs_attr_leaf.c205
-rw-r--r--fs/xfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_attr_remote.c66
-rw-r--r--fs/xfs/xfs_bit.h7
-rw-r--r--fs/xfs/xfs_bmap.c235
-rw-r--r--fs/xfs/xfs_bmap.h8
-rw-r--r--fs/xfs/xfs_bmap_btree.c9
-rw-r--r--fs/xfs/xfs_bmap_btree.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c58
-rw-r--r--fs/xfs/xfs_bmap_util.h13
-rw-r--r--fs/xfs/xfs_btree.c138
-rw-r--r--fs/xfs/xfs_btree.h5
-rw-r--r--fs/xfs/xfs_buf.c33
-rw-r--r--fs/xfs/xfs_buf.h9
-rw-r--r--fs/xfs/xfs_buf_item.c5
-rw-r--r--fs/xfs/xfs_da_btree.c114
-rw-r--r--fs/xfs/xfs_da_btree.h28
-rw-r--r--fs/xfs/xfs_da_format.c36
-rw-r--r--fs/xfs/xfs_da_format.h154
-rw-r--r--fs/xfs/xfs_dir2.c136
-rw-r--r--fs/xfs/xfs_dir2.h30
-rw-r--r--fs/xfs/xfs_dir2_block.c97
-rw-r--r--fs/xfs/xfs_dir2_data.c83
-rw-r--r--fs/xfs/xfs_dir2_leaf.c202
-rw-r--r--fs/xfs/xfs_dir2_node.c190
-rw-r--r--fs/xfs/xfs_dir2_priv.h142
-rw-r--r--fs/xfs/xfs_dir2_readdir.c155
-rw-r--r--fs/xfs/xfs_dir2_sf.c39
-rw-r--r--fs/xfs/xfs_dquot.c59
-rw-r--r--fs/xfs/xfs_dquot.h2
-rw-r--r--fs/xfs/xfs_dquot_buf.c5
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_file.c139
-rw-r--r--fs/xfs/xfs_filestream.c684
-rw-r--r--fs/xfs/xfs_filestream.h34
-rw-r--r--fs/xfs/xfs_format.h14
-rw-r--r--fs/xfs/xfs_fs.h1
-rw-r--r--fs/xfs/xfs_fsops.c49
-rw-r--r--fs/xfs/xfs_ialloc.c704
-rw-r--r--fs/xfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/xfs_ialloc_btree.c69
-rw-r--r--fs/xfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/xfs_icache.c12
-rw-r--r--fs/xfs/xfs_icache.h6
-rw-r--r--fs/xfs/xfs_inode.c183
-rw-r--r--fs/xfs/xfs_inode.h7
-rw-r--r--fs/xfs/xfs_inode_buf.c17
-rw-r--r--fs/xfs/xfs_inode_fork.c3
-rw-r--r--fs/xfs/xfs_inode_fork.h3
-rw-r--r--fs/xfs/xfs_inode_item.c32
-rw-r--r--fs/xfs/xfs_ioctl.c16
-rw-r--r--fs/xfs/xfs_ioctl32.c5
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_iops.c73
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_log.c74
-rw-r--r--fs/xfs/xfs_log.h19
-rw-r--r--fs/xfs/xfs_log_cil.c57
-rw-r--r--fs/xfs/xfs_log_recover.c11
-rw-r--r--fs/xfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/xfs_mount.c47
-rw-r--r--fs/xfs/xfs_mount.h12
-rw-r--r--fs/xfs/xfs_mru_cache.c151
-rw-r--r--fs/xfs/xfs_mru_cache.h31
-rw-r--r--fs/xfs/xfs_qm.c243
-rw-r--r--fs/xfs/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/xfs_quotaops.c29
-rw-r--r--fs/xfs/xfs_rtbitmap.c1
-rw-r--r--fs/xfs/xfs_sb.c41
-rw-r--r--fs/xfs/xfs_sb.h235
-rw-r--r--fs/xfs/xfs_shared.h2
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_stats.h18
-rw-r--r--fs/xfs/xfs_super.c26
-rw-r--r--fs/xfs/xfs_symlink.c3
-rw-r--r--fs/xfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--fs/xfs/xfs_trace.h60
-rw-r--r--fs/xfs/xfs_trans.c2
-rw-r--r--fs/xfs/xfs_trans_ail.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_trans_resv.c56
-rw-r--r--fs/xfs/xfs_trans_space.h12
-rw-r--r--fs/xfs/xfs_types.h2
571 files changed, 24519 insertions, 20086 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 14da82564f4..6894b085f0e 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -537,7 +537,7 @@ static struct attribute_group v9fs_attr_group = {
*
*/
-static int v9fs_sysfs_init(void)
+static int __init v9fs_sysfs_init(void)
{
v9fs_kobj = kobject_create_and_add("9p", fs_kobj);
if (!v9fs_kobj)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c71e88602ff..cc1cfae726b 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page)
*
*/
static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
+v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
/*
* FIXME
@@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
*/
p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
- (long long)pos, nr_segs);
+ (long long)pos, iter->nr_segs);
return -EINVAL;
}
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4d0c2e0be7e..0b3bfa303dd 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -42,7 +42,6 @@
/**
* struct p9_rdir - readdir accounting
- * @mutex: mutex protecting readdir
* @head: start offset of current dirread buffer
* @tail: end offset of current dirread buffer
* @buf: dirread buffer
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index d8223209d4b..520c11c2dcc 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -352,9 +352,6 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
invalidate_mapping_pages(&inode->i_data, 0, -1);
}
/* Convert flock to posix lock */
- fl->fl_owner = (fl_owner_t)filp;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
fl->fl_flags |= FL_POSIX;
fl->fl_flags ^= FL_FLOCK;
@@ -684,7 +681,7 @@ v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
/**
* v9fs_cached_file_read - read from a file
* @filp: file pointer to read
- * @udata: user data buffer to read data into
+ * @data: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
@@ -695,13 +692,13 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
{
if (filp->f_flags & O_DIRECT)
return v9fs_direct_read(filp, data, count, offset);
- return do_sync_read(filp, data, count, offset);
+ return new_sync_read(filp, data, count, offset);
}
/**
* v9fs_mmap_file_read - read from a file
* @filp: file pointer to read
- * @udata: user data buffer to read data into
+ * @data: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
@@ -763,7 +760,7 @@ err_out:
buff_write:
mutex_unlock(&inode->i_mutex);
- return do_sync_write(filp, data, count, offsetp);
+ return new_sync_write(filp, data, count, offsetp);
}
/**
@@ -781,7 +778,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
if (filp->f_flags & O_DIRECT)
return v9fs_direct_write(filp, data, count, offset);
- return do_sync_write(filp, data, count, offset);
+ return new_sync_write(filp, data, count, offset);
}
@@ -850,8 +847,8 @@ const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
.read = v9fs_cached_file_read,
.write = v9fs_cached_file_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
@@ -863,8 +860,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
.read = v9fs_cached_file_read,
.write = v9fs_cached_file_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 53161ec058a..7fa4f7a7653 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -147,7 +147,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
int major = -1, minor = -1;
strlcpy(ext, stat->extension, sizeof(ext));
- sscanf(ext, "%c %u %u", &type, &major, &minor);
+ sscanf(ext, "%c %i %i", &type, &major, &minor);
switch (type) {
case 'c':
res |= S_IFCHR;
@@ -580,7 +580,7 @@ static int v9fs_at_to_dotl_flags(int flags)
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
* @dentry: dentry that is being deleted
- * @rmdir: removing a directory
+ * @flags: removing a directory
*
*/
@@ -778,7 +778,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
* v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
* @dir: inode that is being walked from
* @dentry: dentry that is being walked to?
- * @nameidata: path data
+ * @flags: lookup flags (unused)
*
*/
@@ -1324,7 +1324,7 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
* v9fs_vfs_mkspecial - create a special file
* @dir: inode to create special file in
* @dentry: dentry to create
- * @mode: mode to create special file
+ * @perm: mode to create special file
* @extension: 9p2000.u format extension string representing special file
*
*/
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 59dc8e87647..1fa85aae24d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -226,7 +226,7 @@ int v9fs_open_to_dotl_flags(int flags)
* v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
* @dir: directory inode that is being created
* @dentry: dentry that is being deleted
- * @mode: create permissions
+ * @omode: create permissions
*
*/
@@ -375,7 +375,7 @@ err_clunk_old_fid:
* v9fs_vfs_mkdir_dotl - VFS mkdir hook to create a directory
* @dir: inode that is being unlinked
* @dentry: dentry that is being unlinked
- * @mode: mode for new directory
+ * @omode: mode for new directory
*
*/
@@ -607,7 +607,6 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
* v9fs_stat2inode_dotl - populate an inode structure with stat info
* @stat: stat structure
* @inode: inode to populate
- * @sb: superblock of filesystem
*
*/
@@ -808,7 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
* v9fs_vfs_mknod_dotl - create a special file
* @dir: inode destination for new link
* @dentry: dentry for file
- * @mode: mode for creation
+ * @omode: mode for creation
* @rdev: device associated with special file
*
*/
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 04133a1fd9c..f95e01e058e 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -156,7 +156,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
offset += write_count;
value_len -= write_count;
}
- retval = offset;
+ retval = 0;
err:
p9_client_clunk(fid);
return retval;
diff --git a/fs/Makefile b/fs/Makefile
index f9cb9876e46..4030cbfbc9a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -14,14 +14,13 @@ obj-y := open.o read_write.o file_table.o super.o \
stack.o fs_struct.o statfs.o
ifeq ($(CONFIG_BLOCK),y)
-obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+obj-y += buffer.o block_dev.o direct-io.o mpage.o
else
obj-y += no-block.o
endif
obj-$(CONFIG_PROC_FS) += proc_namespace.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
obj-$(CONFIG_ANON_INODES) += anon_inodes.o
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index a36da5382b4..07c9edce5aa 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,12 +23,12 @@
const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .aio_read = generic_file_aio_read,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
- .write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .write = new_sync_write,
+ .write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 25b23b1e7f2..9bca8815972 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,3 +1,9 @@
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
@@ -206,7 +212,7 @@ affs_set_blocksize(struct super_block *sb, int size)
static inline struct buffer_head *
affs_bread(struct super_block *sb, int block)
{
- pr_debug("affs_bread: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
return sb_bread(sb, block);
return NULL;
@@ -214,7 +220,7 @@ affs_bread(struct super_block *sb, int block)
static inline struct buffer_head *
affs_getblk(struct super_block *sb, int block)
{
- pr_debug("affs_getblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
return sb_getblk(sb, block);
return NULL;
@@ -223,7 +229,7 @@ static inline struct buffer_head *
affs_getzeroblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
- pr_debug("affs_getzeroblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
@@ -238,7 +244,7 @@ static inline struct buffer_head *
affs_getemptyblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
- pr_debug("affs_getemptyblk: %d\n", block);
+ pr_debug("%s: %d\n", __func__, block);
if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
@@ -251,7 +257,7 @@ static inline void
affs_brelse(struct buffer_head *bh)
{
if (bh)
- pr_debug("affs_brelse: %lld\n", (long long) bh->b_blocknr);
+ pr_debug("%s: %lld\n", __func__, (long long) bh->b_blocknr);
brelse(bh);
}
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 533a322c41c..406b29836b1 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -34,7 +34,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
ino = bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]);
- pr_debug("AFFS: insert_hash(dir=%u, ino=%d)\n", (u32)dir->i_ino, ino);
+ pr_debug("%s(dir=%u, ino=%d)\n", __func__, (u32)dir->i_ino, ino);
dir_bh = affs_bread(sb, dir->i_ino);
if (!dir_bh)
@@ -84,7 +84,8 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
sb = dir->i_sb;
rem_ino = rem_bh->b_blocknr;
offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]);
- pr_debug("AFFS: remove_hash(dir=%d, ino=%d, hashval=%d)\n", (u32)dir->i_ino, rem_ino, offset);
+ pr_debug("%s(dir=%d, ino=%d, hashval=%d)\n",
+ __func__, (u32)dir->i_ino, rem_ino, offset);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
@@ -147,7 +148,7 @@ affs_remove_link(struct dentry *dentry)
u32 link_ino, ino;
int retval;
- pr_debug("AFFS: remove_link(key=%ld)\n", inode->i_ino);
+ pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
retval = -EIO;
bh = affs_bread(sb, inode->i_ino);
if (!bh)
@@ -279,7 +280,7 @@ affs_remove_header(struct dentry *dentry)
if (!inode)
goto done;
- pr_debug("AFFS: remove_header(key=%ld)\n", inode->i_ino);
+ pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
retval = -EIO;
bh = affs_bread(sb, (u32)(long)dentry->d_fsdata);
if (!bh)
@@ -451,10 +452,10 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
va_end(args);
- printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id,
+ pr_crit("error (device %s): %s(): %s\n", sb->s_id,
function,ErrorBuffer);
if (!(sb->s_flags & MS_RDONLY))
- printk(KERN_WARNING "AFFS: Remounting filesystem read-only\n");
+ pr_warn("Remounting filesystem read-only\n");
sb->s_flags |= MS_RDONLY;
}
@@ -467,7 +468,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
va_end(args);
- printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id,
+ pr_warn("(device %s): %s(): %s\n", sb->s_id,
function,ErrorBuffer);
}
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index a32246b8359..c8de51185c2 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -17,7 +17,7 @@ affs_count_free_blocks(struct super_block *sb)
u32 free;
int i;
- pr_debug("AFFS: count_free_blocks()\n");
+ pr_debug("%s()\n", __func__);
if (sb->s_flags & MS_RDONLY)
return 0;
@@ -43,7 +43,7 @@ affs_free_block(struct super_block *sb, u32 block)
u32 blk, bmap, bit, mask, tmp;
__be32 *data;
- pr_debug("AFFS: free_block(%u)\n", block);
+ pr_debug("%s(%u)\n", __func__, block);
if (block > sbi->s_partition_size)
goto err_range;
@@ -125,7 +125,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
sb = inode->i_sb;
sbi = AFFS_SB(sb);
- pr_debug("AFFS: balloc(inode=%lu,goal=%u): ", inode->i_ino, goal);
+ pr_debug("balloc(inode=%lu,goal=%u): ", inode->i_ino, goal);
if (AFFS_I(inode)->i_pa_cnt) {
pr_debug("%d\n", AFFS_I(inode)->i_lastalloc+1);
@@ -254,8 +254,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
return 0;
if (!AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag) {
- printk(KERN_NOTICE "AFFS: Bitmap invalid - mounting %s read only\n",
- sb->s_id);
+ pr_notice("Bitmap invalid - mounting %s read only\n", sb->s_id);
*flags |= MS_RDONLY;
return 0;
}
@@ -268,7 +267,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
size = sbi->s_bmap_count * sizeof(*bm);
bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL);
if (!sbi->s_bitmap) {
- printk(KERN_ERR "AFFS: Bitmap allocation failed\n");
+ pr_err("Bitmap allocation failed\n");
return -ENOMEM;
}
@@ -282,17 +281,17 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
bm->bm_key = be32_to_cpu(bmap_blk[blk]);
bh = affs_bread(sb, bm->bm_key);
if (!bh) {
- printk(KERN_ERR "AFFS: Cannot read bitmap\n");
+ pr_err("Cannot read bitmap\n");
res = -EIO;
goto out;
}
if (affs_checksum_block(sb, bh)) {
- printk(KERN_WARNING "AFFS: Bitmap %u invalid - mounting %s read only.\n",
- bm->bm_key, sb->s_id);
+ pr_warn("Bitmap %u invalid - mounting %s read only.\n",
+ bm->bm_key, sb->s_id);
*flags |= MS_RDONLY;
goto out;
}
- pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key);
+ pr_debug("read bitmap block %d: %d\n", blk, bm->bm_key);
bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4);
/* Don't try read the extension if this is the last block,
@@ -304,7 +303,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
affs_brelse(bmap_bh);
bmap_bh = affs_bread(sb, be32_to_cpu(bmap_blk[blk]));
if (!bmap_bh) {
- printk(KERN_ERR "AFFS: Cannot read bitmap extension\n");
+ pr_err("Cannot read bitmap extension\n");
res = -EIO;
goto out;
}
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index cbbda476a80..59f07bec92a 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -54,8 +54,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
u32 ino;
int error = 0;
- pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
- inode->i_ino, (unsigned long)ctx->pos);
+ pr_debug("%s(ino=%lu,f_pos=%lx)\n",
+ __func__, inode->i_ino, (unsigned long)ctx->pos);
if (ctx->pos < 2) {
file->private_data = (void *)0;
@@ -81,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
*/
ino = (u32)(long)file->private_data;
if (ino && file->f_version == inode->i_version) {
- pr_debug("AFFS: readdir() left off=%d\n", ino);
+ pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
@@ -117,7 +117,7 @@ inside:
namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
name = AFFS_TAIL(sb, fh_bh)->name + 1;
- pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
+ pr_debug("readdir(): dir_emit(\"%.*s\", "
"ino=%u), hash=%d, f_pos=%x\n",
namelen, name, ino, hash_pos, (u32)ctx->pos);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 8669b6ecdde..a7fe57d2cd9 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -27,10 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp);
const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .aio_read = generic_file_aio_read,
- .write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
+ .write = new_sync_write,
+ .write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
@@ -45,7 +45,7 @@ const struct inode_operations affs_file_inode_operations = {
static int
affs_file_open(struct inode *inode, struct file *filp)
{
- pr_debug("AFFS: open(%lu,%d)\n",
+ pr_debug("open(%lu,%d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
atomic_inc(&AFFS_I(inode)->i_opencnt);
return 0;
@@ -54,7 +54,7 @@ affs_file_open(struct inode *inode, struct file *filp)
static int
affs_file_release(struct inode *inode, struct file *filp)
{
- pr_debug("AFFS: release(%lu, %d)\n",
+ pr_debug("release(%lu, %d)\n",
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
@@ -324,7 +324,8 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
struct buffer_head *ext_bh;
u32 ext;
- pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
+ pr_debug("%s(%u, %lu)\n",
+ __func__, (u32)inode->i_ino, (unsigned long)block);
BUG_ON(block > (sector_t)0x7fffffffUL);
@@ -498,34 +499,36 @@ affs_getemptyblk_ino(struct inode *inode, int block)
}
static int
-affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsigned to)
+affs_do_readpage_ofs(struct page *page, unsigned to)
{
struct inode *inode = page->mapping->host;
struct super_block *sb = inode->i_sb;
struct buffer_head *bh;
char *data;
+ unsigned pos = 0;
u32 bidx, boff, bsize;
u32 tmp;
- pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
- BUG_ON(from > to || to > PAGE_CACHE_SIZE);
+ pr_debug("%s(%u, %ld, 0, %d)\n", __func__, (u32)inode->i_ino,
+ page->index, to);
+ BUG_ON(to > PAGE_CACHE_SIZE);
kmap(page);
data = page_address(page);
bsize = AFFS_SB(sb)->s_data_blksize;
- tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+ tmp = page->index << PAGE_CACHE_SHIFT;
bidx = tmp / bsize;
boff = tmp % bsize;
- while (from < to) {
+ while (pos < to) {
bh = affs_bread_ino(inode, bidx, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
- tmp = min(bsize - boff, to - from);
- BUG_ON(from + tmp > to || tmp > bsize);
- memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
+ tmp = min(bsize - boff, to - pos);
+ BUG_ON(pos + tmp > to || tmp > bsize);
+ memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
affs_brelse(bh);
bidx++;
- from += tmp;
+ pos += tmp;
boff = 0;
}
flush_dcache_page(page);
@@ -542,7 +545,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
u32 size, bsize;
u32 tmp;
- pr_debug("AFFS: extent_file(%u, %d)\n", (u32)inode->i_ino, newsize);
+ pr_debug("%s(%u, %d)\n", __func__, (u32)inode->i_ino, newsize);
bsize = AFFS_SB(sb)->s_data_blksize;
bh = NULL;
size = AFFS_I(inode)->mmu_private;
@@ -608,14 +611,14 @@ affs_readpage_ofs(struct file *file, struct page *page)
u32 to;
int err;
- pr_debug("AFFS: read_page(%u, %ld)\n", (u32)inode->i_ino, page->index);
+ pr_debug("%s(%u, %ld)\n", __func__, (u32)inode->i_ino, page->index);
to = PAGE_CACHE_SIZE;
if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) {
to = inode->i_size & ~PAGE_CACHE_MASK;
memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to);
}
- err = affs_do_readpage_ofs(file, page, 0, to);
+ err = affs_do_readpage_ofs(page, to);
if (!err)
SetPageUptodate(page);
unlock_page(page);
@@ -631,7 +634,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
pgoff_t index;
int err = 0;
- pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
+ pr_debug("%s(%u, %llu, %llu)\n", __func__, (u32)inode->i_ino,
+ (unsigned long long)pos, (unsigned long long)pos + len);
if (pos > AFFS_I(inode)->mmu_private) {
/* XXX: this probably leaves a too-big i_size in case of
* failure. Should really be updating i_size at write_end time
@@ -651,7 +655,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
return 0;
/* XXX: inefficient but safe in the face of short writes */
- err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE);
+ err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE);
if (err) {
unlock_page(page);
page_cache_release(page);
@@ -680,7 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
* due to write_begin.
*/
- pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
+ pr_debug("%s(%u, %llu, %llu)\n",
+ __func__, (u32)inode->i_ino, (unsigned long long)pos,
+ (unsigned long long)pos + len);
bsize = AFFS_SB(sb)->s_data_blksize;
data = page_address(page);
@@ -802,7 +808,7 @@ affs_free_prealloc(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
- pr_debug("AFFS: free_prealloc(ino=%lu)\n", inode->i_ino);
+ pr_debug("free_prealloc(ino=%lu)\n", inode->i_ino);
while (AFFS_I(inode)->i_pa_cnt) {
AFFS_I(inode)->i_pa_cnt--;
@@ -822,7 +828,7 @@ affs_truncate(struct inode *inode)
struct buffer_head *ext_bh;
int i;
- pr_debug("AFFS: truncate(inode=%d, oldsize=%u, newsize=%u)\n",
+ pr_debug("truncate(inode=%d, oldsize=%u, newsize=%u)\n",
(u32)inode->i_ino, (u32)AFFS_I(inode)->mmu_private, (u32)inode->i_size);
last_blk = 0;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 96df91e8c33..bec2d1a0c91 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -34,7 +34,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
if (!(inode->i_state & I_NEW))
return inode;
- pr_debug("AFFS: affs_iget(%lu)\n", inode->i_ino);
+ pr_debug("affs_iget(%lu)\n", inode->i_ino);
block = inode->i_ino;
bh = affs_bread(sb, block);
@@ -175,7 +175,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
uid_t uid;
gid_t gid;
- pr_debug("AFFS: write_inode(%lu)\n",inode->i_ino);
+ pr_debug("write_inode(%lu)\n", inode->i_ino);
if (!inode->i_nlink)
// possibly free block
@@ -220,7 +220,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
struct inode *inode = dentry->d_inode;
int error;
- pr_debug("AFFS: notify_change(%lu,0x%x)\n",inode->i_ino,attr->ia_valid);
+ pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
error = inode_change_ok(inode,attr);
if (error)
@@ -258,7 +258,8 @@ void
affs_evict_inode(struct inode *inode)
{
unsigned long cache_page;
- pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ pr_debug("evict_inode(ino=%lu, nlink=%u)\n",
+ inode->i_ino, inode->i_nlink);
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
@@ -271,7 +272,7 @@ affs_evict_inode(struct inode *inode)
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
if (cache_page) {
- pr_debug("AFFS: freeing ext cache\n");
+ pr_debug("freeing ext cache\n");
AFFS_I(inode)->i_lc = NULL;
AFFS_I(inode)->i_ac = NULL;
free_page(cache_page);
@@ -350,7 +351,8 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
u32 block = 0;
int retval;
- pr_debug("AFFS: add_entry(dir=%u, inode=%u, \"%*s\", type=%d)\n", (u32)dir->i_ino,
+ pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n",
+ __func__, (u32)dir->i_ino,
(u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type);
retval = -EIO;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 6dae1ccd176..035bd31556f 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -190,7 +190,8 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
toupper_t toupper = affs_get_toupper(sb);
u32 key;
- pr_debug("AFFS: find_entry(\"%.*s\")\n", (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(\"%.*s\")\n",
+ __func__, (int)dentry->d_name.len, dentry->d_name.name);
bh = affs_bread(sb, dir->i_ino);
if (!bh)
@@ -218,7 +219,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
struct buffer_head *bh;
struct inode *inode = NULL;
- pr_debug("AFFS: lookup(\"%.*s\")\n",(int)dentry->d_name.len,dentry->d_name.name);
+ pr_debug("%s(\"%.*s\")\n",
+ __func__, (int)dentry->d_name.len, dentry->d_name.name);
affs_lock_dir(dir);
bh = affs_find_entry(dir, dentry);
@@ -248,9 +250,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
int
affs_unlink(struct inode *dir, struct dentry *dentry)
{
- pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
- dentry->d_inode->i_ino,
- (int)dentry->d_name.len, dentry->d_name.name);
+ pr_debug("%s(dir=%d, %lu \"%.*s\")\n",
+ __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
+ (int)dentry->d_name.len, dentry->d_name.name);
return affs_remove_header(dentry);
}
@@ -262,7 +264,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
struct inode *inode;
int error;
- pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
+ pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
dentry->d_name.name,mode);
inode = affs_new_inode(dir);
@@ -291,8 +294,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int error;
- pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
- (int)dentry->d_name.len,dentry->d_name.name,mode);
+ pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
+ dentry->d_name.name, mode);
inode = affs_new_inode(dir);
if (!inode)
@@ -317,8 +321,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int
affs_rmdir(struct inode *dir, struct dentry *dentry)
{
- pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
- dentry->d_inode->i_ino,
+ pr_debug("%s(dir=%u, %lu \"%.*s\")\n",
+ __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
(int)dentry->d_name.len, dentry->d_name.name);
return affs_remove_header(dentry);
@@ -334,8 +338,9 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
int i, maxlen, error;
char c, lc;
- pr_debug("AFFS: symlink(%lu,\"%.*s\" -> \"%s\")\n",dir->i_ino,
- (int)dentry->d_name.len,dentry->d_name.name,symname);
+ pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n",
+ __func__, dir->i_ino, (int)dentry->d_name.len,
+ dentry->d_name.name, symname);
maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1;
inode = affs_new_inode(dir);
@@ -404,7 +409,8 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
- pr_debug("AFFS: link(%u, %u, \"%.*s\")\n", (u32)inode->i_ino, (u32)dir->i_ino,
+ pr_debug("%s(%u, %u, \"%.*s\")\n",
+ __func__, (u32)inode->i_ino, (u32)dir->i_ino,
(int)dentry->d_name.len,dentry->d_name.name);
return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
@@ -418,9 +424,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct buffer_head *bh = NULL;
int retval;
- pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
- (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
- (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
+ pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
+ __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len,
+ old_dentry->d_name.name, (u32)new_dir->i_ino,
+ (int)new_dentry->d_name.len, new_dentry->d_name.name);
retval = affs_check_name(new_dentry->d_name.name,
new_dentry->d_name.len,
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 6d589f28bf9..51f1a95bff7 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -46,7 +46,7 @@ static void
affs_put_super(struct super_block *sb)
{
struct affs_sb_info *sbi = AFFS_SB(sb);
- pr_debug("AFFS: put_super()\n");
+ pr_debug("%s()\n", __func__);
cancel_delayed_work_sync(&sbi->sb_work);
}
@@ -220,7 +220,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
return 0;
if (n != 512 && n != 1024 && n != 2048
&& n != 4096) {
- printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
+ pr_warn("Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
return 0;
}
*blocksize = n;
@@ -285,8 +285,8 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
/* Silently ignore the quota options */
break;
default:
- printk("AFFS: Unrecognized mount option \"%s\" "
- "or missing value\n", p);
+ pr_warn("Unrecognized mount option \"%s\" or missing value\n",
+ p);
return 0;
}
}
@@ -319,7 +319,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
save_mount_options(sb, data);
- pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options");
+ pr_debug("read_super(%s)\n", data ? (const char *)data : "no options");
sb->s_magic = AFFS_SUPER_MAGIC;
sb->s_op = &affs_sops;
@@ -339,9 +339,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
&blocksize,&sbi->s_prefix,
sbi->s_volume, &mount_flags)) {
- printk(KERN_ERR "AFFS: Error parsing options\n");
- kfree(sbi->s_prefix);
- kfree(sbi);
+ pr_err("Error parsing options\n");
return -EINVAL;
}
/* N.B. after this point s_prefix must be released */
@@ -358,7 +356,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
*/
size = sb->s_bdev->bd_inode->i_size >> 9;
- pr_debug("AFFS: initial blocksize=%d, #blocks=%d\n", 512, size);
+ pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size);
affs_set_blocksize(sb, PAGE_SIZE);
/* Try to find root block. Its location depends on the block size. */
@@ -373,7 +371,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_root_block = root_block;
if (root_block < 0)
sbi->s_root_block = (reserved + size - 1) / 2;
- pr_debug("AFFS: setting blocksize to %d\n", blocksize);
+ pr_debug("setting blocksize to %d\n", blocksize);
affs_set_blocksize(sb, blocksize);
sbi->s_partition_size = size;
@@ -388,7 +386,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
* block behind the calculated one. So we check this one, too.
*/
for (num_bm = 0; num_bm < 2; num_bm++) {
- pr_debug("AFFS: Dev %s, trying root=%u, bs=%d, "
+ pr_debug("Dev %s, trying root=%u, bs=%d, "
"size=%d, reserved=%d\n",
sb->s_id,
sbi->s_root_block + num_bm,
@@ -409,8 +407,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
}
}
if (!silent)
- printk(KERN_ERR "AFFS: No valid root block on device %s\n",
- sb->s_id);
+ pr_err("No valid root block on device %s\n", sb->s_id);
return -EINVAL;
/* N.B. after this point bh must be released */
@@ -422,7 +419,7 @@ got_root:
/* Find out which kind of FS we have */
boot_bh = sb_bread(sb, 0);
if (!boot_bh) {
- printk(KERN_ERR "AFFS: Cannot read boot block\n");
+ pr_err("Cannot read boot block\n");
return -EINVAL;
}
memcpy(sig, boot_bh->b_data, 4);
@@ -435,8 +432,7 @@ got_root:
*/
if ((chksum == FS_DCFFS || chksum == MUFS_DCFFS || chksum == FS_DCOFS
|| chksum == MUFS_DCOFS) && !(sb->s_flags & MS_RDONLY)) {
- printk(KERN_NOTICE "AFFS: Dircache FS - mounting %s read only\n",
- sb->s_id);
+ pr_notice("Dircache FS - mounting %s read only\n", sb->s_id);
sb->s_flags |= MS_RDONLY;
}
switch (chksum) {
@@ -470,14 +466,14 @@ got_root:
sb->s_flags |= MS_NOEXEC;
break;
default:
- printk(KERN_ERR "AFFS: Unknown filesystem on device %s: %08X\n",
- sb->s_id, chksum);
+ pr_err("Unknown filesystem on device %s: %08X\n",
+ sb->s_id, chksum);
return -EINVAL;
}
if (mount_flags & SF_VERBOSE) {
u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
- printk(KERN_NOTICE "AFFS: Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
+ pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
len > 31 ? 31 : len,
AFFS_ROOT_TAIL(sb, root_bh)->disk_name + 1,
sig, sig[3] + '0', blocksize);
@@ -508,11 +504,11 @@ got_root:
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
- printk(KERN_ERR "AFFS: Get root inode failed\n");
+ pr_err("AFFS: Get root inode failed\n");
return -ENOMEM;
}
- pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
+ pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
}
@@ -532,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
char volume[32];
char *prefix = NULL;
- pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
+ pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
*flags |= MS_NODIRATIME;
@@ -580,8 +576,9 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
int free;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
- pr_debug("AFFS: statfs() partsize=%d, reserved=%d\n",AFFS_SB(sb)->s_partition_size,
- AFFS_SB(sb)->s_reserved);
+ pr_debug("%s() partsize=%d, reserved=%d\n",
+ __func__, AFFS_SB(sb)->s_partition_size,
+ AFFS_SB(sb)->s_reserved);
free = affs_count_free_blocks(sb);
buf->f_type = AFFS_SUPER_MAGIC;
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index ee00f08c4f5..f39b71c3981 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -21,7 +21,7 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
char c;
char lc;
- pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
+ pr_debug("follow_link(ino=%lu)\n", inode->i_ino);
err = -EIO;
bh = affs_bread(inode->i_sb, inode->i_ino);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 1c8c6cc6de3..4b0eff6da67 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -130,6 +130,15 @@ static void afs_cm_destructor(struct afs_call *call)
{
_enter("");
+ /* Break the callbacks here so that we do it after the final ACK is
+ * received. The step number here must match the final number in
+ * afs_deliver_cb_callback().
+ */
+ if (call->unmarshall == 6) {
+ ASSERT(call->server && call->count && call->request);
+ afs_break_callbacks(call->server, call->count, call->request);
+ }
+
afs_put_server(call->server);
call->server = NULL;
kfree(call->buffer);
@@ -272,6 +281,16 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
_debug("trailer");
if (skb->len != 0)
return -EBADMSG;
+
+ /* Record that the message was unmarshalled successfully so
+ * that the call destructor can know do the callback breaking
+ * work, even if the final ACK isn't received.
+ *
+ * If the step number changes, then afs_cm_destructor() must be
+ * updated also.
+ */
+ call->unmarshall++;
+ case 6:
break;
}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 66d50fe2ee4..932ce07948b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,10 +31,10 @@ const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = afs_file_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = afs_file_write,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
.fsync = afs_fsync,
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index a8cf2cff836..4baf1d2b39e 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -555,10 +555,6 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
return -ENOLCK;
/* we're simulating flock() locks using posix locks on the server */
- fl->fl_owner = (fl_owner_t) file;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
-
if (fl->fl_type == F_UNLCK)
return afs_do_unlk(file, fl);
return afs_do_setlk(file, fl);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index be75b500005..71d5982312f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -75,7 +75,7 @@ struct afs_call {
const struct afs_call_type *type; /* type of call */
const struct afs_wait_mode *wait_mode; /* completion wait mode */
wait_queue_head_t waitq; /* processes awaiting completion */
- work_func_t async_workfn;
+ void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
struct work_struct async_work; /* asynchronous work processor */
struct work_struct work; /* actual work processor */
struct sk_buff_head rx_queue; /* received packets */
@@ -747,8 +747,7 @@ extern int afs_write_end(struct file *file, struct address_space *mapping,
extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
-extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
+extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_writeback_all(struct afs_vnode *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e499ed..35de0c04729 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void)
afs_uuid.time_low = uuidtime;
afs_uuid.time_mid = uuidtime >> 32;
afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
- afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME;
+ afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
get_random_bytes(&clockseq, 2);
afs_uuid.clock_seq_low = clockseq;
afs_uuid.clock_seq_hi_and_reserved =
(clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
- afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD;
+ afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
_debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
afs_uuid.time_low,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ef943df73b8..03a3beb1700 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -25,7 +25,7 @@ static void afs_wake_up_call_waiter(struct afs_call *);
static int afs_wait_for_call_to_complete(struct afs_call *);
static void afs_wake_up_async_call(struct afs_call *);
static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct work_struct *);
+static void afs_process_async_call(struct afs_call *);
static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
@@ -58,6 +58,13 @@ static void afs_collect_incoming_call(struct work_struct *);
static struct sk_buff_head afs_incoming_calls;
static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
+static void afs_async_workfn(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+ call->async_workfn(call);
+}
+
/*
* open an RxRPC socket and bind it to be a server for callback notifications
* - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
@@ -184,6 +191,28 @@ static void afs_free_call(struct afs_call *call)
}
/*
+ * End a call but do not free it
+ */
+static void afs_end_call_nofree(struct afs_call *call)
+{
+ if (call->rxcall) {
+ rxrpc_kernel_end_call(call->rxcall);
+ call->rxcall = NULL;
+ }
+ if (call->type->destructor)
+ call->type->destructor(call);
+}
+
+/*
+ * End a call and free it
+ */
+static void afs_end_call(struct afs_call *call)
+{
+ afs_end_call_nofree(call);
+ afs_free_call(call);
+}
+
+/*
* allocate a call with flat request and reply buffers
*/
struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
@@ -326,7 +355,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
- INIT_WORK(&call->async_work, afs_process_async_call);
+ call->async_workfn = afs_process_async_call;
+ INIT_WORK(&call->async_work, afs_async_workfn);
memset(&srx, 0, sizeof(srx));
srx.srx_family = AF_RXRPC;
@@ -383,11 +413,8 @@ error_do_abort:
rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
while ((skb = skb_dequeue(&call->rx_queue)))
afs_free_skb(skb);
- rxrpc_kernel_end_call(rxcall);
- call->rxcall = NULL;
error_kill_call:
- call->type->destructor(call);
- afs_free_call(call);
+ afs_end_call(call);
_leave(" = %d", ret);
return ret;
}
@@ -509,12 +536,8 @@ static void afs_deliver_to_call(struct afs_call *call)
if (call->state >= AFS_CALL_COMPLETE) {
while ((skb = skb_dequeue(&call->rx_queue)))
afs_free_skb(skb);
- if (call->incoming) {
- rxrpc_kernel_end_call(call->rxcall);
- call->rxcall = NULL;
- call->type->destructor(call);
- afs_free_call(call);
- }
+ if (call->incoming)
+ afs_end_call(call);
}
_leave("");
@@ -564,10 +587,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
}
_debug("call complete");
- rxrpc_kernel_end_call(call->rxcall);
- call->rxcall = NULL;
- call->type->destructor(call);
- afs_free_call(call);
+ afs_end_call(call);
_leave(" = %d", ret);
return ret;
}
@@ -603,11 +623,8 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
/*
* delete an asynchronous call
*/
-static void afs_delete_async_call(struct work_struct *work)
+static void afs_delete_async_call(struct afs_call *call)
{
- struct afs_call *call =
- container_of(work, struct afs_call, async_work);
-
_enter("");
afs_free_call(call);
@@ -620,11 +637,8 @@ static void afs_delete_async_call(struct work_struct *work)
* - on a multiple-thread workqueue this work item may try to run on several
* CPUs at the same time
*/
-static void afs_process_async_call(struct work_struct *work)
+static void afs_process_async_call(struct afs_call *call)
{
- struct afs_call *call =
- container_of(work, struct afs_call, async_work);
-
_enter("");
if (!skb_queue_empty(&call->rx_queue))
@@ -637,10 +651,7 @@ static void afs_process_async_call(struct work_struct *work)
call->reply = NULL;
/* kill the call */
- rxrpc_kernel_end_call(call->rxcall);
- call->rxcall = NULL;
- if (call->type->destructor)
- call->type->destructor(call);
+ afs_end_call_nofree(call);
/* we can't just delete the call because the work item may be
* queued */
@@ -663,13 +674,6 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
call->reply_size += len;
}
-static void afs_async_workfn(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- call->async_workfn(work);
-}
-
/*
* accept the backlog of incoming calls
*/
@@ -790,10 +794,7 @@ void afs_send_empty_reply(struct afs_call *call)
_debug("oom");
rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
default:
- rxrpc_kernel_end_call(call->rxcall);
- call->rxcall = NULL;
- call->type->destructor(call);
- afs_free_call(call);
+ afs_end_call(call);
_leave(" [error]");
return;
}
@@ -823,17 +824,16 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
call->state = AFS_CALL_AWAIT_ACK;
n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
if (n >= 0) {
+ /* Success */
_leave(" [replied]");
return;
}
+
if (n == -ENOMEM) {
_debug("oom");
rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
}
- rxrpc_kernel_end_call(call->rxcall);
- call->rxcall = NULL;
- call->type->destructor(call);
- afs_free_call(call);
+ afs_end_call(call);
_leave(" [error]");
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index a890db4b989..ab6adfd5251 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -625,15 +625,14 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
/*
* write to an AFS file
*/
-ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
ssize_t result;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(from);
- _enter("{%x.%u},{%zu},%lu,",
- vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
+ _enter("{%x.%u},{%zu},",
+ vnode->fid.vid, vnode->fid.vnode, count);
if (IS_SWAPFILE(&vnode->vfs_inode)) {
printk(KERN_INFO
@@ -644,7 +643,7 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
if (!count)
return 0;
- result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ result = generic_file_write_iter(iocb, from);
if (IS_ERR_VALUE(result)) {
_leave(" = %zd", result);
return result;
diff --git a/fs/aio.c b/fs/aio.c
index 12a3de0ee6d..1c9c5f0a9e2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
struct work_struct free_work;
+ /*
+ * signals when all in-flight requests are done
+ */
+ struct completion *requests_done;
+
struct {
/*
* This counts the number of available slots in the ringbuffer,
@@ -472,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
+static int kiocb_cancel(struct kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+ /* At this point we know that there are no any in-flight requests */
+ if (ctx->requests_done)
+ complete(ctx->requests_done);
+
INIT_WORK(&ctx->free_work, free_ioctx);
schedule_work(&ctx->free_work);
}
@@ -529,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
struct kiocb, ki_list);
list_del_init(&req->ki_list);
- kiocb_cancel(ctx, req);
+ kiocb_cancel(req);
}
spin_unlock_irq(&ctx->ctx_lock);
@@ -718,37 +727,42 @@ err:
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+ struct completion *requests_done)
{
- if (!atomic_xchg(&ctx->dead, 1)) {
- struct kioctx_table *table;
+ struct kioctx_table *table;
- spin_lock(&mm->ioctx_lock);
- rcu_read_lock();
- table = rcu_dereference(mm->ioctx_table);
+ if (atomic_xchg(&ctx->dead, 1))
+ return -EINVAL;
- WARN_ON(ctx != table->table[ctx->id]);
- table->table[ctx->id] = NULL;
- rcu_read_unlock();
- spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */
- wake_up_all(&ctx->wait);
+ spin_lock(&mm->ioctx_lock);
+ rcu_read_lock();
+ table = rcu_dereference(mm->ioctx_table);
- /*
- * It'd be more correct to do this in free_ioctx(), after all
- * the outstanding kiocbs have finished - but by then io_destroy
- * has already returned, so io_setup() could potentially return
- * -EAGAIN with no ioctxs actually in use (as far as userspace
- * could tell).
- */
- aio_nr_sub(ctx->max_reqs);
+ WARN_ON(ctx != table->table[ctx->id]);
+ table->table[ctx->id] = NULL;
+ rcu_read_unlock();
+ spin_unlock(&mm->ioctx_lock);
- if (ctx->mmap_size)
- vm_munmap(ctx->mmap_base, ctx->mmap_size);
+ /* percpu_ref_kill() will do the necessary call_rcu() */
+ wake_up_all(&ctx->wait);
- percpu_ref_kill(&ctx->users);
- }
+ /*
+ * It'd be more correct to do this in free_ioctx(), after all
+ * the outstanding kiocbs have finished - but by then io_destroy
+ * has already returned, so io_setup() could potentially return
+ * -EAGAIN with no ioctxs actually in use (as far as userspace
+ * could tell).
+ */
+ aio_nr_sub(ctx->max_reqs);
+
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
+
+ ctx->requests_done = requests_done;
+ percpu_ref_kill(&ctx->users);
+ return 0;
}
/* wait_on_sync_kiocb:
@@ -809,23 +823,27 @@ void exit_aio(struct mm_struct *mm)
*/
ctx->mmap_size = 0;
- kill_ioctx(mm, ctx);
+ kill_ioctx(mm, ctx, NULL);
}
}
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
struct kioctx_cpu *kcpu;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
kcpu->reqs_available += nr;
+
while (kcpu->reqs_available >= ctx->req_batch * 2) {
kcpu->reqs_available -= ctx->req_batch;
atomic_add(ctx->req_batch, &ctx->reqs_available);
}
+ local_irq_restore(flags);
preempt_enable();
}
@@ -833,10 +851,12 @@ static bool get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
if (!kcpu->reqs_available) {
int old, avail = atomic_read(&ctx->reqs_available);
@@ -855,6 +875,7 @@ static bool get_reqs_available(struct kioctx *ctx)
ret = true;
kcpu->reqs_available--;
out:
+ local_irq_restore(flags);
preempt_enable();
return ret;
}
@@ -1007,6 +1028,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
/* everything turned out well, dispose of the aiocb. */
kiocb_free(iocb);
+ put_reqs_available(ctx, 1);
/*
* We have to order our ring_info tail store above and test
@@ -1048,6 +1070,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == tail)
goto out;
+ head %= ctx->nr_events;
+ tail %= ctx->nr_events;
+
while (ret < nr) {
long avail;
struct io_event *ev;
@@ -1086,8 +1111,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
-
- put_reqs_available(ctx, ret);
out:
mutex_unlock(&ctx->ring_lock);
@@ -1185,7 +1208,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- kill_ioctx(current->mm, ioctx);
+ kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
@@ -1203,9 +1226,25 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- kill_ioctx(current->mm, ioctx);
+ struct completion requests_done =
+ COMPLETION_INITIALIZER_ONSTACK(requests_done);
+ int ret;
+
+ /* Pass requests_done to kill_ioctx() where it can be set
+ * in a thread-safe way. If we try to set it here then we have
+ * a race condition if two io_destroy() called simultaneously.
+ */
+ ret = kill_ioctx(current->mm, ioctx, &requests_done);
percpu_ref_put(&ioctx->users);
- return 0;
+
+ /* Wait until all IO for the context are done. Otherwise kernel
+ * keep using user-space buffers even if user thinks the context
+ * is destroyed.
+ */
+ if (!ret)
+ wait_for_completion(&requests_done);
+
+ return ret;
}
pr_debug("EINVAL: io_destroy: invalid context id\n");
return -EINVAL;
@@ -1213,6 +1252,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
int rw, char __user *buf,
@@ -1270,7 +1310,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
int rw;
fmode_t mode;
aio_rw_op *rw_op;
+ rw_iter_op *iter_op;
struct iovec inline_vec, *iovec = &inline_vec;
+ struct iov_iter iter;
switch (opcode) {
case IOCB_CMD_PREAD:
@@ -1278,6 +1320,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
mode = FMODE_READ;
rw = READ;
rw_op = file->f_op->aio_read;
+ iter_op = file->f_op->read_iter;
goto rw_common;
case IOCB_CMD_PWRITE:
@@ -1285,12 +1328,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
mode = FMODE_WRITE;
rw = WRITE;
rw_op = file->f_op->aio_write;
+ iter_op = file->f_op->write_iter;
goto rw_common;
rw_common:
if (unlikely(!(file->f_mode & mode)))
return -EBADF;
- if (!rw_op)
+ if (!rw_op && !iter_op)
return -EINVAL;
ret = (opcode == IOCB_CMD_PREADV ||
@@ -1299,10 +1343,8 @@ rw_common:
&iovec, compat)
: aio_setup_single_vector(req, rw, buf, &nr_segs,
iovec);
- if (ret)
- return ret;
-
- ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ if (!ret)
+ ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
if (ret < 0) {
if (iovec != &inline_vec)
kfree(iovec);
@@ -1321,7 +1363,12 @@ rw_common:
if (rw == WRITE)
file_start_write(file);
- ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+ if (iter_op) {
+ iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+ ret = iter_op(req, &iter);
+ } else {
+ ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+ }
if (rw == WRITE)
file_end_write(file);
@@ -1559,7 +1606,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
kiocb = lookup_kiocb(ctx, iocb, key);
if (kiocb)
- ret = kiocb_cancel(ctx, kiocb);
+ ret = kiocb_cancel(kiocb);
else
ret = -EINVAL;
diff --git a/fs/attr.c b/fs/attr.c
index 5d4e59d56e8..6530ced1969 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
if ((ia_valid & ATTR_UID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
!uid_eq(attr->ia_uid, inode->i_uid)) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
(!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
umode_t mode = attr->ia_mode;
if (!in_group_p(inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 232e03d4780..5b570b6efa2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -737,7 +737,7 @@ MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
MODULE_ALIAS("devname:autofs");
/* Register/deregister misc character device */
-int autofs_dev_ioctl_init(void)
+int __init autofs_dev_ioctl_init(void)
{
int r;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395ab58..1c55388ae63 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- int pgrp;
+ int pgrp = 0;
bool pgrp_set = false;
int ret = -EINVAL;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2caf36ac3e9..cc87c1abac9 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
spin_lock(&active->d_lock);
/* Already gone? */
- if (!d_count(active))
+ if ((int) d_count(active) <= 0)
goto next;
qstr = &active->d_name;
@@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
spin_lock(&expiring->d_lock);
- /* Bad luck, we've already been dentry_iput */
+ /* We've already been dentry_iput or unlinked */
if (!expiring->d_inode)
goto next;
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index a2cd305a993..9c7faa8a928 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -318,7 +318,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds,
* befs_find_key - Search for a key within a node
* @sb: Filesystem superblock
* @node: Node to find the key within
- * @key: Keystring to search for
+ * @findkey: Keystring to search for
* @value: If key is found, the value stored with the key is put here
*
* finds exact match if one exists, and returns BEFS_BT_MATCH
@@ -405,7 +405,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node,
* Heres how it works: Key_no is the index of the key/value pair to
* return in keybuf/value.
* Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is
- * the number of charecters in the key (just a convenience).
+ * the number of characters in the key (just a convenience).
*
* Algorithm:
* Get the first leafnode of the tree. See if the requested key is in that
@@ -502,12 +502,11 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds,
"for key of size %d", __func__, bufsize, keylen);
brelse(this_node->bh);
goto error_alloc;
- };
+ }
- strncpy(keybuf, keystart, keylen);
+ strlcpy(keybuf, keystart, keylen + 1);
*value = fs64_to_cpu(sb, valarray[cur_key]);
*keysize = keylen;
- keybuf[keylen] = '\0';
befs_debug(sb, "Read [%llu,%d]: Key \"%.*s\", Value %llu", node_off,
cur_key, keylen, keybuf, *value);
@@ -707,7 +706,7 @@ befs_bt_get_key(struct super_block *sb, befs_btree_node * node,
* @key1: pointer to the first key to be compared
* @keylen1: length in bytes of key1
* @key2: pointer to the second key to be compared
- * @kelen2: length in bytes of key2
+ * @keylen2: length in bytes of key2
*
* Returns 0 if @key1 and @key2 are equal.
* Returns >0 if @key1 is greater.
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index c467bebd50a..1e8e0b8d883 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -116,7 +116,7 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data,
* befs_read_lsmylink - read long symlink from datastream.
* @sb: Filesystem superblock
* @ds: Datastrem to read from
- * @buf: Buffer in which to place long symlink data
+ * @buff: Buffer in which to place long symlink data
* @len: Length of the long symlink in bytes
*
* Returns the number of bytes read
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index d626756ff72..a16fbd4e824 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -133,14 +133,6 @@ befs_get_block(struct inode *inode, sector_t block,
befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
(unsigned long)inode->i_ino, (long)block);
-
- if (block < 0) {
- befs_error(sb, "befs_get_block() was asked for a block "
- "number less than zero: block %ld in inode %lu",
- (long)block, (unsigned long)inode->i_ino);
- return -EIO;
- }
-
if (create) {
befs_error(sb, "befs_get_block() was asked to write to "
"block %ld in inode %lu", (long)block,
@@ -396,9 +388,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
if (S_ISLNK(inode->i_mode) && !(befs_ino->i_flags & BEFS_LONG_SYMLINK)){
inode->i_size = 0;
inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE;
- strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
- BEFS_SYMLINK_LEN - 1);
- befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0';
+ strlcpy(befs_ino->i_data.symlink, raw_inode->data.symlink,
+ BEFS_SYMLINK_LEN);
} else {
int num_blks;
@@ -591,21 +582,21 @@ befs_utf2nls(struct super_block *sb, const char *in,
/**
* befs_nls2utf - Convert NLS string to utf8 encodeing
* @sb: Superblock
- * @src: Input string buffer in NLS format
- * @srclen: Length of input string in bytes
- * @dest: The output string in UTF-8 format
- * @destlen: Length of the output buffer
+ * @in: Input string buffer in NLS format
+ * @in_len: Length of input string in bytes
+ * @out: The output string in UTF-8 format
+ * @out_len: Length of the output buffer
*
- * Converts input string @src, which is in the format of the loaded NLS map,
+ * Converts input string @in, which is in the format of the loaded NLS map,
* into a utf8 string.
*
- * The destination string @dest is allocated by this function and the caller is
+ * The destination string @out is allocated by this function and the caller is
* responsible for freeing it with kfree()
*
- * On return, *@destlen is the length of @dest in bytes.
+ * On return, *@out_len is the length of @out in bytes.
*
* On success, the return value is the number of utf8 characters written to
- * the output buffer @dest.
+ * the output buffer @out.
*
* On Failure, a negative number coresponding to the error code is returned.
*/
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ae289221833..e7f88ace1a2 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,10 +23,10 @@
const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .aio_read = generic_file_aio_read,
- .write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .read = new_sync_read,
+ .read_iter = generic_file_read_iter,
+ .write = new_sync_write,
+ .write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index aa3cb626671..3892c1a2324 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1108,6 +1108,14 @@ static bool always_dump_vma(struct vm_area_struct *vma)
/* Any vsyscall mappings? */
if (vma == get_gate_vma(vma->vm_mm))
return true;
+
+ /*
+ * Assume that all vmas with a .name op should always be dumped.
+ * If this changes, a new vm_ops field can easily be added.
+ */
+ if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
+ return true;
+
/*
* arch_vma_name() returns non-NULL for special architecture mappings,
* such as vDSO sections.
@@ -1686,7 +1694,7 @@ static size_t get_note_info_size(struct elf_note_info *info)
static int write_note_info(struct elf_note_info *info,
struct coredump_params *cprm)
{
- bool first = 1;
+ bool first = true;
struct elf_thread_core_info *t = info->thread;
do {
@@ -1710,7 +1718,7 @@ static int write_note_info(struct elf_note_info *info,
!writenote(&t->notes[i], cprm))
return 0;
- first = 0;
+ first = false;
t = t->next;
} while (t);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d50bbe59da1..f723cd3a455 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -380,7 +380,7 @@ failed:
/****************************************************************************/
-void old_reloc(unsigned long rl)
+static void old_reloc(unsigned long rl)
{
#ifdef DEBUG
char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" };
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
deleted file mode 100644
index 1c2ce0c8771..00000000000
--- a/fs/bio-integrity.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * bio-integrity.c - bio data integrity extensions
- *
- * Copyright (C) 2007, 2008, 2009 Oracle Corporation
- * Written by: Martin K. Petersen <martin.petersen@oracle.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
- * USA.
- *
- */
-
-#include <linux/blkdev.h>
-#include <linux/mempool.h>
-#include <linux/export.h>
-#include <linux/bio.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-
-#define BIP_INLINE_VECS 4
-
-static struct kmem_cache *bip_slab;
-static struct workqueue_struct *kintegrityd_wq;
-
-/**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
- * @bio: bio to attach integrity metadata to
- * @gfp_mask: Memory allocation mask
- * @nr_vecs: Number of integrity metadata scatter-gather elements
- *
- * Description: This function prepares a bio for attaching integrity
- * metadata. nr_vecs specifies the maximum number of pages containing
- * integrity metadata that can be attached.
- */
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
- gfp_t gfp_mask,
- unsigned int nr_vecs)
-{
- struct bio_integrity_payload *bip;
- struct bio_set *bs = bio->bi_pool;
- unsigned long idx = BIO_POOL_NONE;
- unsigned inline_vecs;
-
- if (!bs) {
- bip = kmalloc(sizeof(struct bio_integrity_payload) +
- sizeof(struct bio_vec) * nr_vecs, gfp_mask);
- inline_vecs = nr_vecs;
- } else {
- bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
- inline_vecs = BIP_INLINE_VECS;
- }
-
- if (unlikely(!bip))
- return NULL;
-
- memset(bip, 0, sizeof(*bip));
-
- if (nr_vecs > inline_vecs) {
- bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
- bs->bvec_integrity_pool);
- if (!bip->bip_vec)
- goto err;
- } else {
- bip->bip_vec = bip->bip_inline_vecs;
- }
-
- bip->bip_slab = idx;
- bip->bip_bio = bio;
- bio->bi_integrity = bip;
-
- return bip;
-err:
- mempool_free(bip, bs->bio_integrity_pool);
- return NULL;
-}
-EXPORT_SYMBOL(bio_integrity_alloc);
-
-/**
- * bio_integrity_free - Free bio integrity payload
- * @bio: bio containing bip to be freed
- *
- * Description: Used to free the integrity portion of a bio. Usually
- * called from bio_free().
- */
-void bio_integrity_free(struct bio *bio)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
- struct bio_set *bs = bio->bi_pool;
-
- if (bip->bip_owns_buf)
- kfree(bip->bip_buf);
-
- if (bs) {
- if (bip->bip_slab != BIO_POOL_NONE)
- bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
- bip->bip_slab);
-
- mempool_free(bip, bs->bio_integrity_pool);
- } else {
- kfree(bip);
- }
-
- bio->bi_integrity = NULL;
-}
-EXPORT_SYMBOL(bio_integrity_free);
-
-static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
-{
- if (bip->bip_slab == BIO_POOL_NONE)
- return BIP_INLINE_VECS;
-
- return bvec_nr_vecs(bip->bip_slab);
-}
-
-/**
- * bio_integrity_add_page - Attach integrity metadata
- * @bio: bio to update
- * @page: page containing integrity metadata
- * @len: number of bytes of integrity metadata in page
- * @offset: start offset within page
- *
- * Description: Attach a page containing integrity metadata to bio.
- */
-int bio_integrity_add_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
- struct bio_vec *iv;
-
- if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
- printk(KERN_ERR "%s: bip_vec full\n", __func__);
- return 0;
- }
-
- iv = bip->bip_vec + bip->bip_vcnt;
-
- iv->bv_page = page;
- iv->bv_len = len;
- iv->bv_offset = offset;
- bip->bip_vcnt++;
-
- return len;
-}
-EXPORT_SYMBOL(bio_integrity_add_page);
-
-static int bdev_integrity_enabled(struct block_device *bdev, int rw)
-{
- struct blk_integrity *bi = bdev_get_integrity(bdev);
-
- if (bi == NULL)
- return 0;
-
- if (rw == READ && bi->verify_fn != NULL &&
- (bi->flags & INTEGRITY_FLAG_READ))
- return 1;
-
- if (rw == WRITE && bi->generate_fn != NULL &&
- (bi->flags & INTEGRITY_FLAG_WRITE))
- return 1;
-
- return 0;
-}
-
-/**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio: bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not. bio data direction and target device must be
- * set prior to calling. The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-int bio_integrity_enabled(struct bio *bio)
-{
- if (!bio_is_rw(bio))
- return 0;
-
- /* Already protected? */
- if (bio_integrity(bio))
- return 0;
-
- return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
-/**
- * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
- * @bi: blk_integrity profile for device
- * @sectors: Number of 512 sectors to convert
- *
- * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the hardware
- * sector size of the storage device. Convert the block layer sectors
- * to physical sectors.
- */
-static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
- unsigned int sectors)
-{
- /* At this point there are only 512b or 4096b DIF/EPP devices */
- if (bi->sector_size == 4096)
- return sectors >>= 3;
-
- return sectors;
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
- unsigned int sectors)
-{
- return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
-}
-
-/**
- * bio_integrity_tag_size - Retrieve integrity tag space
- * @bio: bio to inspect
- *
- * Description: Returns the maximum number of tag bytes that can be
- * attached to this bio. Filesystems can use this to determine how
- * much metadata to attach to an I/O.
- */
-unsigned int bio_integrity_tag_size(struct bio *bio)
-{
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
- BUG_ON(bio->bi_iter.bi_size == 0);
-
- return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
-}
-EXPORT_SYMBOL(bio_integrity_tag_size);
-
-static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
- int set)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- unsigned int nr_sectors;
-
- BUG_ON(bip->bip_buf == NULL);
-
- if (bi->tag_size == 0)
- return -1;
-
- nr_sectors = bio_integrity_hw_sectors(bi,
- DIV_ROUND_UP(len, bi->tag_size));
-
- if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
- printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
- nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
- return -1;
- }
-
- if (set)
- bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
- else
- bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
-
- return 0;
-}
-
-/**
- * bio_integrity_set_tag - Attach a tag buffer to a bio
- * @bio: bio to attach buffer to
- * @tag_buf: Pointer to a buffer containing tag data
- * @len: Length of the included buffer
- *
- * Description: Use this function to tag a bio by leveraging the extra
- * space provided by devices formatted with integrity protection. The
- * size of the integrity buffer must be <= to the size reported by
- * bio_integrity_tag_size().
- */
-int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
-{
- BUG_ON(bio_data_dir(bio) != WRITE);
-
- return bio_integrity_tag(bio, tag_buf, len, 1);
-}
-EXPORT_SYMBOL(bio_integrity_set_tag);
-
-/**
- * bio_integrity_get_tag - Retrieve a tag buffer from a bio
- * @bio: bio to retrieve buffer from
- * @tag_buf: Pointer to a buffer for the tag data
- * @len: Length of the target buffer
- *
- * Description: Use this function to retrieve the tag buffer from a
- * completed I/O. The size of the integrity buffer must be <= to the
- * size reported by bio_integrity_tag_size().
- */
-int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
-{
- BUG_ON(bio_data_dir(bio) != READ);
-
- return bio_integrity_tag(bio, tag_buf, len, 0);
-}
-EXPORT_SYMBOL(bio_integrity_get_tag);
-
-/**
- * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
- * @bio: bio to generate/verify integrity metadata for
- * @operate: operate number, 1 for generate, 0 for verify
- */
-static int bio_integrity_generate_verify(struct bio *bio, int operate)
-{
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- struct blk_integrity_exchg bix;
- struct bio_vec *bv;
- sector_t sector;
- unsigned int sectors, ret = 0, i;
- void *prot_buf = bio->bi_integrity->bip_buf;
-
- if (operate)
- sector = bio->bi_iter.bi_sector;
- else
- sector = bio->bi_integrity->bip_iter.bi_sector;
-
- bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
- bix.sector_size = bi->sector_size;
-
- bio_for_each_segment_all(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page);
- bix.data_buf = kaddr + bv->bv_offset;
- bix.data_size = bv->bv_len;
- bix.prot_buf = prot_buf;
- bix.sector = sector;
-
- if (operate)
- bi->generate_fn(&bix);
- else {
- ret = bi->verify_fn(&bix);
- if (ret) {
- kunmap_atomic(kaddr);
- return ret;
- }
- }
-
- sectors = bv->bv_len / bi->sector_size;
- sector += sectors;
- prot_buf += sectors * bi->tuple_size;
-
- kunmap_atomic(kaddr);
- }
- return ret;
-}
-
-/**
- * bio_integrity_generate - Generate integrity metadata for a bio
- * @bio: bio to generate integrity metadata for
- *
- * Description: Generates integrity metadata for a bio by calling the
- * block device's generation callback function. The bio must have a
- * bip attached with enough room to accommodate the generated
- * integrity metadata.
- */
-static void bio_integrity_generate(struct bio *bio)
-{
- bio_integrity_generate_verify(bio, 1);
-}
-
-static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
-{
- if (bi)
- return bi->tuple_size;
-
- return 0;
-}
-
-/**
- * bio_integrity_prep - Prepare bio for integrity I/O
- * @bio: bio to prepare
- *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio. The bio must have data
- * direction, target device and start sector set priot to calling. In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function. In the READ case, the buffer
- * will be prepared for DMA and a suitable end_io handler set up.
- */
-int bio_integrity_prep(struct bio *bio)
-{
- struct bio_integrity_payload *bip;
- struct blk_integrity *bi;
- struct request_queue *q;
- void *buf;
- unsigned long start, end;
- unsigned int len, nr_pages;
- unsigned int bytes, offset, i;
- unsigned int sectors;
-
- bi = bdev_get_integrity(bio->bi_bdev);
- q = bdev_get_queue(bio->bi_bdev);
- BUG_ON(bi == NULL);
- BUG_ON(bio_integrity(bio));
-
- sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
-
- /* Allocate kernel buffer for protection data */
- len = sectors * blk_integrity_tuple_size(bi);
- buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
- if (unlikely(buf == NULL)) {
- printk(KERN_ERR "could not allocate integrity buffer\n");
- return -ENOMEM;
- }
-
- end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- start = ((unsigned long) buf) >> PAGE_SHIFT;
- nr_pages = end - start;
-
- /* Allocate bio integrity payload and integrity vectors */
- bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
- if (unlikely(bip == NULL)) {
- printk(KERN_ERR "could not allocate data integrity bioset\n");
- kfree(buf);
- return -EIO;
- }
-
- bip->bip_owns_buf = 1;
- bip->bip_buf = buf;
- bip->bip_iter.bi_size = len;
- bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
-
- /* Map it */
- offset = offset_in_page(buf);
- for (i = 0 ; i < nr_pages ; i++) {
- int ret;
- bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- ret = bio_integrity_add_page(bio, virt_to_page(buf),
- bytes, offset);
-
- if (ret == 0)
- return 0;
-
- if (ret < bytes)
- break;
-
- buf += bytes;
- len -= bytes;
- offset = 0;
- }
-
- /* Install custom I/O completion handler if read verify is enabled */
- if (bio_data_dir(bio) == READ) {
- bip->bip_end_io = bio->bi_end_io;
- bio->bi_end_io = bio_integrity_endio;
- }
-
- /* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE)
- bio_integrity_generate(bio);
-
- return 0;
-}
-EXPORT_SYMBOL(bio_integrity_prep);
-
-/**
- * bio_integrity_verify - Verify integrity metadata for a bio
- * @bio: bio to verify
- *
- * Description: This function is called to verify the integrity of a
- * bio. The data in the bio io_vec is compared to the integrity
- * metadata returned by the HBA.
- */
-static int bio_integrity_verify(struct bio *bio)
-{
- return bio_integrity_generate_verify(bio, 0);
-}
-
-/**
- * bio_integrity_verify_fn - Integrity I/O completion worker
- * @work: Work struct stored in bio to be verified
- *
- * Description: This workqueue function is called to complete a READ
- * request. The function verifies the transferred integrity metadata
- * and then calls the original bio end_io function.
- */
-static void bio_integrity_verify_fn(struct work_struct *work)
-{
- struct bio_integrity_payload *bip =
- container_of(work, struct bio_integrity_payload, bip_work);
- struct bio *bio = bip->bip_bio;
- int error;
-
- error = bio_integrity_verify(bio);
-
- /* Restore original bio completion handler */
- bio->bi_end_io = bip->bip_end_io;
- bio_endio_nodec(bio, error);
-}
-
-/**
- * bio_integrity_endio - Integrity I/O completion function
- * @bio: Protected bio
- * @error: Pointer to errno
- *
- * Description: Completion for integrity I/O
- *
- * Normally I/O completion is done in interrupt context. However,
- * verifying I/O integrity is a time-consuming task which must be run
- * in process context. This function postpones completion
- * accordingly.
- */
-void bio_integrity_endio(struct bio *bio, int error)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
-
- BUG_ON(bip->bip_bio != bio);
-
- /* In case of an I/O error there is no point in verifying the
- * integrity metadata. Restore original bio end_io handler
- * and run it.
- */
- if (error) {
- bio->bi_end_io = bip->bip_end_io;
- bio_endio(bio, error);
-
- return;
- }
-
- INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
- queue_work(kintegrityd_wq, &bip->bip_work);
-}
-EXPORT_SYMBOL(bio_integrity_endio);
-
-/**
- * bio_integrity_advance - Advance integrity vector
- * @bio: bio whose integrity vector to update
- * @bytes_done: number of data bytes that have been completed
- *
- * Description: This function calculates how many integrity bytes the
- * number of completed data bytes correspond to and advances the
- * integrity vector accordingly.
- */
-void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
-
- bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
-}
-EXPORT_SYMBOL(bio_integrity_advance);
-
-/**
- * bio_integrity_trim - Trim integrity vector
- * @bio: bio whose integrity vector to update
- * @offset: offset to first data sector
- * @sectors: number of data sectors
- *
- * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
- */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
- unsigned int sectors)
-{
- struct bio_integrity_payload *bip = bio->bi_integrity;
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
- bio_integrity_advance(bio, offset << 9);
- bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
-}
-EXPORT_SYMBOL(bio_integrity_trim);
-
-/**
- * bio_integrity_clone - Callback for cloning bios with integrity metadata
- * @bio: New bio
- * @bio_src: Original bio
- * @gfp_mask: Memory allocation mask
- *
- * Description: Called to allocate a bip when cloning a bio
- */
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
- gfp_t gfp_mask)
-{
- struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
- struct bio_integrity_payload *bip;
-
- BUG_ON(bip_src == NULL);
-
- bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
-
- if (bip == NULL)
- return -EIO;
-
- memcpy(bip->bip_vec, bip_src->bip_vec,
- bip_src->bip_vcnt * sizeof(struct bio_vec));
-
- bip->bip_vcnt = bip_src->bip_vcnt;
- bip->bip_iter = bip_src->bip_iter;
-
- return 0;
-}
-EXPORT_SYMBOL(bio_integrity_clone);
-
-int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
- if (bs->bio_integrity_pool)
- return 0;
-
- bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
- if (!bs->bio_integrity_pool)
- return -1;
-
- bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
- if (!bs->bvec_integrity_pool) {
- mempool_destroy(bs->bio_integrity_pool);
- return -1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bioset_integrity_create);
-
-void bioset_integrity_free(struct bio_set *bs)
-{
- if (bs->bio_integrity_pool)
- mempool_destroy(bs->bio_integrity_pool);
-
- if (bs->bvec_integrity_pool)
- mempool_destroy(bs->bvec_integrity_pool);
-}
-EXPORT_SYMBOL(bioset_integrity_free);
-
-void __init bio_integrity_init(void)
-{
- /*
- * kintegrityd won't block much but may burn a lot of CPU cycles.
- * Make it highpri CPU intensive wq with max concurrency of 1.
- */
- kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
- if (!kintegrityd_wq)
- panic("Failed to create kintegrityd\n");
-
- bip_slab = kmem_cache_create("bio_integrity_payload",
- sizeof(struct bio_integrity_payload) +
- sizeof(struct bio_vec) * BIP_INLINE_VECS,
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- if (!bip_slab)
- panic("Failed to create slab\n");
-}
diff --git a/fs/bio.c b/fs/bio.c
deleted file mode 100644
index 6f0362b7780..00000000000
--- a/fs/bio.c
+++ /dev/null
@@ -1,2037 +0,0 @@
-/*
- * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- *
- */
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/uio.h>
-#include <linux/iocontext.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/mempool.h>
-#include <linux/workqueue.h>
-#include <linux/cgroup.h>
-#include <scsi/sg.h> /* for struct sg_iovec */
-
-#include <trace/events/block.h>
-
-/*
- * Test patch to inline a certain number of bi_io_vec's inside the bio
- * itself, to shrink a bio data allocation from two mempool calls to one
- */
-#define BIO_INLINE_VECS 4
-
-/*
- * if you change this list, also change bvec_alloc or things will
- * break badly! cannot be bigger than what you can fit into an
- * unsigned short
- */
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
- BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
-};
-#undef BV
-
-/*
- * fs_bio_set is the bio_set containing bio and iovec memory pools used by
- * IO code that does not need private memory pools.
- */
-struct bio_set *fs_bio_set;
-EXPORT_SYMBOL(fs_bio_set);
-
-/*
- * Our slab pool management
- */
-struct bio_slab {
- struct kmem_cache *slab;
- unsigned int slab_ref;
- unsigned int slab_size;
- char name[8];
-};
-static DEFINE_MUTEX(bio_slab_lock);
-static struct bio_slab *bio_slabs;
-static unsigned int bio_slab_nr, bio_slab_max;
-
-static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
-{
- unsigned int sz = sizeof(struct bio) + extra_size;
- struct kmem_cache *slab = NULL;
- struct bio_slab *bslab, *new_bio_slabs;
- unsigned int new_bio_slab_max;
- unsigned int i, entry = -1;
-
- mutex_lock(&bio_slab_lock);
-
- i = 0;
- while (i < bio_slab_nr) {
- bslab = &bio_slabs[i];
-
- if (!bslab->slab && entry == -1)
- entry = i;
- else if (bslab->slab_size == sz) {
- slab = bslab->slab;
- bslab->slab_ref++;
- break;
- }
- i++;
- }
-
- if (slab)
- goto out_unlock;
-
- if (bio_slab_nr == bio_slab_max && entry == -1) {
- new_bio_slab_max = bio_slab_max << 1;
- new_bio_slabs = krealloc(bio_slabs,
- new_bio_slab_max * sizeof(struct bio_slab),
- GFP_KERNEL);
- if (!new_bio_slabs)
- goto out_unlock;
- bio_slab_max = new_bio_slab_max;
- bio_slabs = new_bio_slabs;
- }
- if (entry == -1)
- entry = bio_slab_nr++;
-
- bslab = &bio_slabs[entry];
-
- snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
- slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
- if (!slab)
- goto out_unlock;
-
- bslab->slab = slab;
- bslab->slab_ref = 1;
- bslab->slab_size = sz;
-out_unlock:
- mutex_unlock(&bio_slab_lock);
- return slab;
-}
-
-static void bio_put_slab(struct bio_set *bs)
-{
- struct bio_slab *bslab = NULL;
- unsigned int i;
-
- mutex_lock(&bio_slab_lock);
-
- for (i = 0; i < bio_slab_nr; i++) {
- if (bs->bio_slab == bio_slabs[i].slab) {
- bslab = &bio_slabs[i];
- break;
- }
- }
-
- if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
- goto out;
-
- WARN_ON(!bslab->slab_ref);
-
- if (--bslab->slab_ref)
- goto out;
-
- kmem_cache_destroy(bslab->slab);
- bslab->slab = NULL;
-
-out:
- mutex_unlock(&bio_slab_lock);
-}
-
-unsigned int bvec_nr_vecs(unsigned short idx)
-{
- return bvec_slabs[idx].nr_vecs;
-}
-
-void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
-{
- BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
-
- if (idx == BIOVEC_MAX_IDX)
- mempool_free(bv, pool);
- else {
- struct biovec_slab *bvs = bvec_slabs + idx;
-
- kmem_cache_free(bvs->slab, bv);
- }
-}
-
-struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
- mempool_t *pool)
-{
- struct bio_vec *bvl;
-
- /*
- * see comment near bvec_array define!
- */
- switch (nr) {
- case 1:
- *idx = 0;
- break;
- case 2 ... 4:
- *idx = 1;
- break;
- case 5 ... 16:
- *idx = 2;
- break;
- case 17 ... 64:
- *idx = 3;
- break;
- case 65 ... 128:
- *idx = 4;
- break;
- case 129 ... BIO_MAX_PAGES:
- *idx = 5;
- break;
- default:
- return NULL;
- }
-
- /*
- * idx now points to the pool we want to allocate from. only the
- * 1-vec entry pool is mempool backed.
- */
- if (*idx == BIOVEC_MAX_IDX) {
-fallback:
- bvl = mempool_alloc(pool, gfp_mask);
- } else {
- struct biovec_slab *bvs = bvec_slabs + *idx;
- gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
-
- /*
- * Make this allocation restricted and don't dump info on
- * allocation failures, since we'll fallback to the mempool
- * in case of failure.
- */
- __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
-
- /*
- * Try a slab allocation. If this fails and __GFP_WAIT
- * is set, retry with the 1-entry mempool
- */
- bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
- if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
- *idx = BIOVEC_MAX_IDX;
- goto fallback;
- }
- }
-
- return bvl;
-}
-
-static void __bio_free(struct bio *bio)
-{
- bio_disassociate_task(bio);
-
- if (bio_integrity(bio))
- bio_integrity_free(bio);
-}
-
-static void bio_free(struct bio *bio)
-{
- struct bio_set *bs = bio->bi_pool;
- void *p;
-
- __bio_free(bio);
-
- if (bs) {
- if (bio_flagged(bio, BIO_OWNS_VEC))
- bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
-
- /*
- * If we have front padding, adjust the bio pointer before freeing
- */
- p = bio;
- p -= bs->front_pad;
-
- mempool_free(p, bs->bio_pool);
- } else {
- /* Bio was allocated by bio_kmalloc() */
- kfree(bio);
- }
-}
-
-void bio_init(struct bio *bio)
-{
- memset(bio, 0, sizeof(*bio));
- bio->bi_flags = 1 << BIO_UPTODATE;
- atomic_set(&bio->bi_remaining, 1);
- atomic_set(&bio->bi_cnt, 1);
-}
-EXPORT_SYMBOL(bio_init);
-
-/**
- * bio_reset - reinitialize a bio
- * @bio: bio to reset
- *
- * Description:
- * After calling bio_reset(), @bio will be in the same state as a freshly
- * allocated bio returned bio bio_alloc_bioset() - the only fields that are
- * preserved are the ones that are initialized by bio_alloc_bioset(). See
- * comment in struct bio.
- */
-void bio_reset(struct bio *bio)
-{
- unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
-
- __bio_free(bio);
-
- memset(bio, 0, BIO_RESET_BYTES);
- bio->bi_flags = flags|(1 << BIO_UPTODATE);
- atomic_set(&bio->bi_remaining, 1);
-}
-EXPORT_SYMBOL(bio_reset);
-
-static void bio_chain_endio(struct bio *bio, int error)
-{
- bio_endio(bio->bi_private, error);
- bio_put(bio);
-}
-
-/**
- * bio_chain - chain bio completions
- *
- * The caller won't have a bi_end_io called when @bio completes - instead,
- * @parent's bi_end_io won't be called until both @parent and @bio have
- * completed; the chained bio will also be freed when it completes.
- *
- * The caller must not set bi_private or bi_end_io in @bio.
- */
-void bio_chain(struct bio *bio, struct bio *parent)
-{
- BUG_ON(bio->bi_private || bio->bi_end_io);
-
- bio->bi_private = parent;
- bio->bi_end_io = bio_chain_endio;
- atomic_inc(&parent->bi_remaining);
-}
-EXPORT_SYMBOL(bio_chain);
-
-static void bio_alloc_rescue(struct work_struct *work)
-{
- struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
- struct bio *bio;
-
- while (1) {
- spin_lock(&bs->rescue_lock);
- bio = bio_list_pop(&bs->rescue_list);
- spin_unlock(&bs->rescue_lock);
-
- if (!bio)
- break;
-
- generic_make_request(bio);
- }
-}
-
-static void punt_bios_to_rescuer(struct bio_set *bs)
-{
- struct bio_list punt, nopunt;
- struct bio *bio;
-
- /*
- * In order to guarantee forward progress we must punt only bios that
- * were allocated from this bio_set; otherwise, if there was a bio on
- * there for a stacking driver higher up in the stack, processing it
- * could require allocating bios from this bio_set, and doing that from
- * our own rescuer would be bad.
- *
- * Since bio lists are singly linked, pop them all instead of trying to
- * remove from the middle of the list:
- */
-
- bio_list_init(&punt);
- bio_list_init(&nopunt);
-
- while ((bio = bio_list_pop(current->bio_list)))
- bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
-
- *current->bio_list = nopunt;
-
- spin_lock(&bs->rescue_lock);
- bio_list_merge(&bs->rescue_list, &punt);
- spin_unlock(&bs->rescue_lock);
-
- queue_work(bs->rescue_workqueue, &bs->rescue_work);
-}
-
-/**
- * bio_alloc_bioset - allocate a bio for I/O
- * @gfp_mask: the GFP_ mask given to the slab allocator
- * @nr_iovecs: number of iovecs to pre-allocate
- * @bs: the bio_set to allocate from.
- *
- * Description:
- * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
- * backed by the @bs's mempool.
- *
- * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
- * able to allocate a bio. This is due to the mempool guarantees. To make this
- * work, callers must never allocate more than 1 bio at a time from this pool.
- * Callers that need to allocate more than 1 bio must always submit the
- * previously allocated bio for IO before attempting to allocate a new one.
- * Failure to do so can cause deadlocks under memory pressure.
- *
- * Note that when running under generic_make_request() (i.e. any block
- * driver), bios are not submitted until after you return - see the code in
- * generic_make_request() that converts recursion into iteration, to prevent
- * stack overflows.
- *
- * This would normally mean allocating multiple bios under
- * generic_make_request() would be susceptible to deadlocks, but we have
- * deadlock avoidance code that resubmits any blocked bios from a rescuer
- * thread.
- *
- * However, we do not guarantee forward progress for allocations from other
- * mempools. Doing multiple allocations from the same mempool under
- * generic_make_request() should be avoided - instead, use bio_set's front_pad
- * for per bio allocations.
- *
- * RETURNS:
- * Pointer to new bio on success, NULL on failure.
- */
-struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
-{
- gfp_t saved_gfp = gfp_mask;
- unsigned front_pad;
- unsigned inline_vecs;
- unsigned long idx = BIO_POOL_NONE;
- struct bio_vec *bvl = NULL;
- struct bio *bio;
- void *p;
-
- if (!bs) {
- if (nr_iovecs > UIO_MAXIOV)
- return NULL;
-
- p = kmalloc(sizeof(struct bio) +
- nr_iovecs * sizeof(struct bio_vec),
- gfp_mask);
- front_pad = 0;
- inline_vecs = nr_iovecs;
- } else {
- /*
- * generic_make_request() converts recursion to iteration; this
- * means if we're running beneath it, any bios we allocate and
- * submit will not be submitted (and thus freed) until after we
- * return.
- *
- * This exposes us to a potential deadlock if we allocate
- * multiple bios from the same bio_set() while running
- * underneath generic_make_request(). If we were to allocate
- * multiple bios (say a stacking block driver that was splitting
- * bios), we would deadlock if we exhausted the mempool's
- * reserve.
- *
- * We solve this, and guarantee forward progress, with a rescuer
- * workqueue per bio_set. If we go to allocate and there are
- * bios on current->bio_list, we first try the allocation
- * without __GFP_WAIT; if that fails, we punt those bios we
- * would be blocking to the rescuer workqueue before we retry
- * with the original gfp_flags.
- */
-
- if (current->bio_list && !bio_list_empty(current->bio_list))
- gfp_mask &= ~__GFP_WAIT;
-
- p = mempool_alloc(bs->bio_pool, gfp_mask);
- if (!p && gfp_mask != saved_gfp) {
- punt_bios_to_rescuer(bs);
- gfp_mask = saved_gfp;
- p = mempool_alloc(bs->bio_pool, gfp_mask);
- }
-
- front_pad = bs->front_pad;
- inline_vecs = BIO_INLINE_VECS;
- }
-
- if (unlikely(!p))
- return NULL;
-
- bio = p + front_pad;
- bio_init(bio);
-
- if (nr_iovecs > inline_vecs) {
- bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
- if (!bvl && gfp_mask != saved_gfp) {
- punt_bios_to_rescuer(bs);
- gfp_mask = saved_gfp;
- bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
- }
-
- if (unlikely(!bvl))
- goto err_free;
-
- bio->bi_flags |= 1 << BIO_OWNS_VEC;
- } else if (nr_iovecs) {
- bvl = bio->bi_inline_vecs;
- }
-
- bio->bi_pool = bs;
- bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = nr_iovecs;
- bio->bi_io_vec = bvl;
- return bio;
-
-err_free:
- mempool_free(p, bs->bio_pool);
- return NULL;
-}
-EXPORT_SYMBOL(bio_alloc_bioset);
-
-void zero_fill_bio(struct bio *bio)
-{
- unsigned long flags;
- struct bio_vec bv;
- struct bvec_iter iter;
-
- bio_for_each_segment(bv, bio, iter) {
- char *data = bvec_kmap_irq(&bv, &flags);
- memset(data, 0, bv.bv_len);
- flush_dcache_page(bv.bv_page);
- bvec_kunmap_irq(data, &flags);
- }
-}
-EXPORT_SYMBOL(zero_fill_bio);
-
-/**
- * bio_put - release a reference to a bio
- * @bio: bio to release reference to
- *
- * Description:
- * Put a reference to a &struct bio, either one you have gotten with
- * bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
- **/
-void bio_put(struct bio *bio)
-{
- BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
-
- /*
- * last put frees it
- */
- if (atomic_dec_and_test(&bio->bi_cnt))
- bio_free(bio);
-}
-EXPORT_SYMBOL(bio_put);
-
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_phys_segments;
-}
-EXPORT_SYMBOL(bio_phys_segments);
-
-/**
- * __bio_clone_fast - clone a bio that shares the original bio's biovec
- * @bio: destination bio
- * @bio_src: bio to clone
- *
- * Clone a &bio. Caller will own the returned bio, but not
- * the actual data it points to. Reference count of returned
- * bio will be one.
- *
- * Caller must ensure that @bio_src is not freed before @bio.
- */
-void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
-{
- BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
-
- /*
- * most users will be overriding ->bi_bdev with a new target,
- * so we don't set nor calculate new physical/hw segment counts here
- */
- bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_flags |= 1 << BIO_CLONED;
- bio->bi_rw = bio_src->bi_rw;
- bio->bi_iter = bio_src->bi_iter;
- bio->bi_io_vec = bio_src->bi_io_vec;
-}
-EXPORT_SYMBOL(__bio_clone_fast);
-
-/**
- * bio_clone_fast - clone a bio that shares the original bio's biovec
- * @bio: bio to clone
- * @gfp_mask: allocation priority
- * @bs: bio_set to allocate from
- *
- * Like __bio_clone_fast, only also allocates the returned bio
- */
-struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
-{
- struct bio *b;
-
- b = bio_alloc_bioset(gfp_mask, 0, bs);
- if (!b)
- return NULL;
-
- __bio_clone_fast(b, bio);
-
- if (bio_integrity(bio)) {
- int ret;
-
- ret = bio_integrity_clone(b, bio, gfp_mask);
-
- if (ret < 0) {
- bio_put(b);
- return NULL;
- }
- }
-
- return b;
-}
-EXPORT_SYMBOL(bio_clone_fast);
-
-/**
- * bio_clone_bioset - clone a bio
- * @bio_src: bio to clone
- * @gfp_mask: allocation priority
- * @bs: bio_set to allocate from
- *
- * Clone bio. Caller will own the returned bio, but not the actual data it
- * points to. Reference count of returned bio will be one.
- */
-struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
- struct bio_set *bs)
-{
- struct bvec_iter iter;
- struct bio_vec bv;
- struct bio *bio;
-
- /*
- * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
- * bio_src->bi_io_vec to bio->bi_io_vec.
- *
- * We can't do that anymore, because:
- *
- * - The point of cloning the biovec is to produce a bio with a biovec
- * the caller can modify: bi_idx and bi_bvec_done should be 0.
- *
- * - The original bio could've had more than BIO_MAX_PAGES biovecs; if
- * we tried to clone the whole thing bio_alloc_bioset() would fail.
- * But the clone should succeed as long as the number of biovecs we
- * actually need to allocate is fewer than BIO_MAX_PAGES.
- *
- * - Lastly, bi_vcnt should not be looked at or relied upon by code
- * that does not own the bio - reason being drivers don't use it for
- * iterating over the biovec anymore, so expecting it to be kept up
- * to date (i.e. for clones that share the parent biovec) is just
- * asking for trouble and would force extra work on
- * __bio_clone_fast() anyways.
- */
-
- bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
- if (!bio)
- return NULL;
-
- bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_rw = bio_src->bi_rw;
- bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
- bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
-
- if (bio->bi_rw & REQ_DISCARD)
- goto integrity_clone;
-
- if (bio->bi_rw & REQ_WRITE_SAME) {
- bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
- goto integrity_clone;
- }
-
- bio_for_each_segment(bv, bio_src, iter)
- bio->bi_io_vec[bio->bi_vcnt++] = bv;
-
-integrity_clone:
- if (bio_integrity(bio_src)) {
- int ret;
-
- ret = bio_integrity_clone(bio, bio_src, gfp_mask);
- if (ret < 0) {
- bio_put(bio);
- return NULL;
- }
- }
-
- return bio;
-}
-EXPORT_SYMBOL(bio_clone_bioset);
-
-/**
- * bio_get_nr_vecs - return approx number of vecs
- * @bdev: I/O target
- *
- * Return the approximate number of pages we can send to this target.
- * There's no guarantee that you will be able to fit this number of pages
- * into a bio, it does not account for dynamic restrictions that vary
- * on offset.
- */
-int bio_get_nr_vecs(struct block_device *bdev)
-{
- struct request_queue *q = bdev_get_queue(bdev);
- int nr_pages;
-
- nr_pages = min_t(unsigned,
- queue_max_segments(q),
- queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
-
- return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
-
-}
-EXPORT_SYMBOL(bio_get_nr_vecs);
-
-static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
- *page, unsigned int len, unsigned int offset,
- unsigned int max_sectors)
-{
- int retried_segments = 0;
- struct bio_vec *bvec;
-
- /*
- * cloned bio must not modify vec list
- */
- if (unlikely(bio_flagged(bio, BIO_CLONED)))
- return 0;
-
- if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
- return 0;
-
- /*
- * For filesystems with a blocksize smaller than the pagesize
- * we will often be called with the same page as last time and
- * a consecutive offset. Optimize this special case.
- */
- if (bio->bi_vcnt > 0) {
- struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (page == prev->bv_page &&
- offset == prev->bv_offset + prev->bv_len) {
- unsigned int prev_bv_len = prev->bv_len;
- prev->bv_len += len;
-
- if (q->merge_bvec_fn) {
- struct bvec_merge_data bvm = {
- /* prev_bvec is already charged in
- bi_size, discharge it in order to
- simulate merging updated prev_bvec
- as new bvec. */
- .bi_bdev = bio->bi_bdev,
- .bi_sector = bio->bi_iter.bi_sector,
- .bi_size = bio->bi_iter.bi_size -
- prev_bv_len,
- .bi_rw = bio->bi_rw,
- };
-
- if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
- prev->bv_len -= len;
- return 0;
- }
- }
-
- goto done;
- }
- }
-
- if (bio->bi_vcnt >= bio->bi_max_vecs)
- return 0;
-
- /*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
- */
-
- while (bio->bi_phys_segments >= queue_max_segments(q)) {
-
- if (retried_segments)
- return 0;
-
- retried_segments = 1;
- blk_recount_segments(q, bio);
- }
-
- /*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
- /*
- * if queue has other restrictions (eg varying max sector size
- * depending on offset), it can specify a merge_bvec_fn in the
- * queue to get further control
- */
- if (q->merge_bvec_fn) {
- struct bvec_merge_data bvm = {
- .bi_bdev = bio->bi_bdev,
- .bi_sector = bio->bi_iter.bi_sector,
- .bi_size = bio->bi_iter.bi_size,
- .bi_rw = bio->bi_rw,
- };
-
- /*
- * merge_bvec_fn() returns number of bytes it can accept
- * at this offset
- */
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
- }
-
- /* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-
- bio->bi_vcnt++;
- bio->bi_phys_segments++;
- done:
- bio->bi_iter.bi_size += len;
- return len;
-}
-
-/**
- * bio_add_pc_page - attempt to add page to bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- *
- * This should only be used by REQ_PC bios.
- */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
-{
- return __bio_add_page(q, bio, page, len, offset,
- queue_max_hw_sectors(q));
-}
-EXPORT_SYMBOL(bio_add_pc_page);
-
-/**
- * bio_add_page - attempt to add page to bio
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- */
-int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
- unsigned int offset)
-{
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
-}
-EXPORT_SYMBOL(bio_add_page);
-
-struct submit_bio_ret {
- struct completion event;
- int error;
-};
-
-static void submit_bio_wait_endio(struct bio *bio, int error)
-{
- struct submit_bio_ret *ret = bio->bi_private;
-
- ret->error = error;
- complete(&ret->event);
-}
-
-/**
- * submit_bio_wait - submit a bio, and wait until it completes
- * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
- * @bio: The &struct bio which describes the I/O
- *
- * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
- * bio_endio() on failure.
- */
-int submit_bio_wait(int rw, struct bio *bio)
-{
- struct submit_bio_ret ret;
-
- rw |= REQ_SYNC;
- init_completion(&ret.event);
- bio->bi_private = &ret;
- bio->bi_end_io = submit_bio_wait_endio;
- submit_bio(rw, bio);
- wait_for_completion(&ret.event);
-
- return ret.error;
-}
-EXPORT_SYMBOL(submit_bio_wait);
-
-/**
- * bio_advance - increment/complete a bio by some number of bytes
- * @bio: bio to advance
- * @bytes: number of bytes to complete
- *
- * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
- * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
- * be updated on the last bvec as well.
- *
- * @bio will then represent the remaining, uncompleted portion of the io.
- */
-void bio_advance(struct bio *bio, unsigned bytes)
-{
- if (bio_integrity(bio))
- bio_integrity_advance(bio, bytes);
-
- bio_advance_iter(bio, &bio->bi_iter, bytes);
-}
-EXPORT_SYMBOL(bio_advance);
-
-/**
- * bio_alloc_pages - allocates a single page for each bvec in a bio
- * @bio: bio to allocate pages for
- * @gfp_mask: flags for allocation
- *
- * Allocates pages up to @bio->bi_vcnt.
- *
- * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
- * freed.
- */
-int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
-{
- int i;
- struct bio_vec *bv;
-
- bio_for_each_segment_all(bv, bio, i) {
- bv->bv_page = alloc_page(gfp_mask);
- if (!bv->bv_page) {
- while (--bv >= bio->bi_io_vec)
- __free_page(bv->bv_page);
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bio_alloc_pages);
-
-/**
- * bio_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
- * @src and @dst as linked lists of bios.
- *
- * Stops when it reaches the end of either @src or @dst - that is, copies
- * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
- */
-void bio_copy_data(struct bio *dst, struct bio *src)
-{
- struct bvec_iter src_iter, dst_iter;
- struct bio_vec src_bv, dst_bv;
- void *src_p, *dst_p;
- unsigned bytes;
-
- src_iter = src->bi_iter;
- dst_iter = dst->bi_iter;
-
- while (1) {
- if (!src_iter.bi_size) {
- src = src->bi_next;
- if (!src)
- break;
-
- src_iter = src->bi_iter;
- }
-
- if (!dst_iter.bi_size) {
- dst = dst->bi_next;
- if (!dst)
- break;
-
- dst_iter = dst->bi_iter;
- }
-
- src_bv = bio_iter_iovec(src, src_iter);
- dst_bv = bio_iter_iovec(dst, dst_iter);
-
- bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
- src_p = kmap_atomic(src_bv.bv_page);
- dst_p = kmap_atomic(dst_bv.bv_page);
-
- memcpy(dst_p + dst_bv.bv_offset,
- src_p + src_bv.bv_offset,
- bytes);
-
- kunmap_atomic(dst_p);
- kunmap_atomic(src_p);
-
- bio_advance_iter(src, &src_iter, bytes);
- bio_advance_iter(dst, &dst_iter, bytes);
- }
-}
-EXPORT_SYMBOL(bio_copy_data);
-
-struct bio_map_data {
- int nr_sgvecs;
- int is_our_pages;
- struct sg_iovec sgvecs[];
-};
-
-static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- const struct sg_iovec *iov, int iov_count,
- int is_our_pages)
-{
- memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
- bmd->nr_sgvecs = iov_count;
- bmd->is_our_pages = is_our_pages;
- bio->bi_private = bmd;
-}
-
-static struct bio_map_data *bio_alloc_map_data(int nr_segs,
- unsigned int iov_count,
- gfp_t gfp_mask)
-{
- if (iov_count > UIO_MAXIOV)
- return NULL;
-
- return kmalloc(sizeof(struct bio_map_data) +
- sizeof(struct sg_iovec) * iov_count, gfp_mask);
-}
-
-static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
- int to_user, int from_user, int do_free_page)
-{
- int ret = 0, i;
- struct bio_vec *bvec;
- int iov_idx = 0;
- unsigned int iov_off = 0;
-
- bio_for_each_segment_all(bvec, bio, i) {
- char *bv_addr = page_address(bvec->bv_page);
- unsigned int bv_len = bvec->bv_len;
-
- while (bv_len && iov_idx < iov_count) {
- unsigned int bytes;
- char __user *iov_addr;
-
- bytes = min_t(unsigned int,
- iov[iov_idx].iov_len - iov_off, bv_len);
- iov_addr = iov[iov_idx].iov_base + iov_off;
-
- if (!ret) {
- if (to_user)
- ret = copy_to_user(iov_addr, bv_addr,
- bytes);
-
- if (from_user)
- ret = copy_from_user(bv_addr, iov_addr,
- bytes);
-
- if (ret)
- ret = -EFAULT;
- }
-
- bv_len -= bytes;
- bv_addr += bytes;
- iov_addr += bytes;
- iov_off += bytes;
-
- if (iov[iov_idx].iov_len == iov_off) {
- iov_idx++;
- iov_off = 0;
- }
- }
-
- if (do_free_page)
- __free_page(bvec->bv_page);
- }
-
- return ret;
-}
-
-/**
- * bio_uncopy_user - finish previously mapped bio
- * @bio: bio being terminated
- *
- * Free pages allocated from bio_copy_user() and write back data
- * to user space in case of a read.
- */
-int bio_uncopy_user(struct bio *bio)
-{
- struct bio_map_data *bmd = bio->bi_private;
- struct bio_vec *bvec;
- int ret = 0, i;
-
- if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
- /*
- * if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free.
- */
- if (current->mm)
- ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
- bio_data_dir(bio) == READ,
- 0, bmd->is_our_pages);
- else if (bmd->is_our_pages)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
- }
- kfree(bmd);
- bio_put(bio);
- return ret;
-}
-EXPORT_SYMBOL(bio_uncopy_user);
-
-/**
- * bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user_iov(struct request_queue *q,
- struct rq_map_data *map_data,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- struct bio_map_data *bmd;
- struct bio_vec *bvec;
- struct page *page;
- struct bio *bio;
- int i, ret;
- int nr_pages = 0;
- unsigned int len = 0;
- unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
-
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr;
- unsigned long end;
- unsigned long start;
-
- uaddr = (unsigned long)iov[i].iov_base;
- end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- start = uaddr >> PAGE_SHIFT;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages += end - start;
- len += iov[i].iov_len;
- }
-
- if (offset)
- nr_pages++;
-
- bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- ret = -ENOMEM;
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- if (!write_to_vm)
- bio->bi_rw |= REQ_WRITE;
-
- ret = 0;
-
- if (map_data) {
- nr_pages = 1 << map_data->page_order;
- i = map_data->offset / PAGE_SIZE;
- }
- while (len) {
- unsigned int bytes = PAGE_SIZE;
-
- bytes -= offset;
-
- if (bytes > len)
- bytes = len;
-
- if (map_data) {
- if (i == map_data->nr_entries * nr_pages) {
- ret = -ENOMEM;
- break;
- }
-
- page = map_data->pages[i / nr_pages];
- page += (i % nr_pages);
-
- i++;
- } else {
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- break;
- }
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
- break;
-
- len -= bytes;
- offset = 0;
- }
-
- if (ret)
- goto cleanup;
-
- /*
- * success
- */
- if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
- (map_data && map_data->from_user)) {
- ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
- if (ret)
- goto cleanup;
- }
-
- bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
- return bio;
-cleanup:
- if (!map_data)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
-
- bio_put(bio);
-out_bmd:
- kfree(bmd);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_copy_user - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
- */
-struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
- unsigned long uaddr, unsigned int len,
- int write_to_vm, gfp_t gfp_mask)
-{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_copy_user);
-
-static struct bio *__bio_map_user_iov(struct request_queue *q,
- struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- int i, j;
- int nr_pages = 0;
- struct page **pages;
- struct bio *bio;
- int cur_page = 0;
- int ret, offset;
-
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
- unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = uaddr >> PAGE_SHIFT;
-
- /*
- * Overflow, abort
- */
- if (end < start)
- return ERR_PTR(-EINVAL);
-
- nr_pages += end - start;
- /*
- * buffer must be aligned to at least hardsector size for now
- */
- if (uaddr & queue_dma_alignment(q))
- return ERR_PTR(-EINVAL);
- }
-
- if (!nr_pages)
- return ERR_PTR(-EINVAL);
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- ret = -ENOMEM;
- pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
- if (!pages)
- goto out;
-
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
- unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = uaddr >> PAGE_SHIFT;
- const int local_nr_pages = end - start;
- const int page_limit = cur_page + local_nr_pages;
-
- ret = get_user_pages_fast(uaddr, local_nr_pages,
- write_to_vm, &pages[cur_page]);
- if (ret < local_nr_pages) {
- ret = -EFAULT;
- goto out_unmap;
- }
-
- offset = uaddr & ~PAGE_MASK;
- for (j = cur_page; j < page_limit; j++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- /*
- * sorry...
- */
- if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
- bytes)
- break;
-
- len -= bytes;
- offset = 0;
- }
-
- cur_page = j;
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < page_limit)
- page_cache_release(pages[j++]);
- }
-
- kfree(pages);
-
- /*
- * set data direction, and check if mapped pages need bouncing
- */
- if (!write_to_vm)
- bio->bi_rw |= REQ_WRITE;
-
- bio->bi_bdev = bdev;
- bio->bi_flags |= (1 << BIO_USER_MAPPED);
- return bio;
-
- out_unmap:
- for (i = 0; i < nr_pages; i++) {
- if(!pages[i])
- break;
- page_cache_release(pages[i]);
- }
- out:
- kfree(pages);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user - map user address into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len, int write_to_vm,
- gfp_t gfp_mask)
-{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_map_user);
-
-/**
- * bio_map_user_iov - map user sg_iovec table into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- struct bio *bio;
-
- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
- gfp_mask);
- if (IS_ERR(bio))
- return bio;
-
- /*
- * subtle -- if __bio_map_user() ended up bouncing a bio,
- * it would normally disappear when its bi_end_io is run.
- * however, we need it for the unmap, so grab an extra
- * reference to it
- */
- bio_get(bio);
-
- return bio;
-}
-
-static void __bio_unmap_user(struct bio *bio)
-{
- struct bio_vec *bvec;
- int i;
-
- /*
- * make sure we dirty pages we wrote to
- */
- bio_for_each_segment_all(bvec, bio, i) {
- if (bio_data_dir(bio) == READ)
- set_page_dirty_lock(bvec->bv_page);
-
- page_cache_release(bvec->bv_page);
- }
-
- bio_put(bio);
-}
-
-/**
- * bio_unmap_user - unmap a bio
- * @bio: the bio being unmapped
- *
- * Unmap a bio previously mapped by bio_map_user(). Must be called with
- * a process context.
- *
- * bio_unmap_user() may sleep.
- */
-void bio_unmap_user(struct bio *bio)
-{
- __bio_unmap_user(bio);
- bio_put(bio);
-}
-EXPORT_SYMBOL(bio_unmap_user);
-
-static void bio_map_kern_endio(struct bio *bio, int err)
-{
- bio_put(bio);
-}
-
-static struct bio *__bio_map_kern(struct request_queue *q, void *data,
- unsigned int len, gfp_t gfp_mask)
-{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- int offset, i;
- struct bio *bio;
-
- bio = bio_kmalloc(gfp_mask, nr_pages);
- if (!bio)
- return ERR_PTR(-ENOMEM);
-
- offset = offset_in_page(kaddr);
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
-
- if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
- offset) < bytes)
- break;
-
- data += bytes;
- len -= bytes;
- offset = 0;
- }
-
- bio->bi_end_io = bio_map_kern_endio;
- return bio;
-}
-
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
-{
- struct bio *bio;
-
- bio = __bio_map_kern(q, data, len, gfp_mask);
- if (IS_ERR(bio))
- return bio;
-
- if (bio->bi_iter.bi_size == len)
- return bio;
-
- /*
- * Don't support partial mappings.
- */
- bio_put(bio);
- return ERR_PTR(-EINVAL);
-}
-EXPORT_SYMBOL(bio_map_kern);
-
-static void bio_copy_kern_endio(struct bio *bio, int err)
-{
- struct bio_vec *bvec;
- const int read = bio_data_dir(bio) == READ;
- struct bio_map_data *bmd = bio->bi_private;
- int i;
- char *p = bmd->sgvecs[0].iov_base;
-
- bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
-
- if (read)
- memcpy(p, addr, bvec->bv_len);
-
- __free_page(bvec->bv_page);
- p += bvec->bv_len;
- }
-
- kfree(bmd);
- bio_put(bio);
-}
-
-/**
- * bio_copy_kern - copy kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to copy
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio and page allocation
- * @reading: data direction is READ
- *
- * copy the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask, int reading)
-{
- struct bio *bio;
- struct bio_vec *bvec;
- int i;
-
- bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
- if (IS_ERR(bio))
- return bio;
-
- if (!reading) {
- void *p = data;
-
- bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
-
- memcpy(addr, p, bvec->bv_len);
- p += bvec->bv_len;
- }
- }
-
- bio->bi_end_io = bio_copy_kern_endio;
-
- return bio;
-}
-EXPORT_SYMBOL(bio_copy_kern);
-
-/*
- * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
- * for performing direct-IO in BIOs.
- *
- * The problem is that we cannot run set_page_dirty() from interrupt context
- * because the required locks are not interrupt-safe. So what we can do is to
- * mark the pages dirty _before_ performing IO. And in interrupt context,
- * check that the pages are still dirty. If so, fine. If not, redirty them
- * in process context.
- *
- * We special-case compound pages here: normally this means reads into hugetlb
- * pages. The logic in here doesn't really work right for compound pages
- * because the VM does not uniformly chase down the head page in all cases.
- * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
- * handle them at all. So we skip compound pages here at an early stage.
- *
- * Note that this code is very hard to test under normal circumstances because
- * direct-io pins the pages with get_user_pages(). This makes
- * is_page_cache_freeable return false, and the VM will not clean the pages.
- * But other code (eg, flusher threads) could clean the pages if they are mapped
- * pagecache.
- *
- * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
- * deferred bio dirtying paths.
- */
-
-/*
- * bio_set_pages_dirty() will mark all the bio's pages as dirty.
- */
-void bio_set_pages_dirty(struct bio *bio)
-{
- struct bio_vec *bvec;
- int i;
-
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
-
- if (page && !PageCompound(page))
- set_page_dirty_lock(page);
- }
-}
-
-static void bio_release_pages(struct bio *bio)
-{
- struct bio_vec *bvec;
- int i;
-
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
-
- if (page)
- put_page(page);
- }
-}
-
-/*
- * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
- * If they are, then fine. If, however, some pages are clean then they must
- * have been written out during the direct-IO read. So we take another ref on
- * the BIO and the offending pages and re-dirty the pages in process context.
- *
- * It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one page_cache_release() against each page and will
- * run one bio_put() against the BIO.
- */
-
-static void bio_dirty_fn(struct work_struct *work);
-
-static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
-static DEFINE_SPINLOCK(bio_dirty_lock);
-static struct bio *bio_dirty_list;
-
-/*
- * This runs in process context
- */
-static void bio_dirty_fn(struct work_struct *work)
-{
- unsigned long flags;
- struct bio *bio;
-
- spin_lock_irqsave(&bio_dirty_lock, flags);
- bio = bio_dirty_list;
- bio_dirty_list = NULL;
- spin_unlock_irqrestore(&bio_dirty_lock, flags);
-
- while (bio) {
- struct bio *next = bio->bi_private;
-
- bio_set_pages_dirty(bio);
- bio_release_pages(bio);
- bio_put(bio);
- bio = next;
- }
-}
-
-void bio_check_pages_dirty(struct bio *bio)
-{
- struct bio_vec *bvec;
- int nr_clean_pages = 0;
- int i;
-
- bio_for_each_segment_all(bvec, bio, i) {
- struct page *page = bvec->bv_page;
-
- if (PageDirty(page) || PageCompound(page)) {
- page_cache_release(page);
- bvec->bv_page = NULL;
- } else {
- nr_clean_pages++;
- }
- }
-
- if (nr_clean_pages) {
- unsigned long flags;
-
- spin_lock_irqsave(&bio_dirty_lock, flags);
- bio->bi_private = bio_dirty_list;
- bio_dirty_list = bio;
- spin_unlock_irqrestore(&bio_dirty_lock, flags);
- schedule_work(&bio_dirty_work);
- } else {
- bio_put(bio);
- }
-}
-
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-void bio_flush_dcache_pages(struct bio *bi)
-{
- struct bio_vec bvec;
- struct bvec_iter iter;
-
- bio_for_each_segment(bvec, bi, iter)
- flush_dcache_page(bvec.bv_page);
-}
-EXPORT_SYMBOL(bio_flush_dcache_pages);
-#endif
-
-/**
- * bio_endio - end I/O on a bio
- * @bio: bio
- * @error: error, if any
- *
- * Description:
- * bio_endio() will end I/O on the whole bio. bio_endio() is the
- * preferred way to end I/O on a bio, it takes care of clearing
- * BIO_UPTODATE on error. @error is 0 on success, and and one of the
- * established -Exxxx (-EIO, for instance) error values in case
- * something went wrong. No one should call bi_end_io() directly on a
- * bio unless they own it and thus know that it has an end_io
- * function.
- **/
-void bio_endio(struct bio *bio, int error)
-{
- while (bio) {
- BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
-
- if (error)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = -EIO;
-
- if (!atomic_dec_and_test(&bio->bi_remaining))
- return;
-
- /*
- * Need to have a real endio function for chained bios,
- * otherwise various corner cases will break (like stacking
- * block devices that save/restore bi_end_io) - however, we want
- * to avoid unbounded recursion and blowing the stack. Tail call
- * optimization would handle this, but compiling with frame
- * pointers also disables gcc's sibling call optimization.
- */
- if (bio->bi_end_io == bio_chain_endio) {
- struct bio *parent = bio->bi_private;
- bio_put(bio);
- bio = parent;
- } else {
- if (bio->bi_end_io)
- bio->bi_end_io(bio, error);
- bio = NULL;
- }
- }
-}
-EXPORT_SYMBOL(bio_endio);
-
-/**
- * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
- * @bio: bio
- * @error: error, if any
- *
- * For code that has saved and restored bi_end_io; thing hard before using this
- * function, probably you should've cloned the entire bio.
- **/
-void bio_endio_nodec(struct bio *bio, int error)
-{
- atomic_inc(&bio->bi_remaining);
- bio_endio(bio, error);
-}
-EXPORT_SYMBOL(bio_endio_nodec);
-
-/**
- * bio_split - split a bio
- * @bio: bio to split
- * @sectors: number of sectors to split from the front of @bio
- * @gfp: gfp mask
- * @bs: bio set to allocate from
- *
- * Allocates and returns a new bio which represents @sectors from the start of
- * @bio, and updates @bio to represent the remaining sectors.
- *
- * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
- * responsibility to ensure that @bio is not freed before the split.
- */
-struct bio *bio_split(struct bio *bio, int sectors,
- gfp_t gfp, struct bio_set *bs)
-{
- struct bio *split = NULL;
-
- BUG_ON(sectors <= 0);
- BUG_ON(sectors >= bio_sectors(bio));
-
- split = bio_clone_fast(bio, gfp, bs);
- if (!split)
- return NULL;
-
- split->bi_iter.bi_size = sectors << 9;
-
- if (bio_integrity(split))
- bio_integrity_trim(split, 0, sectors);
-
- bio_advance(bio, split->bi_iter.bi_size);
-
- return split;
-}
-EXPORT_SYMBOL(bio_split);
-
-/**
- * bio_trim - trim a bio
- * @bio: bio to trim
- * @offset: number of sectors to trim from the front of @bio
- * @size: size we want to trim @bio to, in sectors
- */
-void bio_trim(struct bio *bio, int offset, int size)
-{
- /* 'bio' is a cloned bio which we need to trim to match
- * the given offset and size.
- */
-
- size <<= 9;
- if (offset == 0 && size == bio->bi_iter.bi_size)
- return;
-
- clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
- bio_advance(bio, offset << 9);
-
- bio->bi_iter.bi_size = size;
-}
-EXPORT_SYMBOL_GPL(bio_trim);
-
-/*
- * create memory pools for biovec's in a bio_set.
- * use the global biovec slabs created for general use.
- */
-mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
-{
- struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
-
- return mempool_create_slab_pool(pool_entries, bp->slab);
-}
-
-void bioset_free(struct bio_set *bs)
-{
- if (bs->rescue_workqueue)
- destroy_workqueue(bs->rescue_workqueue);
-
- if (bs->bio_pool)
- mempool_destroy(bs->bio_pool);
-
- if (bs->bvec_pool)
- mempool_destroy(bs->bvec_pool);
-
- bioset_integrity_free(bs);
- bio_put_slab(bs);
-
- kfree(bs);
-}
-EXPORT_SYMBOL(bioset_free);
-
-/**
- * bioset_create - Create a bio_set
- * @pool_size: Number of bio and bio_vecs to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- * to ask for a number of bytes to be allocated in front of the bio.
- * Front pad allocation is useful for embedding the bio inside
- * another structure, to avoid allocating extra data to go with the bio.
- * Note that the bio must be embedded at the END of that structure always,
- * or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
- unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
- struct bio_set *bs;
-
- bs = kzalloc(sizeof(*bs), GFP_KERNEL);
- if (!bs)
- return NULL;
-
- bs->front_pad = front_pad;
-
- spin_lock_init(&bs->rescue_lock);
- bio_list_init(&bs->rescue_list);
- INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
-
- bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
- if (!bs->bio_slab) {
- kfree(bs);
- return NULL;
- }
-
- bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
- if (!bs->bio_pool)
- goto bad;
-
- bs->bvec_pool = biovec_create_pool(bs, pool_size);
- if (!bs->bvec_pool)
- goto bad;
-
- bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
- if (!bs->rescue_workqueue)
- goto bad;
-
- return bs;
-bad:
- bioset_free(bs);
- return NULL;
-}
-EXPORT_SYMBOL(bioset_create);
-
-#ifdef CONFIG_BLK_CGROUP
-/**
- * bio_associate_current - associate a bio with %current
- * @bio: target bio
- *
- * Associate @bio with %current if it hasn't been associated yet. Block
- * layer will treat @bio as if it were issued by %current no matter which
- * task actually issues it.
- *
- * This function takes an extra reference of @task's io_context and blkcg
- * which will be put when @bio is released. The caller must own @bio,
- * ensure %current->io_context exists, and is responsible for synchronizing
- * calls to this function.
- */
-int bio_associate_current(struct bio *bio)
-{
- struct io_context *ioc;
- struct cgroup_subsys_state *css;
-
- if (bio->bi_ioc)
- return -EBUSY;
-
- ioc = current->io_context;
- if (!ioc)
- return -ENOENT;
-
- /* acquire active ref on @ioc and associate */
- get_io_context_active(ioc);
- bio->bi_ioc = ioc;
-
- /* associate blkcg if exists */
- rcu_read_lock();
- css = task_css(current, blkio_cgrp_id);
- if (css && css_tryget(css))
- bio->bi_css = css;
- rcu_read_unlock();
-
- return 0;
-}
-
-/**
- * bio_disassociate_task - undo bio_associate_current()
- * @bio: target bio
- */
-void bio_disassociate_task(struct bio *bio)
-{
- if (bio->bi_ioc) {
- put_io_context(bio->bi_ioc);
- bio->bi_ioc = NULL;
- }
- if (bio->bi_css) {
- css_put(bio->bi_css);
- bio->bi_css = NULL;
- }
-}
-
-#endif /* CONFIG_BLK_CGROUP */
-
-static void __init biovec_init_slabs(void)
-{
- int i;
-
- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
- int size;
- struct biovec_slab *bvs = bvec_slabs + i;
-
- if (bvs->nr_vecs <= BIO_INLINE_VECS) {
- bvs->slab = NULL;
- continue;
- }
-
- size = bvs->nr_vecs * sizeof(struct bio_vec);
- bvs->slab = kmem_cache_create(bvs->name, size, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- }
-}
-
-static int __init init_bio(void)
-{
- bio_slab_max = 2;
- bio_slab_nr = 0;
- bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
- if (!bio_slabs)
- panic("bio: can't allocate bios\n");
-
- bio_integrity_init();
- biovec_init_slabs();
-
- fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
- if (!fs_bio_set)
- panic("bio: can't allocate bios\n");
-
- if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
- panic("bio: can't create integrity pool\n");
-
- return 0;
-}
-subsys_initcall(init_bio);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 552a8d13bc3..6d7274619bf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
}
static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
- nr_segs, blkdev_get_block, NULL, NULL, 0);
+ return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+ offset, blkdev_get_block,
+ NULL, NULL, 0);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -363,6 +364,69 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(blkdev_fsync);
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: The offset on the device to read the page to (need not be aligned)
+ * @page: The page to read
+ *
+ * On entry, the page should be locked. It will be unlocked when the page
+ * has been read. If the block driver implements rw_page synchronously,
+ * that will be true on exit from this function, but it need not be.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to read this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+ struct page *page)
+{
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: The offset on the device to write the page to (need not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * On entry, the page should be locked and not currently under writeback.
+ * On exit, if the write started successfully, the page will be unlocked and
+ * under writeback. If the write failed already (eg the driver failed to
+ * queue the page to the device), the page will still be locked. If the
+ * caller is a ->writepage implementation, it will need to unlock the page.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to write this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+ struct page *page, struct writeback_control *wbc)
+{
+ int result;
+ int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ set_page_writeback(page);
+ result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+ if (result)
+ end_page_writeback(page);
+ else
+ unlock_page(page);
+ return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
/*
* pseudo-fs
*/
@@ -1508,43 +1572,38 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
* Does not take i_mutex for the write and thus is not for general purpose
* use.
*/
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct blk_plug plug;
ssize_t ret;
- BUG_ON(iocb->ki_pos != pos);
-
blk_start_plug(&plug);
- ret = __generic_file_aio_write(iocb, iov, nr_segs);
+ ret = __generic_file_write_iter(iocb, from);
if (ret > 0) {
ssize_t err;
-
- err = generic_write_sync(file, pos, ret);
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
if (err < 0)
ret = err;
}
blk_finish_plug(&plug);
return ret;
}
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
-static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *bd_inode = file->f_mapping->host;
loff_t size = i_size_read(bd_inode);
+ loff_t pos = iocb->ki_pos;
if (pos >= size)
return 0;
size -= pos;
- if (size < iocb->ki_nbytes)
- nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
- return generic_file_aio_read(iocb, iov, nr_segs, pos);
+ iov_iter_truncate(to, size);
+ return generic_file_read_iter(iocb, to);
}
/*
@@ -1576,10 +1635,10 @@ const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = blkdev_aio_read,
- .aio_write = blkdev_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = blkdev_read_iter,
+ .write_iter = blkdev_write_iter,
.mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
@@ -1587,7 +1646,7 @@ const struct file_operations def_blk_fops = {
.compat_ioctl = compat_blkdev_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f341a98031d..6d1d0b93b1a 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -16,4 +16,4 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
- tests/extent-io-tests.o tests/inode-tests.o
+ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index ff9b3995d45..9a0124a9585 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -79,13 +79,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
const char *name;
char *value = NULL;
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret < 0)
- return ret;
- ret = 0;
- }
-
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 10db21fa092..e25564bfcb4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -900,7 +900,11 @@ again:
goto out;
BUG_ON(ret == 0);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (trans && likely(trans->type != __TRANS_DUMMY)) {
+#else
if (trans) {
+#endif
/*
* look if there are updates for this ref queued and lock the
* head
@@ -984,11 +988,12 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- if (extent_item_pos && !ref->inode_list) {
+ if (extent_item_pos && !ref->inode_list &&
+ ref->level == 0) {
u32 bsz;
struct extent_buffer *eb;
bsz = btrfs_level_size(fs_info->extent_root,
- info_level);
+ ref->level);
eb = read_tree_block(fs_info->extent_root,
ref->parent, bsz, 0);
if (!eb || !extent_buffer_uptodate(eb)) {
@@ -1404,9 +1409,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
* returns <0 on error
*/
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- struct btrfs_extent_inline_ref **out_eiref,
- int *out_type)
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
{
unsigned long end;
u64 flags;
@@ -1416,19 +1422,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
/* first call */
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_eiref =
- (struct btrfs_extent_inline_ref *)(info + 1);
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1);
+ }
} else {
*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
*ptr = (unsigned long)*out_eiref;
- if ((void *)*ptr >= (void *)ei + item_size)
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
return -ENOENT;
}
end = (unsigned long)ei + item_size;
- *out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1447,8 +1460,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
* <0 on error.
*/
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level)
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level)
{
int ret;
int type;
@@ -1459,8 +1472,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 1;
while (1) {
- ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
- &eiref, &type);
+ ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+ &eiref, &type);
if (ret < 0)
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index a910b27a8ad..86fc20fec28 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level);
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots);
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c9a24444ec9..4794923c410 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -279,9 +279,11 @@ static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
+
#endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 0e8388e72d8..ce92ae30250 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1093,6 +1093,7 @@ leaf_item_out_of_bounce_error:
next_stack =
btrfsic_stack_frame_alloc();
if (NULL == next_stack) {
+ sf->error = -1;
btrfsic_release_block_ctx(
&sf->
next_block_ctx);
@@ -1190,8 +1191,10 @@ continue_with_current_node_stack_frame:
sf->next_block_ctx.datav[0];
next_stack = btrfsic_stack_frame_alloc();
- if (NULL == next_stack)
+ if (NULL == next_stack) {
+ sf->error = -1;
goto one_stack_frame_backwards;
+ }
next_stack->i = -1;
next_stack->block = sf->next_block;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d43c544d3b6..1daea0b4718 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace)
spin_lock(workspace_lock);
if (*num_workspace < num_online_cpus()) {
- list_add_tail(workspace, idle_workspace);
+ list_add(workspace, idle_workspace);
(*num_workspace)++;
spin_unlock(workspace_lock);
goto wake;
@@ -887,7 +887,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -1;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, len, pages,
@@ -923,7 +923,7 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -ENOMEM;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
disk_start,
@@ -945,7 +945,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
workspace = find_workspace(type);
if (IS_ERR(workspace))
- return -ENOMEM;
+ return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
dest_page, start_byte,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1bcfcdb23cf..aeab453b8e2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -224,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
static void add_root_to_dirty_list(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- if (root->track_dirty && list_empty(&root->dirty_list)) {
+ if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+ list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
@@ -246,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
if (level == 0)
@@ -354,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
- * Increment the upper half of tree_mod_seq, set lower half zero.
- *
- * Must be called with fs_info->tree_mod_seq_lock held.
- */
-static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
-{
- u64 seq = atomic64_read(&fs_info->tree_mod_seq);
- seq &= 0xffffffff00000000ull;
- seq += 1ull << 32;
- atomic64_set(&fs_info->tree_mod_seq, seq);
- return seq;
-}
-
-/*
- * Increment the lower half of tree_mod_seq.
- *
- * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
- * are generated should not technically require a spin lock here. (Rationale:
- * incrementing the minor while incrementing the major seq number is between its
- * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
- * just returns a unique sequence number as usual.) We have decided to leave
- * that requirement in here and rethink it once we notice it really imposes a
- * problem on some workload.
+ * Pull a new tree mod seq number for our operation.
*/
-static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
/*
- * return the last minor in the previous major tree_mod_seq number
- */
-u64 btrfs_tree_mod_seq_prev(u64 seq)
-{
- return (seq & 0xffffffff00000000ull) - 1ull;
-}
-
-/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -402,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
- u64 seq;
-
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
- elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
+ elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
- return seq;
+ return elem->seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -487,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
BUG_ON(!tm);
- spin_lock(&fs_info->tree_mod_seq_lock);
- tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -997,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
*/
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
return 1;
#endif
@@ -1146,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -1193,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows) {
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
return ret;
@@ -1538,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
/* ensure we can see the force_cow */
smp_rmb();
@@ -1556,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !root->force_cow)
+ !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
}
@@ -5125,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return ret;
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
- if (ret < 0)
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
return 0;
return 1;
}
@@ -5736,6 +5718,24 @@ again:
ret = 0;
goto done;
}
+ /*
+ * So the above check misses one case:
+ * - after releasing the path above, someone has removed the item that
+ * used to be at the very end of the block, and balance between leafs
+ * gets another one with bigger key.offset to replace it.
+ *
+ * This one should be returned as well, or we can get leaf corruption
+ * later(esp. in __btrfs_drop_extents()).
+ *
+ * And a bit more explanation about this check,
+ * with ret > 0, the key isn't found, the path points to the slot
+ * where it should be inserted, so the path->slots[0] item must be the
+ * bigger one.
+ */
+ if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+ ret = 0;
+ goto done;
+ }
while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd..be91397f4e9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -33,6 +33,7 @@
#include <asm/kmap_types.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
+#include <linux/workqueue.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -756,6 +757,12 @@ struct btrfs_dir_item {
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
+
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -840,7 +847,10 @@ struct btrfs_disk_balance_args {
/* BTRFS_BALANCE_ARGS_* */
__le64 flags;
- __le64 unused[8];
+ /* BTRFS_BALANCE_ARGS_LIMIT value */
+ __le64 limit;
+
+ __le64 unused[7];
} __attribute__ ((__packed__));
/*
@@ -1113,6 +1123,12 @@ struct btrfs_qgroup_limit_item {
__le64 rsv_excl;
} __attribute__ ((__packed__));
+/* For raid type sysfs entries */
+struct raid_kobject {
+ int raid_type;
+ struct kobject kobj;
+};
+
struct btrfs_space_info {
spinlock_t lock;
@@ -1163,7 +1179,7 @@ struct btrfs_space_info {
wait_queue_head_t wait;
struct kobject kobj;
- struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
+ struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
};
#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1243,11 +1259,19 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
+ u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
u64 sectorsize;
u64 cache_generation;
+ /*
+ * It is just used for the delayed data space allocation because
+ * only the data space allocation and the relative metadata update
+ * can be done cross the transaction.
+ */
+ struct rw_semaphore data_rwsem;
+
/* for raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;
@@ -1313,6 +1337,8 @@ struct btrfs_stripe_hash_table {
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
+void btrfs_init_async_reclaim_work(struct work_struct *work);
+
/* fs_info */
struct reloc_control;
struct btrfs_device;
@@ -1534,6 +1560,9 @@ struct btrfs_fs_info {
*/
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
+
+ /* the extent workers do delayed refs on the extent allocation tree */
+ struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
int thread_pool_size;
@@ -1636,7 +1665,10 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
+ struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
+ spinlock_t qgroup_op_lock;
+ atomic_t qgroup_op_seq;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1688,6 +1720,9 @@ struct btrfs_fs_info {
struct semaphore uuid_tree_rescan_sem;
unsigned int update_uuid_tree_gen:1;
+
+ /* Used to reclaim the metadata space in the background. */
+ struct work_struct async_reclaim_work;
};
struct btrfs_subvolume_writers {
@@ -1696,6 +1731,26 @@ struct btrfs_subvolume_writers {
};
/*
+ * The state of btrfs root
+ */
+/*
+ * btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race is very small, and only the first time the root
+ * is added to each transaction. So IN_TRANS_SETUP
+ * is used to tell us when more checks are required
+ */
+#define BTRFS_ROOT_IN_TRANS_SETUP 0
+#define BTRFS_ROOT_REF_COWS 1
+#define BTRFS_ROOT_TRACK_DIRTY 2
+#define BTRFS_ROOT_IN_RADIX 3
+#define BTRFS_ROOT_DUMMY_ROOT 4
+#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5
+#define BTRFS_ROOT_DEFRAG_RUNNING 6
+#define BTRFS_ROOT_FORCE_COW 7
+#define BTRFS_ROOT_MULTI_LOG_TASKS 8
+
+/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
*/
@@ -1706,6 +1761,7 @@ struct btrfs_root {
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;
+ unsigned long state;
struct btrfs_root_item root_item;
struct btrfs_key root_key;
struct btrfs_fs_info *fs_info;
@@ -1740,7 +1796,6 @@ struct btrfs_root {
/* Just be updated when the commit succeeds. */
int last_log_commit;
pid_t log_start_pid;
- bool log_multiple_pids;
u64 objectid;
u64 last_trans;
@@ -1760,23 +1815,13 @@ struct btrfs_root {
u64 highest_objectid;
- /* btrfs_record_root_in_trans is a multi-step process,
- * and it can race with the balancing code. But the
- * race is very small, and only the first time the root
- * is added to each transaction. So in_trans_setup
- * is used to tell us when more checks are required
- */
- unsigned long in_trans_setup;
- int ref_cows;
- int track_dirty;
- int in_radix;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
- int dummy_root;
+ u64 alloc_bytenr;
#endif
+
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
- int defrag_running;
char *name;
/* the dirty list is only used by non-reference counted roots */
@@ -1790,7 +1835,6 @@ struct btrfs_root {
spinlock_t orphan_lock;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
- int orphan_item_inserted;
int orphan_cleanup_state;
spinlock_t inode_lock;
@@ -1808,8 +1852,6 @@ struct btrfs_root {
*/
dev_t anon_dev;
- int force_cow;
-
spinlock_t root_item_lock;
atomic_t refs;
@@ -2058,6 +2100,20 @@ struct btrfs_ioctl_defrag_range_args {
#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
BTRFS_MOUNT_##opt)
+#define btrfs_set_and_info(root, opt, fmt, args...) \
+{ \
+ if (!btrfs_test_opt(root, opt)) \
+ btrfs_info(root->fs_info, fmt, ##args); \
+ btrfs_set_opt(root->fs_info->mount_opt, opt); \
+}
+
+#define btrfs_clear_and_info(root, opt, fmt, args...) \
+{ \
+ if (btrfs_test_opt(root, opt)) \
+ btrfs_info(root->fs_info, fmt, ##args); \
+ btrfs_clear_opt(root->fs_info->mount_opt, opt); \
+}
+
/*
* Inode flags
*/
@@ -2774,6 +2830,11 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root)
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
+static inline bool btrfs_root_dead(struct btrfs_root *root)
+{
+ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
+}
+
/* struct btrfs_root_backup */
BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
tree_root, 64);
@@ -2883,6 +2944,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
cpu->vend = le64_to_cpu(disk->vend);
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
+ cpu->limit = le64_to_cpu(disk->limit);
}
static inline void
@@ -2900,6 +2962,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
disk->vend = cpu_to_le64(cpu->vend);
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
+ disk->limit = cpu_to_le64(cpu->limit);
}
/* struct btrfs_super_block */
@@ -3222,6 +3285,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+ unsigned long count, int wait);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
@@ -3259,11 +3324,11 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data);
+ struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
+ struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
+ struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
@@ -3271,9 +3336,10 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int for_cow);
+ u64 owner, u64 offset, int no_quota);
-int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
+ int delalloc);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len);
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
@@ -3283,7 +3349,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int for_cow);
+ u64 root_objectid, u64 owner, u64 offset, int no_quota);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -3371,7 +3437,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
-
int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */
@@ -3547,7 +3612,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
-u64 btrfs_tree_mod_seq_prev(u64 seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
@@ -3694,6 +3758,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+ const struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi,
+ const bool new_inline,
+ struct extent_map *em);
+
/* inode.c */
struct btrfs_delalloc_work {
struct inode *inode;
@@ -4055,52 +4125,6 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
-/* qgroup.c */
-struct qgroup_update {
- struct list_head list;
- struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op;
-};
-
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_quota_disable(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
-void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
-int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 src, u64 dst);
-int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
- char *name);
-int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid);
-int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 qgroupid,
- struct btrfs_qgroup_limit *limit);
-int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
-void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
-struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op);
-int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op);
-int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
- struct btrfs_qgroup_inherit *inherit);
-int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
-void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
-
-void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
-
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
@@ -4117,6 +4141,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode);
+int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
+ u64 rfer, u64 excl);
#endif
#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 33e561a8401..da775bfdebc 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -149,8 +149,8 @@ again:
spin_lock(&root->inode_lock);
ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
if (ret == -EEXIST) {
- kmem_cache_free(delayed_node_cache, node);
spin_unlock(&root->inode_lock);
+ kmem_cache_free(delayed_node_cache, node);
radix_tree_preload_end();
goto again;
}
@@ -267,14 +267,17 @@ static void __btrfs_release_delayed_node(
mutex_unlock(&delayed_node->mutex);
if (atomic_dec_and_test(&delayed_node->refs)) {
+ bool free = false;
struct btrfs_root *root = delayed_node->root;
spin_lock(&root->inode_lock);
if (atomic_read(&delayed_node->refs) == 0) {
radix_tree_delete(&root->delayed_nodes_tree,
delayed_node->inode_id);
- kmem_cache_free(delayed_node_cache, delayed_node);
+ free = true;
}
spin_unlock(&root->inode_lock);
+ if (free)
+ kmem_cache_free(delayed_node_cache, delayed_node);
}
}
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 31299646024..6d16bea94e1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
+ if (ref1->no_quota > ref2->no_quota)
+ return 1;
+ if (ref1->no_quota < ref2->no_quota)
+ return -1;
/* merging of sequenced refs is not allowed */
if (compare_seq) {
if (ref1->seq < ref2->seq)
@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
- int action, int for_cow)
+ int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
+ if (is_fstree(ref_root))
+ seq = atomic64_read(&fs_info->tree_mod_seq);
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
-
- if (need_ref_seq(for_cow, ref_root))
- seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+ ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
- u64 offset, int action, int for_cow)
+ u64 offset, int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
+ if (is_fstree(ref_root))
+ seq = atomic64_read(&fs_info->tree_mod_seq);
+
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
-
- if (need_ref_seq(for_cow, ref_root))
- seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+ ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
- int for_cow)
+ int no_quota)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+ no_quota = 0;
+
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
- for_cow);
+ no_quota);
spin_unlock(&delayed_refs->lock);
- if (need_ref_seq(for_cow, ref_root))
- btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
- int for_cow)
+ int no_quota)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
+ if (!is_fstree(ref_root) || !fs_info->quota_enabled)
+ no_quota = 0;
+
BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
- action, for_cow);
+ action, no_quota);
spin_unlock(&delayed_refs->lock);
- if (need_ref_seq(for_cow, ref_root))
- btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 4ba9b93022f..a764e2340d4 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
+ unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
- int for_cow);
+ int no_quota);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
- int for_cow);
+ int no_quota);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
@@ -231,25 +232,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
u64 seq);
/*
- * delayed refs with a ref_seq > 0 must be held back during backref walking.
- * this only applies to items in one of the fs-trees. for_cow items never need
- * to be held back, so they won't get a ref_seq number.
- */
-static inline int need_ref_seq(int for_cow, u64 rootid)
-{
- if (for_cow)
- return 0;
-
- if (rootid == BTRFS_FS_TREE_OBJECTID)
- return 1;
-
- if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
- return 1;
-
- return 0;
-}
-
-/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
*/
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 9f2290509ac..eea26e1b2fd 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -36,6 +36,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
+#include "sysfs.h"
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
@@ -313,7 +314,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
if (btrfs_fs_incompat(fs_info, RAID56)) {
btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
switch (args->start.cont_reading_from_srcdev_mode) {
@@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
+ /* replace the sysfs entry */
+ btrfs_kobj_rm_device(fs_info, src_device);
+ btrfs_kobj_add_device(fs_info, tgt_device);
+
btrfs_rm_dev_replace_blocked(fs_info);
btrfs_rm_dev_replace_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 029d46c2e17..08e65e9cf2a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -49,6 +49,7 @@
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
+#include "qgroup.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -368,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
- btrfs_tree_read_unlock_blocking(eb);
+ if (need_lock)
+ btrfs_tree_read_unlock_blocking(eb);
return ret;
}
@@ -1109,6 +1111,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return alloc_test_extent_buffer(root->fs_info, bytenr,
+ blocksize);
+#endif
return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
}
@@ -1201,10 +1208,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->nodesize = nodesize;
root->leafsize = leafsize;
root->stripesize = stripesize;
- root->ref_cows = 0;
- root->track_dirty = 0;
- root->in_radix = 0;
- root->orphan_item_inserted = 0;
+ root->state = 0;
root->orphan_cleanup_state = 0;
root->objectid = objectid;
@@ -1265,7 +1269,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
else
root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
- root->defrag_running = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
@@ -1290,7 +1293,8 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
- root->dummy_root = 1;
+ set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
+ root->alloc_bytenr = 0;
return root;
}
@@ -1341,8 +1345,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
- root->track_dirty = 1;
-
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
root->root_item.flags = 0;
root->root_item.byte_limit = 0;
@@ -1371,6 +1374,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
fail:
if (leaf) {
btrfs_tree_unlock(leaf);
+ free_extent_buffer(root->commit_root);
free_extent_buffer(leaf);
}
kfree(root);
@@ -1396,13 +1400,15 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+
/*
+ * DON'T set REF_COWS for log trees
+ *
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
*/
- root->ref_cows = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL,
@@ -1536,7 +1542,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
return root;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- root->ref_cows = 1;
+ set_bit(BTRFS_ROOT_REF_COWS, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1606,7 +1612,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
(unsigned long)root->root_key.objectid,
root);
if (ret == 0)
- root->in_radix = 1;
+ set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
@@ -1662,7 +1668,7 @@ again:
if (ret < 0)
goto fail;
if (ret == 0)
- root->orphan_item_inserted = 1;
+ set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
@@ -2064,6 +2070,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+ btrfs_destroy_workqueue(fs_info->extent_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2090,7 +2097,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->chunk_root);
}
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
+void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
int ret;
struct btrfs_root *gang[8];
@@ -2101,7 +2108,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
- if (gang[0]->in_radix) {
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) {
btrfs_drop_and_free_fs_root(fs_info, gang[0]);
} else {
free_extent_buffer(gang[0]->node);
@@ -2221,6 +2228,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
+ spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
@@ -2246,6 +2254,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
+ atomic_set(&fs_info->qgroup_op_seq, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
@@ -2291,6 +2300,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q);
+ btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -2354,6 +2364,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
+ fs_info->qgroup_op_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
@@ -2577,6 +2588,10 @@ int open_ctree(struct super_block *sb,
btrfs_alloc_workqueue("readahead", flags, max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+ fs_info->extent_workers =
+ btrfs_alloc_workqueue("extent-refs", flags,
+ min_t(u64, fs_devices->num_devices,
+ max_active), 8);
if (!(fs_info->workers && fs_info->delalloc_workers &&
fs_info->submit_workers && fs_info->flush_workers &&
@@ -2586,6 +2601,7 @@ int open_ctree(struct super_block *sb,
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
+ fs_info->fixup_workers && fs_info->extent_workers &&
fs_info->qgroup_rescan_workers)) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2693,7 +2709,7 @@ retry_root_backup:
ret = PTR_ERR(extent_root);
goto recovery_tree_root;
}
- extent_root->track_dirty = 1;
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
fs_info->extent_root = extent_root;
location.objectid = BTRFS_DEV_TREE_OBJECTID;
@@ -2702,7 +2718,7 @@ retry_root_backup:
ret = PTR_ERR(dev_root);
goto recovery_tree_root;
}
- dev_root->track_dirty = 1;
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
fs_info->dev_root = dev_root;
btrfs_init_devices_late(fs_info);
@@ -2712,13 +2728,13 @@ retry_root_backup:
ret = PTR_ERR(csum_root);
goto recovery_tree_root;
}
- csum_root->track_dirty = 1;
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
fs_info->csum_root = csum_root;
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
quota_root = btrfs_read_tree_root(tree_root, &location);
if (!IS_ERR(quota_root)) {
- quota_root->track_dirty = 1;
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
fs_info->quota_root = quota_root;
@@ -2733,7 +2749,7 @@ retry_root_backup:
create_uuid_tree = true;
check_uuid_tree = false;
} else {
- uuid_root->track_dirty = 1;
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
fs_info->uuid_root = uuid_root;
create_uuid_tree = false;
check_uuid_tree =
@@ -2861,7 +2877,7 @@ retry_root_backup:
printk(KERN_ERR "BTRFS: failed to read log tree\n");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
@@ -2870,26 +2886,28 @@ retry_root_backup:
"Failed to recover log tree");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
if (sb->s_flags & MS_RDONLY) {
ret = btrfs_commit_super(tree_root);
if (ret)
- goto fail_trans_kthread;
+ goto fail_qgroup;
}
}
ret = btrfs_find_orphan_roots(tree_root);
if (ret)
- goto fail_trans_kthread;
+ goto fail_qgroup;
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret)
- goto fail_trans_kthread;
+ goto fail_qgroup;
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
printk(KERN_WARNING
"BTRFS: failed to recover relocation\n");
@@ -2966,7 +2984,7 @@ fail_qgroup:
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
btrfs_cleanup_transaction(fs_info->tree_root);
- del_fs_roots(fs_info);
+ btrfs_free_fs_roots(fs_info);
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
@@ -3501,8 +3519,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_free_log(NULL, root);
- __btrfs_remove_free_space_cache(root->free_ino_pinned);
- __btrfs_remove_free_space_cache(root->free_ino_ctl);
+ if (root->free_ino_pinned)
+ __btrfs_remove_free_space_cache(root->free_ino_pinned);
+ if (root->free_ino_ctl)
+ __btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
}
@@ -3533,28 +3553,51 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
struct btrfs_root *gang[8];
- int i;
- int ret;
+ int i = 0;
+ int err = 0;
+ unsigned int ret = 0;
+ int index;
while (1) {
+ index = srcu_read_lock(&fs_info->subvol_srcu);
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang));
- if (!ret)
+ if (!ret) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
break;
-
+ }
root_objectid = gang[ret - 1]->root_key.objectid + 1;
+
for (i = 0; i < ret; i++) {
- int err;
+ /* Avoid to grab roots in dead_roots */
+ if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+ gang[i] = NULL;
+ continue;
+ }
+ /* grab all the search result for later use */
+ gang[i] = btrfs_grab_fs_root(gang[i]);
+ }
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
+ continue;
root_objectid = gang[i]->root_key.objectid;
err = btrfs_orphan_cleanup(gang[i]);
if (err)
- return err;
+ break;
+ btrfs_put_fs_root(gang[i]);
}
root_objectid++;
}
- return 0;
+
+ /* release the uncleaned roots due to error */
+ for (; i < ret; i++) {
+ if (gang[i])
+ btrfs_put_fs_root(gang[i]);
+ }
+ return err;
}
int btrfs_commit_super(struct btrfs_root *root)
@@ -3603,6 +3646,8 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_cleanup_defrag_inodes(fs_info);
+ cancel_work_sync(&fs_info->async_reclaim_work);
+
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@@ -3627,12 +3672,17 @@ int close_ctree(struct btrfs_root *root)
btrfs_sysfs_remove_one(fs_info);
- del_fs_roots(fs_info);
+ btrfs_free_fs_roots(fs_info);
btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
+ /*
+ * we must make sure there is not any read request to
+ * submit after we stopping all workers.
+ */
+ invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
free_root_pointers(fs_info, 1);
@@ -3709,6 +3759,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
buf->len,
root->fs_info->dirty_metadata_batch);
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+ btrfs_print_leaf(root, buf);
+ ASSERT(0);
+ }
+#endif
}
static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 53059df350f..23ce3ceba0a 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
+void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487c82c..813537f362f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -26,16 +26,16 @@
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
-#include "ctree.h"
+#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
-#include "transaction.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "math.h"
#include "sysfs.h"
+#include "qgroup.h"
#undef SCRAMBLE_DELAYED_REFS
@@ -81,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op);
+ struct btrfs_delayed_extent_op *extra_op,
+ int no_quota);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
struct extent_buffer *leaf,
struct btrfs_extent_item *ei);
@@ -94,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins);
+ int level, struct btrfs_key *ins,
+ int no_quota);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 flags,
int force);
@@ -103,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve);
+ u64 num_bytes, int reserve,
+ int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
@@ -1271,7 +1274,7 @@ fail:
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- int refs_to_drop)
+ int refs_to_drop, int *last_ref)
{
struct btrfs_key key;
struct btrfs_extent_data_ref *ref1 = NULL;
@@ -1307,6 +1310,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
if (num_refs == 0) {
ret = btrfs_del_item(trans, root, path);
+ *last_ref = 1;
} else {
if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1542,6 +1546,7 @@ again:
ret = 0;
}
if (ret) {
+ key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_release_path(path);
@@ -1763,7 +1768,8 @@ void update_inline_extent_backref(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_extent_op *extent_op,
+ int *last_ref)
{
struct extent_buffer *leaf;
struct btrfs_extent_item *ei;
@@ -1807,6 +1813,7 @@ void update_inline_extent_backref(struct btrfs_root *root,
else
btrfs_set_shared_data_ref_count(leaf, sref, refs);
} else {
+ *last_ref = 1;
size = btrfs_extent_inline_ref_size(type);
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
ptr = (unsigned long)iref;
@@ -1838,7 +1845,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
update_inline_extent_backref(root, path, iref,
- refs_to_add, extent_op);
+ refs_to_add, extent_op, NULL);
} else if (ret == -ENOENT) {
setup_inline_extent_backref(root, path, iref, parent,
root_objectid, owner, offset,
@@ -1871,17 +1878,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
- int refs_to_drop, int is_data)
+ int refs_to_drop, int is_data, int *last_ref)
{
int ret = 0;
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
update_inline_extent_backref(root, path, iref,
- -refs_to_drop, NULL);
+ -refs_to_drop, NULL, last_ref);
} else if (is_data) {
- ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+ ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+ last_ref);
} else {
+ *last_ref = 1;
ret = btrfs_del_item(trans, root, path);
}
return ret;
@@ -1945,7 +1954,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int for_cow)
+ u64 root_objectid, u64 owner, u64 offset,
+ int no_quota)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1957,12 +1967,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+ BTRFS_ADD_DELAYED_REF, NULL, no_quota);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+ BTRFS_ADD_DELAYED_REF, NULL, no_quota);
}
return ret;
}
@@ -1972,31 +1982,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 root_objectid,
u64 owner, u64 offset, int refs_to_add,
+ int no_quota,
struct btrfs_delayed_extent_op *extent_op)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_extent_item *item;
+ struct btrfs_key key;
u64 refs;
int ret;
+ enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
+ no_quota = 1;
+
path->reada = 1;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
- path, bytenr, num_bytes, parent,
+ ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
+ bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if (ret != -EAGAIN)
+ if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
goto out;
+ /*
+ * Ok we were able to insert an inline extent and it appears to be a new
+ * reference, deal with the qgroup accounting.
+ */
+ if (!ret && !no_quota) {
+ ASSERT(root->fs_info->quota_enabled);
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_item);
+ if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
+ type = BTRFS_QGROUP_OPER_ADD_SHARED;
+ btrfs_release_path(path);
+ ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+ bytenr, num_bytes, type, 0);
+ goto out;
+ }
+
+ /*
+ * Ok we had -EAGAIN which means we didn't have space to insert and
+ * inline extent ref, so just update the reference count and add a
+ * normal backref.
+ */
leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
+ if (refs)
+ type = BTRFS_QGROUP_OPER_ADD_SHARED;
btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
@@ -2004,9 +2047,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
+ if (!no_quota) {
+ ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+ bytenr, num_bytes, type, 0);
+ if (ret)
+ goto out;
+ }
+
path->reada = 1;
path->leave_spinning = 1;
-
/* now insert the actual backref */
ret = insert_extent_backref(trans, root->fs_info->extent_root,
path, bytenr, parent, root_objectid,
@@ -2040,8 +2089,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
- else
- ref_root = ref->root;
+ ref_root = ref->root;
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
if (extent_op)
@@ -2055,13 +2103,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
node->num_bytes, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- extent_op);
+ node->no_quota, extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
ret = __btrfs_free_extent(trans, root, node->bytenr,
node->num_bytes, parent,
ref_root, ref->objectid,
ref->offset, node->ref_mod,
- extent_op);
+ extent_op, node->no_quota);
} else {
BUG();
}
@@ -2198,8 +2246,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
- else
- ref_root = ref->root;
+ ref_root = ref->root;
ins.objectid = node->bytenr;
if (skinny_metadata) {
@@ -2217,15 +2264,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent, ref_root,
extent_op->flags_to_set,
&extent_op->key,
- ref->level, &ins);
+ ref->level, &ins,
+ node->no_quota);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op);
+ ref->level, 0, 1, node->no_quota,
+ extent_op);
} else if (node->action == BTRFS_DROP_DELAYED_REF) {
ret = __btrfs_free_extent(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op);
+ ref->level, 0, 1, extent_op,
+ node->no_quota);
} else {
BUG();
}
@@ -2573,42 +2623,6 @@ static u64 find_middle(struct rb_root *root)
}
#endif
-int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct qgroup_update *qgroup_update;
- int ret = 0;
-
- if (list_empty(&trans->qgroup_ref_list) !=
- !trans->delayed_ref_elem.seq) {
- /* list without seq or seq without list */
- btrfs_err(fs_info,
- "qgroup accounting update error, list is%s empty, seq is %#x.%x",
- list_empty(&trans->qgroup_ref_list) ? "" : " not",
- (u32)(trans->delayed_ref_elem.seq >> 32),
- (u32)trans->delayed_ref_elem.seq);
- BUG();
- }
-
- if (!trans->delayed_ref_elem.seq)
- return 0;
-
- while (!list_empty(&trans->qgroup_ref_list)) {
- qgroup_update = list_first_entry(&trans->qgroup_ref_list,
- struct qgroup_update, list);
- list_del(&qgroup_update->list);
- if (!ret)
- ret = btrfs_qgroup_account_ref(
- trans, fs_info, qgroup_update->node,
- qgroup_update->extent_op);
- kfree(qgroup_update);
- }
-
- btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
-
- return ret;
-}
-
static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
{
u64 num_bytes;
@@ -2661,15 +2675,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
u64 num_entries =
atomic_read(&trans->transaction->delayed_refs.num_entries);
u64 avg_runtime;
+ u64 val;
smp_mb();
avg_runtime = fs_info->avg_delayed_ref_runtime;
+ val = num_entries * avg_runtime;
if (num_entries * avg_runtime >= NSEC_PER_SEC)
return 1;
+ if (val >= NSEC_PER_SEC / 2)
+ return 2;
return btrfs_check_space_for_delayed_refs(trans, root);
}
+struct async_delayed_refs {
+ struct btrfs_root *root;
+ int count;
+ int error;
+ int sync;
+ struct completion wait;
+ struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+ struct async_delayed_refs *async;
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ async = container_of(work, struct async_delayed_refs, work);
+
+ trans = btrfs_join_transaction(async->root);
+ if (IS_ERR(trans)) {
+ async->error = PTR_ERR(trans);
+ goto done;
+ }
+
+ /*
+ * trans->sync means that when we call end_transaciton, we won't
+ * wait on delayed refs
+ */
+ trans->sync = true;
+ ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+ if (ret)
+ async->error = ret;
+
+ ret = btrfs_end_transaction(trans, async->root);
+ if (ret && !async->error)
+ async->error = ret;
+done:
+ if (async->sync)
+ complete(&async->wait);
+ else
+ kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+ unsigned long count, int wait)
+{
+ struct async_delayed_refs *async;
+ int ret;
+
+ async = kmalloc(sizeof(*async), GFP_NOFS);
+ if (!async)
+ return -ENOMEM;
+
+ async->root = root->fs_info->tree_root;
+ async->count = count;
+ async->error = 0;
+ if (wait)
+ async->sync = 1;
+ else
+ async->sync = 0;
+ init_completion(&async->wait);
+
+ btrfs_init_work(&async->work, delayed_ref_async_start,
+ NULL, NULL);
+
+ btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+ if (wait) {
+ wait_for_completion(&async->wait);
+ ret = async->error;
+ kfree(async);
+ return ret;
+ }
+ return 0;
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -2697,8 +2790,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
- btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
-
delayed_refs = &trans->transaction->delayed_refs;
if (count == 0) {
count = atomic_read(&delayed_refs->num_entries) * 2;
@@ -2757,6 +2848,9 @@ again:
goto again;
}
out:
+ ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
+ if (ret)
+ return ret;
assert_qgroups_uptodate(trans);
return 0;
}
@@ -2963,7 +3057,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc, int for_cow)
+ int full_backref, int inc, int no_quota)
{
u64 bytenr;
u64 num_bytes;
@@ -2978,11 +3072,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
u64, u64, u64, u64, u64, u64, int);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
ref_root = btrfs_header_owner(buf);
nritems = btrfs_header_nritems(buf);
level = btrfs_header_level(buf);
- if (!root->ref_cows && level == 0)
+ if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
return 0;
if (inc)
@@ -3013,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset, for_cow);
+ key.offset, no_quota);
if (ret)
goto fail;
} else {
@@ -3021,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, level - 1, 0,
- for_cow);
+ no_quota);
if (ret)
goto fail;
}
@@ -3032,15 +3130,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow)
+ struct extent_buffer *buf, int full_backref, int no_quota)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow)
+ struct extent_buffer *buf, int full_backref, int no_quota)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -3163,7 +3261,8 @@ again:
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED ||
- !btrfs_test_opt(root, SPACE_CACHE)) {
+ !btrfs_test_opt(root, SPACE_CACHE) ||
+ block_group->delalloc_bytes) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
@@ -3400,10 +3499,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
return ret;
}
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&found->block_groups[i]);
- kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
- }
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3542,11 +3639,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
return extended_to_chunk(flags | tmp);
}
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
{
unsigned seq;
+ u64 flags;
do {
+ flags = orig_flags;
seq = read_seqbegin(&root->fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -4201,6 +4300,104 @@ static int flush_space(struct btrfs_root *root,
return ret;
}
+
+static inline u64
+btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
+ struct btrfs_space_info *space_info)
+{
+ u64 used;
+ u64 expected;
+ u64 to_reclaim;
+
+ to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
+ 16 * 1024 * 1024);
+ spin_lock(&space_info->lock);
+ if (can_overcommit(root, space_info, to_reclaim,
+ BTRFS_RESERVE_FLUSH_ALL)) {
+ to_reclaim = 0;
+ goto out;
+ }
+
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
+ if (can_overcommit(root, space_info, 1024 * 1024,
+ BTRFS_RESERVE_FLUSH_ALL))
+ expected = div_factor_fine(space_info->total_bytes, 95);
+ else
+ expected = div_factor_fine(space_info->total_bytes, 90);
+
+ if (used > expected)
+ to_reclaim = used - expected;
+ else
+ to_reclaim = 0;
+ to_reclaim = min(to_reclaim, space_info->bytes_may_use +
+ space_info->bytes_reserved);
+out:
+ spin_unlock(&space_info->lock);
+
+ return to_reclaim;
+}
+
+static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
+ struct btrfs_fs_info *fs_info, u64 used)
+{
+ return (used >= div_factor_fine(space_info->total_bytes, 98) &&
+ !btrfs_fs_closing(fs_info) &&
+ !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+}
+
+static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
+ struct btrfs_fs_info *fs_info)
+{
+ u64 used;
+
+ spin_lock(&space_info->lock);
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
+ if (need_do_async_reclaim(space_info, fs_info, used)) {
+ spin_unlock(&space_info->lock);
+ return 1;
+ }
+ spin_unlock(&space_info->lock);
+
+ return 0;
+}
+
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_space_info *space_info;
+ u64 to_reclaim;
+ int flush_state;
+
+ fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+ space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+ space_info);
+ if (!to_reclaim)
+ return;
+
+ flush_state = FLUSH_DELAYED_ITEMS_NR;
+ do {
+ flush_space(fs_info->fs_root, space_info, to_reclaim,
+ to_reclaim, flush_state);
+ flush_state++;
+ if (!btrfs_need_do_async_reclaim(space_info, fs_info))
+ return;
+ } while (flush_state <= COMMIT_TRANS);
+
+ if (btrfs_need_do_async_reclaim(space_info, fs_info))
+ queue_work(system_unbound_wq, work);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+ INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@@ -4308,8 +4505,13 @@ again:
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true;
space_info->flush = 1;
+ } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ used += orig_bytes;
+ if (need_do_async_reclaim(space_info, root->fs_info, used) &&
+ !work_busy(&root->fs_info->async_reclaim_work))
+ queue_work(system_unbound_wq,
+ &root->fs_info->async_reclaim_work);
}
-
spin_unlock(&space_info->lock);
if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
@@ -4366,7 +4568,7 @@ static struct btrfs_block_rsv *get_block_rsv(
{
struct btrfs_block_rsv *block_rsv = NULL;
- if (root->ref_cows)
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
block_rsv = trans->block_rsv;
if (root == root->fs_info->csum_root && trans->adding_csums)
@@ -5413,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
* @cache: The cache we are manipulating
* @num_bytes: The number of bytes in question
* @reserve: One of the reservation enums
+ * @delalloc: The blocks are allocated for the delalloc write
*
* This is called by the allocator when it reserves space, or by somebody who is
* freeing space that was never actually used on disk. For example if you
@@ -5431,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
* succeeds.
*/
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve)
+ u64 num_bytes, int reserve, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
@@ -5450,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
+
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
}
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+
+ if (delalloc)
+ cache->delalloc_bytes -= num_bytes;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -5469,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
struct btrfs_block_group_cache *cache;
- struct btrfs_space_info *space_info;
down_write(&fs_info->commit_root_sem);
@@ -5492,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
up_write(&fs_info->commit_root_sem);
- list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
- percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
update_global_block_rsv(fs_info);
}
@@ -5532,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
+ percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
@@ -5618,7 +5824,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner_objectid,
u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op)
+ struct btrfs_delayed_extent_op *extent_op,
+ int no_quota)
{
struct btrfs_key key;
struct btrfs_path *path;
@@ -5634,9 +5841,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int num_to_del = 1;
u32 item_size;
u64 refs;
+ int last_ref = 0;
+ enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
+ if (!info->quota_enabled || !is_fstree(root_objectid))
+ no_quota = 1;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -5684,7 +5896,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
BUG_ON(iref);
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop,
- is_data);
+ is_data, &last_ref);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@@ -5719,6 +5931,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (ret > 0 && skinny_metadata) {
skinny_metadata = false;
+ key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
btrfs_release_path(path);
@@ -5802,7 +6015,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
btrfs_err(info, "trying to drop %d refs but we only have %Lu "
- "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+ "for bytenr %Lu", refs_to_drop, refs, bytenr);
ret = -EINVAL;
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@@ -5810,6 +6023,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs -= refs_to_drop;
if (refs > 0) {
+ type = BTRFS_QGROUP_OPER_SUB_SHARED;
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
/*
@@ -5825,7 +6039,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
iref, refs_to_drop,
- is_data);
+ is_data, &last_ref);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@@ -5846,6 +6060,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
+ last_ref = 1;
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
@@ -5868,6 +6083,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
}
+ btrfs_release_path(path);
+
+ /* Deal with the quota accounting */
+ if (!ret && last_ref && !no_quota) {
+ int mod_seq = 0;
+
+ if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+ type == BTRFS_QGROUP_OPER_SUB_SHARED)
+ mod_seq = 1;
+
+ ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
+ bytenr, num_bytes, type,
+ mod_seq);
+ }
out:
btrfs_free_path(path);
return ret;
@@ -5983,7 +6212,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
- btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+ btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
@@ -6004,11 +6233,15 @@ out:
/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int for_cow)
+ u64 owner, u64 offset, int no_quota)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
/*
@@ -6024,13 +6257,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL, for_cow);
+ BTRFS_DROP_DELAYED_REF, NULL, no_quota);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
- NULL, for_cow);
+ NULL, no_quota);
}
return ret;
}
@@ -6138,6 +6371,70 @@ enum btrfs_loop_type {
LOOP_NO_EMPTY_SIZE = 3,
};
+static inline void
+btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ if (delalloc)
+ down_read(&cache->data_rwsem);
+}
+
+static inline void
+btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ btrfs_get_block_group(cache);
+ if (delalloc)
+ down_read(&cache->data_rwsem);
+}
+
+static struct btrfs_block_group_cache *
+btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ int delalloc)
+{
+ struct btrfs_block_group_cache *used_bg;
+ bool locked = false;
+again:
+ spin_lock(&cluster->refill_lock);
+ if (locked) {
+ if (used_bg == cluster->block_group)
+ return used_bg;
+
+ up_read(&used_bg->data_rwsem);
+ btrfs_put_block_group(used_bg);
+ }
+
+ used_bg = cluster->block_group;
+ if (!used_bg)
+ return NULL;
+
+ if (used_bg == block_group)
+ return used_bg;
+
+ btrfs_get_block_group(used_bg);
+
+ if (!delalloc)
+ return used_bg;
+
+ if (down_read_trylock(&used_bg->data_rwsem))
+ return used_bg;
+
+ spin_unlock(&cluster->refill_lock);
+ down_read(&used_bg->data_rwsem);
+ locked = true;
+ goto again;
+}
+
+static inline void
+btrfs_release_block_group(struct btrfs_block_group_cache *cache,
+ int delalloc)
+{
+ if (delalloc)
+ up_read(&cache->data_rwsem);
+ btrfs_put_block_group(cache);
+}
+
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
@@ -6152,7 +6449,7 @@ enum btrfs_loop_type {
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
- u64 flags)
+ u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -6240,6 +6537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
up_read(&space_info->groups_sem);
} else {
index = get_block_group_index(block_group);
+ btrfs_lock_block_group(block_group, delalloc);
goto have_block_group;
}
} else if (block_group) {
@@ -6254,7 +6552,7 @@ search:
u64 offset;
int cached;
- btrfs_get_block_group(block_group);
+ btrfs_grab_block_group(block_group, delalloc);
search_start = block_group->key.objectid;
/*
@@ -6302,16 +6600,16 @@ have_block_group:
* the refill lock keeps out other
* people trying to start a new cluster
*/
- spin_lock(&last_ptr->refill_lock);
- used_block_group = last_ptr->block_group;
- if (used_block_group != block_group &&
- (!used_block_group ||
- used_block_group->ro ||
- !block_group_bits(used_block_group, flags)))
+ used_block_group = btrfs_lock_cluster(block_group,
+ last_ptr,
+ delalloc);
+ if (!used_block_group)
goto refill_cluster;
- if (used_block_group != block_group)
- btrfs_get_block_group(used_block_group);
+ if (used_block_group != block_group &&
+ (used_block_group->ro ||
+ !block_group_bits(used_block_group, flags)))
+ goto release_cluster;
offset = btrfs_alloc_from_cluster(used_block_group,
last_ptr,
@@ -6325,16 +6623,15 @@ have_block_group:
used_block_group,
search_start, num_bytes);
if (used_block_group != block_group) {
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group,
+ delalloc);
block_group = used_block_group;
}
goto checks;
}
WARN_ON(last_ptr->block_group != used_block_group);
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
-refill_cluster:
+release_cluster:
/* If we are on LOOP_NO_EMPTY_SIZE, we can't
* set up a new clusters, so lets just skip it
* and let the allocator find whatever block
@@ -6351,8 +6648,10 @@ refill_cluster:
* succeeding in the unclustered
* allocation. */
if (loop >= LOOP_NO_EMPTY_SIZE &&
- last_ptr->block_group != block_group) {
+ used_block_group != block_group) {
spin_unlock(&last_ptr->refill_lock);
+ btrfs_release_block_group(used_block_group,
+ delalloc);
goto unclustered_alloc;
}
@@ -6362,6 +6661,10 @@ refill_cluster:
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
+ if (used_block_group != block_group)
+ btrfs_release_block_group(used_block_group,
+ delalloc);
+refill_cluster:
if (loop >= LOOP_NO_EMPTY_SIZE) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
@@ -6469,7 +6772,7 @@ checks:
BUG_ON(offset > search_start);
ret = btrfs_update_reserved_bytes(block_group, num_bytes,
- alloc_type);
+ alloc_type, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
@@ -6481,13 +6784,13 @@ checks:
trace_btrfs_reserve_extent(orig_root, block_group,
search_start, num_bytes);
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group, delalloc);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
- btrfs_put_block_group(block_group);
+ btrfs_release_block_group(block_group, delalloc);
}
up_read(&space_info->groups_sem);
@@ -6510,8 +6813,14 @@ loop:
loop++;
if (loop == LOOP_ALLOC_CHUNK) {
struct btrfs_trans_handle *trans;
+ int exist = 0;
+
+ trans = current->journal_info;
+ if (trans)
+ exist = 1;
+ else
+ trans = btrfs_join_transaction(root);
- trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
@@ -6528,7 +6837,8 @@ loop:
root, ret);
else
ret = 0;
- btrfs_end_transaction(trans, root);
+ if (!exist)
+ btrfs_end_transaction(trans, root);
if (ret)
goto out;
}
@@ -6593,7 +6903,7 @@ again:
int btrfs_reserve_extent(struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data)
+ struct btrfs_key *ins, int is_data, int delalloc)
{
bool final_tried = false;
u64 flags;
@@ -6603,7 +6913,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
- flags);
+ flags, delalloc);
if (ret == -ENOSPC) {
if (!final_tried && ins->offset) {
@@ -6628,7 +6938,8 @@ again:
}
static int __btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len, int pin)
+ u64 start, u64 len,
+ int pin, int delalloc)
{
struct btrfs_block_group_cache *cache;
int ret = 0;
@@ -6647,7 +6958,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
pin_down_extent(root, cache, start, len, 1);
else {
btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+ btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
}
btrfs_put_block_group(cache);
@@ -6657,15 +6968,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
}
int btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len)
+ u64 start, u64 len, int delalloc)
{
- return __btrfs_free_reserved_extent(root, start, len, 0);
+ return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
u64 start, u64 len)
{
- return __btrfs_free_reserved_extent(root, start, len, 1);
+ return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -6729,6 +7040,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
+ /* Always set parent to 0 here since its exclusive anyway. */
+ ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+ ins->objectid, ins->offset,
+ BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ if (ret)
+ return ret;
+
ret = update_block_group(root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6743,7 +7061,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins)
+ int level, struct btrfs_key *ins,
+ int no_quota)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6753,6 +7072,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct extent_buffer *leaf;
u32 size = sizeof(*extent_item) + sizeof(*iref);
+ u64 num_bytes = ins->offset;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
@@ -6786,6 +7106,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (skinny_metadata) {
iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+ num_bytes = root->leafsize;
} else {
block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
btrfs_set_tree_block_key(leaf, block_info, key);
@@ -6807,6 +7128,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
+ if (!no_quota) {
+ ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+ ins->objectid, num_bytes,
+ BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+ if (ret)
+ return ret;
+ }
+
ret = update_block_group(root, ins->objectid, root->leafsize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6862,7 +7191,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
return -EINVAL;
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT);
+ RESERVE_ALLOC_NO_ACCOUNT, 0);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
@@ -6990,12 +7319,21 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) {
+ buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
+ blocksize, level);
+ if (!IS_ERR(buf))
+ root->alloc_bytenr += blocksize;
+ return buf;
+ }
+#endif
block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(root, blocksize, blocksize,
- empty_size, hint, &ins, 0);
+ empty_size, hint, &ins, 0, 0);
if (ret) {
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
@@ -7731,7 +8069,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
}
- if (root->in_radix) {
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
} else {
free_extent_buffer(root->node);
@@ -8323,8 +8661,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
list_del(&space_info->list);
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
struct kobject *kobj;
- kobj = &space_info->block_group_kobjs[i];
- if (kobj->parent) {
+ kobj = space_info->block_group_kobjs[i];
+ space_info->block_group_kobjs[i] = NULL;
+ if (kobj) {
kobject_del(kobj);
kobject_put(kobj);
}
@@ -8348,17 +8687,26 @@ static void __link_block_group(struct btrfs_space_info *space_info,
up_write(&space_info->groups_sem);
if (first) {
- struct kobject *kobj = &space_info->block_group_kobjs[index];
+ struct raid_kobject *rkobj;
int ret;
- kobject_get(&space_info->kobj); /* put in release */
- ret = kobject_add(kobj, &space_info->kobj, "%s",
- get_raid_name(index));
+ rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+ if (!rkobj)
+ goto out_err;
+ rkobj->raid_type = index;
+ kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+ ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+ "%s", get_raid_name(index));
if (ret) {
- pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
- kobject_put(&space_info->kobj);
+ kobject_put(&rkobj->kobj);
+ goto out_err;
}
+ space_info->block_group_kobjs[index] = &rkobj->kobj;
}
+
+ return;
+out_err:
+ pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
}
static struct btrfs_block_group_cache *
@@ -8388,6 +8736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
start);
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
+ init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->new_bg_list);
@@ -8607,7 +8956,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
extent_root = root->fs_info->extent_root;
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
cache = btrfs_create_block_group_cache(root, chunk_offset, size);
if (!cache)
@@ -8693,6 +9042,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_key key;
struct inode *inode;
+ struct kobject *kobj = NULL;
int ret;
int index;
int factor;
@@ -8792,11 +9142,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
*/
list_del_init(&block_group->list);
if (list_empty(&block_group->space_info->block_groups[index])) {
- kobject_del(&block_group->space_info->block_group_kobjs[index]);
- kobject_put(&block_group->space_info->block_group_kobjs[index]);
+ kobj = block_group->space_info->block_group_kobjs[index];
+ block_group->space_info->block_group_kobjs[index] = NULL;
clear_avail_alloc_bits(root->fs_info, block_group->flags);
}
up_write(&block_group->space_info->groups_sem);
+ if (kobj) {
+ kobject_del(kobj);
+ kobject_put(kobj);
+ }
if (block_group->cached == BTRFS_CACHE_STARTED)
wait_block_group_cache_done(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3955e475cee..a389820d158 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1693,6 +1693,7 @@ again:
* shortening the size of the delalloc range we're searching
*/
free_extent_state(cached_state);
+ cached_state = NULL;
if (!loops) {
max_bytes = PAGE_CACHE_SIZE;
loops = 1;
@@ -2353,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
struct extent_io_tree *tree;
- int ret;
+ int ret = 0;
tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -2367,6 +2368,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
+ ret = ret < 0 ? ret : -EIO;
+ mapping_set_error(page->mapping, ret);
}
return 0;
}
@@ -3098,143 +3101,130 @@ static noinline void update_nr_written(struct page *page,
}
/*
- * the writepage semantics are similar to regular writepage. extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback. Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
+ * helper for __extent_writepage, doing all of the delayed allocation setup.
+ *
+ * This returns 1 if our fill_delalloc function did all the work required
+ * to write the page (copy into inline extent). In this case the IO has
+ * been started and the page is already unlocked.
+ *
+ * This returns 0 if all went well (page still locked)
+ * This returns < 0 if there were errors (page still locked)
*/
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
+static noinline_for_stack int writepage_delalloc(struct inode *inode,
+ struct page *page, struct writeback_control *wbc,
+ struct extent_page_data *epd,
+ u64 delalloc_start,
+ unsigned long *nr_written)
+{
+ struct extent_io_tree *tree = epd->tree;
+ u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ u64 nr_delalloc;
+ u64 delalloc_to_write = 0;
+ u64 delalloc_end = 0;
+ int ret;
+ int page_started = 0;
+
+ if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
+ return 0;
+
+ while (delalloc_end < page_end) {
+ nr_delalloc = find_lock_delalloc_range(inode, tree,
+ page,
+ &delalloc_start,
+ &delalloc_end,
+ 128 * 1024 * 1024);
+ if (nr_delalloc == 0) {
+ delalloc_start = delalloc_end + 1;
+ continue;
+ }
+ ret = tree->ops->fill_delalloc(inode, page,
+ delalloc_start,
+ delalloc_end,
+ &page_started,
+ nr_written);
+ /* File system has been set read-only */
+ if (ret) {
+ SetPageError(page);
+ /* fill_delalloc should be return < 0 for error
+ * but just in case, we use > 0 here meaning the
+ * IO is started, so we don't want to return > 0
+ * unless things are going well.
+ */
+ ret = ret < 0 ? ret : -EIO;
+ goto done;
+ }
+ /*
+ * delalloc_end is already one less than the total
+ * length, so we don't subtract one from
+ * PAGE_CACHE_SIZE
+ */
+ delalloc_to_write += (delalloc_end - delalloc_start +
+ PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
+ delalloc_start = delalloc_end + 1;
+ }
+ if (wbc->nr_to_write < delalloc_to_write) {
+ int thresh = 8192;
+
+ if (delalloc_to_write < thresh * 2)
+ thresh = delalloc_to_write;
+ wbc->nr_to_write = min_t(u64, delalloc_to_write,
+ thresh);
+ }
+
+ /* did the fill delalloc function already unlock and start
+ * the IO?
+ */
+ if (page_started) {
+ /*
+ * we've unlocked the page, so we can't update
+ * the mapping's writeback index, just update
+ * nr_to_write.
+ */
+ wbc->nr_to_write -= *nr_written;
+ return 1;
+ }
+
+ ret = 0;
+
+done:
+ return ret;
+}
+
+/*
+ * helper for __extent_writepage. This calls the writepage start hooks,
+ * and does the loop to map the page into extents and bios.
+ *
+ * We return 1 if the IO is started and the page is unlocked,
+ * 0 if all went well (page still locked)
+ * < 0 if there were errors (page still locked)
+ */
+static noinline_for_stack int __extent_writepage_io(struct inode *inode,
+ struct page *page,
+ struct writeback_control *wbc,
+ struct extent_page_data *epd,
+ loff_t i_size,
+ unsigned long nr_written,
+ int write_flags, int *nr_ret)
{
- struct inode *inode = page->mapping->host;
- struct extent_page_data *epd = data;
struct extent_io_tree *tree = epd->tree;
u64 start = page_offset(page);
- u64 delalloc_start;
u64 page_end = start + PAGE_CACHE_SIZE - 1;
u64 end;
u64 cur = start;
u64 extent_offset;
- u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
sector_t sector;
struct extent_state *cached_state = NULL;
struct extent_map *em;
struct block_device *bdev;
- int ret;
- int nr = 0;
size_t pg_offset = 0;
size_t blocksize;
- loff_t i_size = i_size_read(inode);
- unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
- u64 nr_delalloc;
- u64 delalloc_end;
- int page_started;
- int compressed;
- int write_flags;
- unsigned long nr_written = 0;
- bool fill_delalloc = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL)
- write_flags = WRITE_SYNC;
- else
- write_flags = WRITE;
-
- trace___extent_writepage(page, inode, wbc);
-
- WARN_ON(!PageLocked(page));
-
- ClearPageError(page);
-
- pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
- if (page->index > end_index ||
- (page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
- unlock_page(page);
- return 0;
- }
-
- if (page->index == end_index) {
- char *userpage;
-
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0,
- PAGE_CACHE_SIZE - pg_offset);
- kunmap_atomic(userpage);
- flush_dcache_page(page);
- }
- pg_offset = 0;
-
- set_page_extent_mapped(page);
-
- if (!tree->ops || !tree->ops->fill_delalloc)
- fill_delalloc = false;
-
- delalloc_start = start;
- delalloc_end = 0;
- page_started = 0;
- if (!epd->extent_locked && fill_delalloc) {
- u64 delalloc_to_write = 0;
- /*
- * make sure the wbc mapping index is at least updated
- * to this page.
- */
- update_nr_written(page, wbc, 0);
-
- while (delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(inode, tree,
- page,
- &delalloc_start,
- &delalloc_end,
- 128 * 1024 * 1024);
- if (nr_delalloc == 0) {
- delalloc_start = delalloc_end + 1;
- continue;
- }
- ret = tree->ops->fill_delalloc(inode, page,
- delalloc_start,
- delalloc_end,
- &page_started,
- &nr_written);
- /* File system has been set read-only */
- if (ret) {
- SetPageError(page);
- goto done;
- }
- /*
- * delalloc_end is already one less than the total
- * length, so we don't subtract one from
- * PAGE_CACHE_SIZE
- */
- delalloc_to_write += (delalloc_end - delalloc_start +
- PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
- delalloc_start = delalloc_end + 1;
- }
- if (wbc->nr_to_write < delalloc_to_write) {
- int thresh = 8192;
-
- if (delalloc_to_write < thresh * 2)
- thresh = delalloc_to_write;
- wbc->nr_to_write = min_t(u64, delalloc_to_write,
- thresh);
- }
+ int ret = 0;
+ int nr = 0;
+ bool compressed;
- /* did the fill delalloc function already unlock and start
- * the IO?
- */
- if (page_started) {
- ret = 0;
- /*
- * we've unlocked the page, so we can't update
- * the mapping's writeback index, just update
- * nr_to_write.
- */
- wbc->nr_to_write -= nr_written;
- goto done_unlocked;
- }
- }
if (tree->ops && tree->ops->writepage_start_hook) {
ret = tree->ops->writepage_start_hook(page, start,
page_end);
@@ -3244,9 +3234,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
wbc->pages_skipped++;
else
redirty_page_for_writepage(wbc, page);
+
update_nr_written(page, wbc, nr_written);
unlock_page(page);
- ret = 0;
+ ret = 1;
goto done_unlocked;
}
}
@@ -3258,7 +3249,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
update_nr_written(page, wbc, nr_written + 1);
end = page_end;
- if (last_byte <= start) {
+ if (i_size <= start) {
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
page_end, NULL, 1);
@@ -3268,7 +3259,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
blocksize = inode->i_sb->s_blocksize;
while (cur <= end) {
- if (cur >= last_byte) {
+ u64 em_end;
+ if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur,
page_end, NULL, 1);
@@ -3278,13 +3270,15 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
end - cur + 1, 1);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
+ ret = PTR_ERR_OR_ZERO(em);
break;
}
extent_offset = cur - em->start;
- BUG_ON(extent_map_end(em) <= cur);
+ em_end = extent_map_end(em);
+ BUG_ON(em_end <= cur);
BUG_ON(end < cur);
- iosize = min(extent_map_end(em) - cur, end - cur + 1);
+ iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
sector = (em->block_start + extent_offset) >> 9;
bdev = em->bdev;
@@ -3320,13 +3314,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
pg_offset += iosize;
continue;
}
- /* leave this out until we have a page_mkwrite call */
- if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0, NULL)) {
- cur = cur + iosize;
- pg_offset += iosize;
- continue;
- }
if (tree->ops && tree->ops->writepage_io_hook) {
ret = tree->ops->writepage_io_hook(page, cur,
@@ -3337,7 +3324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (ret) {
SetPageError(page);
} else {
- unsigned long max_nr = end_index + 1;
+ unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
@@ -3359,17 +3346,94 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
nr++;
}
done:
+ *nr_ret = nr;
+
+done_unlocked:
+
+ /* drop our reference on any cached states */
+ free_extent_state(cached_state);
+ return ret;
+}
+
+/*
+ * the writepage semantics are similar to regular writepage. extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback. Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct inode *inode = page->mapping->host;
+ struct extent_page_data *epd = data;
+ u64 start = page_offset(page);
+ u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ int ret;
+ int nr = 0;
+ size_t pg_offset = 0;
+ loff_t i_size = i_size_read(inode);
+ unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+ int write_flags;
+ unsigned long nr_written = 0;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC;
+ else
+ write_flags = WRITE;
+
+ trace___extent_writepage(page, inode, wbc);
+
+ WARN_ON(!PageLocked(page));
+
+ ClearPageError(page);
+
+ pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+ if (page->index > end_index ||
+ (page->index == end_index && !pg_offset)) {
+ page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ unlock_page(page);
+ return 0;
+ }
+
+ if (page->index == end_index) {
+ char *userpage;
+
+ userpage = kmap_atomic(page);
+ memset(userpage + pg_offset, 0,
+ PAGE_CACHE_SIZE - pg_offset);
+ kunmap_atomic(userpage);
+ flush_dcache_page(page);
+ }
+
+ pg_offset = 0;
+
+ set_page_extent_mapped(page);
+
+ ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
+ if (ret == 1)
+ goto done_unlocked;
+ if (ret)
+ goto done;
+
+ ret = __extent_writepage_io(inode, page, wbc, epd,
+ i_size, nr_written, write_flags, &nr);
+ if (ret == 1)
+ goto done_unlocked;
+
+done:
if (nr == 0) {
/* make sure the mapping tag for page dirty gets cleared */
set_page_writeback(page);
end_page_writeback(page);
}
+ if (PageError(page)) {
+ ret = ret < 0 ? ret : -EIO;
+ end_extent_writepage(page, ret, start, page_end);
+ }
unlock_page(page);
+ return ret;
done_unlocked:
-
- /* drop our reference on any cached states */
- free_extent_state(cached_state);
return 0;
}
@@ -3385,9 +3449,10 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
-static int lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
- struct extent_page_data *epd)
+static noinline_for_stack int
+lock_extent_buffer_for_io(struct extent_buffer *eb,
+ struct btrfs_fs_info *fs_info,
+ struct extent_page_data *epd)
{
unsigned long i, num_pages;
int flush = 0;
@@ -3458,7 +3523,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
static void end_extent_buffer_writeback(struct extent_buffer *eb)
{
clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
}
@@ -3492,7 +3557,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
bio_put(bio);
}
-static int write_one_eb(struct extent_buffer *eb,
+static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct btrfs_fs_info *fs_info,
struct writeback_control *wbc,
struct extent_page_data *epd)
@@ -3690,6 +3755,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
struct inode *inode = mapping->host;
int ret = 0;
int done = 0;
+ int err = 0;
int nr_to_write_done = 0;
struct pagevec pvec;
int nr_pages;
@@ -3776,8 +3842,8 @@ retry:
unlock_page(page);
ret = 0;
}
- if (ret)
- done = 1;
+ if (!err && ret < 0)
+ err = ret;
/*
* the filesystem may choose to bump up nr_to_write.
@@ -3789,7 +3855,7 @@ retry:
pagevec_release(&pvec);
cond_resched();
}
- if (!scanned && !done) {
+ if (!scanned && !done && !err) {
/*
* We hit the last page and there is more work to be done: wrap
* back to the start of the file
@@ -3799,7 +3865,7 @@ retry:
goto retry;
}
btrfs_add_delayed_iput(inode);
- return ret;
+ return err;
}
static void flush_epd_write_bio(struct extent_page_data *epd)
@@ -4510,7 +4576,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
spin_unlock(&eb->refs_lock);
}
-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+static void mark_extent_buffer_accessed(struct extent_buffer *eb,
+ struct page *accessed)
{
unsigned long num_pages, i;
@@ -4519,7 +4586,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
- mark_page_accessed(p);
+ if (p != accessed)
+ mark_page_accessed(p);
}
}
@@ -4533,7 +4601,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
+ mark_extent_buffer_accessed(eb, NULL);
return eb;
}
rcu_read_unlock();
@@ -4541,6 +4609,53 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
return NULL;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len)
+{
+ struct extent_buffer *eb, *exists = NULL;
+ int ret;
+
+ eb = find_extent_buffer(fs_info, start);
+ if (eb)
+ return eb;
+ eb = alloc_dummy_extent_buffer(start, len);
+ if (!eb)
+ return NULL;
+ eb->fs_info = fs_info;
+again:
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ goto free_eb;
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> PAGE_CACHE_SHIFT, eb);
+ spin_unlock(&fs_info->buffer_lock);
+ radix_tree_preload_end();
+ if (ret == -EEXIST) {
+ exists = find_extent_buffer(fs_info, start);
+ if (exists)
+ goto free_eb;
+ else
+ goto again;
+ }
+ check_buffer_tree_ref(eb);
+ set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
+
+ /*
+ * We will free dummy extent buffer's if they come into
+ * free_extent_buffer with a ref count of 2, but if we are using this we
+ * want the buffers to stay in memory until we're done with them, so
+ * bump the ref count again.
+ */
+ atomic_inc(&eb->refs);
+ return eb;
+free_eb:
+ btrfs_release_extent_buffer(eb);
+ return exists;
+}
+#endif
+
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
{
@@ -4581,7 +4696,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
spin_unlock(&mapping->private_lock);
unlock_page(p);
page_cache_release(p);
- mark_extent_buffer_accessed(exists);
+ mark_extent_buffer_accessed(exists, p);
goto free_eb;
}
@@ -4596,7 +4711,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
attach_extent_buffer_page(eb, p);
spin_unlock(&mapping->private_lock);
WARN_ON(PageDirty(p));
- mark_page_accessed(p);
eb->pages[i] = p;
if (!PageUptodate(p))
uptodate = 0;
@@ -4954,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
}
}
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+ unsigned long start,
+ unsigned long len)
+{
+ size_t cur;
+ size_t offset;
+ struct page *page;
+ char *kaddr;
+ char __user *dst = (char __user *)dstv;
+ size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ int ret = 0;
+
+ WARN_ON(start > eb->len);
+ WARN_ON(start + len > eb->start + eb->len);
+
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+
+ while (len > 0) {
+ page = extent_buffer_page(eb, i);
+
+ cur = min(len, (PAGE_CACHE_SIZE - offset));
+ kaddr = page_address(page);
+ if (copy_to_user(dst, kaddr + offset, cur)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ dst += cur;
+ len -= cur;
+ offset = 0;
+ i++;
+ }
+
+ return ret;
+}
+
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long min_len, char **map,
unsigned long *map_start,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c488b45237b..ccc264e7bde 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -158,7 +158,6 @@ struct extent_buffer {
* to unlock
*/
wait_queue_head_t read_lock_wq;
- wait_queue_head_t lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
@@ -304,6 +303,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
+int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
+ unsigned long start,
+ unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
@@ -350,5 +352,7 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end, u64 max_bytes);
+struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start, unsigned long len);
#endif
#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1874aee69c8..225302b39af 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -75,6 +75,8 @@ void free_extent_map(struct extent_map *em)
if (atomic_dec_and_test(&em->refs)) {
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
+ if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
+ kfree(em->bdev);
kmem_cache_free(extent_map_cache, em);
}
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index e7fd8a56a14..b2991fd8583 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -15,6 +15,7 @@
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
+#define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */
struct extent_map {
struct rb_node rb_node;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 127555b29f5..f46cfe45d68 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -281,10 +281,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
found:
csum += count * csum_size;
nblocks -= count;
+ bio_index += count;
while (count--) {
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
- bio_index++;
bvec++;
}
}
@@ -750,7 +750,7 @@ again:
int slot = path->slots[0] + 1;
/* we didn't find a csum item, insert one */
nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems - 1) {
+ if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
if (ret == 1)
found_next = 1;
@@ -885,3 +885,79 @@ out:
fail_unlock:
goto out;
}
+
+void btrfs_extent_item_to_extent_map(struct inode *inode,
+ const struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi,
+ const bool new_inline,
+ struct extent_map *em)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_buffer *leaf = path->nodes[0];
+ const int slot = path->slots[0];
+ struct btrfs_key key;
+ u64 extent_start, extent_end;
+ u64 bytenr;
+ u8 type = btrfs_file_extent_type(leaf, fi);
+ int compress_type = btrfs_file_extent_compression(leaf, fi);
+
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ extent_start = key.offset;
+
+ if (type == BTRFS_FILE_EXTENT_REG ||
+ type == BTRFS_FILE_EXTENT_PREALLOC) {
+ extent_end = extent_start +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ size_t size;
+ size = btrfs_file_extent_inline_len(leaf, slot, fi);
+ extent_end = ALIGN(extent_start + size, root->sectorsize);
+ }
+
+ em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+ if (type == BTRFS_FILE_EXTENT_REG ||
+ type == BTRFS_FILE_EXTENT_PREALLOC) {
+ em->start = extent_start;
+ em->len = extent_end - extent_start;
+ em->orig_start = extent_start -
+ btrfs_file_extent_offset(leaf, fi);
+ em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ if (bytenr == 0) {
+ em->block_start = EXTENT_MAP_HOLE;
+ return;
+ }
+ if (compress_type != BTRFS_COMPRESS_NONE) {
+ set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ em->block_start = bytenr;
+ em->block_len = em->orig_block_len;
+ } else {
+ bytenr += btrfs_file_extent_offset(leaf, fi);
+ em->block_start = bytenr;
+ em->block_len = em->len;
+ if (type == BTRFS_FILE_EXTENT_PREALLOC)
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ }
+ } else if (type == BTRFS_FILE_EXTENT_INLINE) {
+ em->block_start = EXTENT_MAP_INLINE;
+ em->start = extent_start;
+ em->len = extent_end - extent_start;
+ /*
+ * Initialize orig_start and block_len with the same values
+ * as in inode.c:btrfs_get_extent().
+ */
+ em->orig_start = EXTENT_MAP_HOLE;
+ em->block_len = (u64)-1;
+ if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
+ set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ }
+ } else {
+ btrfs_err(root->fs_info,
+ "unknown file extent item type %d, inode %llu, offset %llu, root %llu",
+ type, btrfs_ino(inode), extent_start,
+ root->root_key.objectid);
+ }
+}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c