diff options
Diffstat (limited to 'fs/ocfs2/super.c')
| -rw-r--r-- | fs/ocfs2/super.c | 738 |
1 files changed, 451 insertions, 287 deletions
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7efb349fb9b..ddb662b3244 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -28,7 +28,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/random.h> #include <linux/statfs.h> @@ -42,9 +41,11 @@ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> -#include <linux/smp_lock.h> +#include <linux/cleancache.h> + +#define CREATE_TRACE_POINTS +#include "ocfs2_trace.h" -#define MLOG_MASK_PREFIX ML_SUPER #include <cluster/masklog.h> #include "ocfs2.h" @@ -53,6 +54,7 @@ #include "ocfs1_fs_compat.h" #include "alloc.h" +#include "aops.h" #include "blockcheck.h" #include "dlmglue.h" #include "export.h" @@ -66,26 +68,28 @@ #include "super.h" #include "sysfile.h" #include "uptodate.h" -#include "ver.h" #include "xattr.h" #include "quota.h" +#include "refcounttree.h" +#include "suballoc.h" #include "buffer_head_io.h" -static struct kmem_cache *ocfs2_inode_cachep = NULL; +static struct kmem_cache *ocfs2_inode_cachep; struct kmem_cache *ocfs2_dquot_cachep; struct kmem_cache *ocfs2_qf_chunk_cachep; -/* OCFS2 needs to schedule several differnt types of work which +/* OCFS2 needs to schedule several different types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these * types of work tend to be heavy we avoid using the kernel events * workqueue and schedule on our own. */ struct workqueue_struct *ocfs2_wq = NULL; -static struct dentry *ocfs2_debugfs_root = NULL; +static struct dentry *ocfs2_debugfs_root; MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("OCFS2 cluster file system"); struct mount_options { @@ -93,14 +97,18 @@ struct mount_options unsigned long mount_opt; unsigned int atime_quantum; signed short slot; - unsigned int localalloc_opt; + int localalloc_opt; + unsigned int resv_level; + int dir_resv_level; char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; }; static int ocfs2_parse_options(struct super_block *sb, char *options, struct mount_options *mopt, int is_remount); -static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); +static int ocfs2_check_set_options(struct super_block *sb, + struct mount_options *options); +static int ocfs2_show_options(struct seq_file *s, struct dentry *root); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); static int ocfs2_remount(struct super_block *sb, int *flags, char *data); @@ -140,8 +148,7 @@ static const struct super_operations ocfs2_sops = { .alloc_inode = ocfs2_alloc_inode, .destroy_inode = ocfs2_destroy_inode, .drop_inode = ocfs2_drop_inode, - .clear_inode = ocfs2_clear_inode, - .delete_inode = ocfs2_delete_inode, + .evict_inode = ocfs2_evict_inode, .sync_fs = ocfs2_sync_fs, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, @@ -158,6 +165,7 @@ enum { Opt_nointr, Opt_hb_none, Opt_hb_local, + Opt_hb_global, Opt_data_ordered, Opt_data_writeback, Opt_atime_quantum, @@ -173,6 +181,10 @@ enum { Opt_noacl, Opt_usrquota, Opt_grpquota, + Opt_coherency_buffered, + Opt_coherency_full, + Opt_resv_level, + Opt_dir_resv_level, Opt_err, }; @@ -184,6 +196,7 @@ static const match_table_t tokens = { {Opt_nointr, "nointr"}, {Opt_hb_none, OCFS2_HB_NONE}, {Opt_hb_local, OCFS2_HB_LOCAL}, + {Opt_hb_global, OCFS2_HB_GLOBAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, @@ -199,6 +212,10 @@ static const match_table_t tokens = { {Opt_noacl, "noacl"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_coherency_buffered, "coherency=buffered"}, + {Opt_coherency_full, "coherency=full"}, + {Opt_resv_level, "resv_level=%u"}, + {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_err, NULL} }; @@ -269,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) spin_unlock(&osb->osb_lock); out += snprintf(buf + out, len - out, - "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", + "%10s => Pid: %d Interval: %lu\n", "Commit", (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), - osb->osb_commit_interval, - atomic_read(&osb->needs_checkpoint)); + osb->osb_commit_interval); out += snprintf(buf + out, len - out, "%10s => State: %d TxnId: %lu NumTxns: %d\n", @@ -299,9 +315,12 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) spin_lock(&osb->osb_lock); out += snprintf(buf + out, len - out, - "%10s => Slot: %d NumStolen: %d\n", "Steal", + "%10s => InodeSlot: %d StolenInodes: %d, " + "MetaSlot: %d StolenMeta: %d\n", "Steal", osb->s_inode_steal_slot, - atomic_read(&osb->s_num_inodes_stolen)); + atomic_read(&osb->s_num_inodes_stolen), + osb->s_meta_steal_slot, + atomic_read(&osb->s_num_meta_stolen)); spin_unlock(&osb->osb_lock); out += snprintf(buf + out, len - out, "OrphanScan => "); @@ -373,7 +392,7 @@ static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, } #endif /* CONFIG_DEBUG_FS */ -static struct file_operations ocfs2_osb_debug_fops = { +static const struct file_operations ocfs2_osb_debug_fops = { .open = ocfs2_osb_debug_open, .release = ocfs2_debug_release, .read = ocfs2_debug_read, @@ -425,8 +444,6 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) int status = 0; int i; - mlog_entry_void(); - new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0); if (IS_ERR(new)) { status = PTR_ERR(new); @@ -462,7 +479,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) } bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -472,8 +490,6 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) int status = 0; int i; - mlog_entry_void(); - for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES; i++) { @@ -492,7 +508,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) } bail: - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -501,13 +518,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) int i; struct inode *inode; - mlog_entry_void(); - - for (i = 0; i < NUM_SYSTEM_INODES; i++) { - inode = osb->system_inodes[i]; + for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { + inode = osb->global_system_inodes[i]; if (inode) { iput(inode); - osb->system_inodes[i] = NULL; + osb->global_system_inodes[i] = NULL; } } @@ -523,7 +538,18 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) osb->root_inode = NULL; } - mlog_exit(0); + if (!osb->local_system_inodes) + return; + + for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { + if (osb->local_system_inodes[i]) { + iput(osb->local_system_inodes[i]); + osb->local_system_inodes[i] = NULL; + } + } + + kfree(osb->local_system_inodes); + osb->local_system_inodes = NULL; } /* We're allocating fs objects, use GFP_NOFS */ @@ -535,15 +561,24 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) if (!oi) return NULL; + oi->i_sync_tid = 0; + oi->i_datasync_tid = 0; + jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); return &oi->vfs_inode; } -static void ocfs2_destroy_inode(struct inode *inode) +static void ocfs2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); } +static void ocfs2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ocfs2_i_callback); +} + static unsigned long long ocfs2_max_file_offset(unsigned int bbits, unsigned int cbits) { @@ -597,16 +632,19 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) int ret = 0; struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); + u32 tmp; - lock_kernel(); + sync_filesystem(sb); - if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { + if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || + !ocfs2_check_set_options(sb, &parsed_options)) { ret = -EINVAL; goto out; } - if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != - (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; @@ -646,12 +684,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } if (*flags & MS_RDONLY) { - mlog(0, "Going to ro mode.\n"); sb->s_flags |= MS_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; } else { - mlog(0, "Making ro filesystem writeable.\n"); - if (osb->osb_flags & OCFS2_OSB_ERROR_FS) { mlog(ML_ERROR, "Cannot remount RDWR " "filesystem due to previous errors.\n"); @@ -669,6 +704,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; } + trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags); unlock_osb: spin_unlock(&osb->osb_lock); /* Enable quota accounting after remounting RW */ @@ -691,8 +727,6 @@ unlock_osb: if (!ret) { /* Only save off the new mount options in case of a successful * remount. */ - if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) - parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -701,9 +735,12 @@ unlock_osb: if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? + MS_POSIXACL : 0); } out: - unlock_kernel(); return ret; } @@ -773,17 +810,20 @@ static int ocfs2_sb_probe(struct super_block *sb, if (tmpstat < 0) { status = tmpstat; mlog_errno(status); - goto bail; + break; } di = (struct ocfs2_dinode *) (*bh)->b_data; memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); - status = ocfs2_verify_volume(di, *bh, blksize, stats); - if (status >= 0) - goto bail; - brelse(*bh); - *bh = NULL; - if (status != -EAGAIN) + spin_lock_init(&stats->b_lock); + tmpstat = ocfs2_verify_volume(di, *bh, blksize, stats); + if (tmpstat < 0) { + brelse(*bh); + *bh = NULL; + } + if (tmpstat != -EAGAIN) { + status = tmpstat; break; + } } bail: @@ -792,23 +832,29 @@ bail: static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) { - if (ocfs2_mount_local(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; + + if (osb->s_mount_opt & hb_enabled) { + if (ocfs2_mount_local(osb)) { mlog(ML_ERROR, "Cannot heartbeat on a locally " "mounted device.\n"); return -EINVAL; } - } - - if (ocfs2_userspace_stack(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + if (ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Userspace stack expected, but " "o2cb heartbeat arguments passed to mount\n"); return -EINVAL; } + if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && + !ocfs2_cluster_o2cb_global_heartbeat(osb)) || + ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && + ocfs2_cluster_o2cb_global_heartbeat(osb))) { + mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); + return -EINVAL; + } } - if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + if (!(osb->s_mount_opt & hb_enabled)) { if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && !ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Heartbeat has to be started to mount " @@ -861,13 +907,15 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) continue; if (unsuspend) - status = vfs_quota_enable( - sb_dqopt(sb)->files[type], - type, QFMT_OCFS2, - DQUOT_SUSPENDED); - else - status = vfs_quota_disable(sb, type, - DQUOT_SUSPENDED); + status = dquot_resume(sb, type); + else { + struct ocfs2_mem_dqinfo *oinfo; + + /* Cancel periodic syncing before suspending */ + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); + status = dquot_suspend(sb, type); + } if (status < 0) break; } @@ -898,8 +946,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb) status = -ENOENT; goto out_quota_off; } - status = vfs_quota_enable(inode[type], type, QFMT_OCFS2, - DQUOT_USAGE_ENABLED); + status = dquot_enable(inode[type], type, QFMT_OCFS2, + DQUOT_USAGE_ENABLED); if (status < 0) goto out_quota_off; } @@ -920,18 +968,22 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) int type; struct inode *inode; struct super_block *sb = osb->sb; + struct ocfs2_mem_dqinfo *oinfo; /* We mostly ignore errors in this function because there's not much * we can do when we see them */ for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; + /* Cancel periodic syncing before we grab dqonoff_mutex */ + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); inode = igrab(sb->s_dquot.files[type]); /* Turn off quotas. This will remove all dquot structures from * memory and so they will be automatically synced to global * quota files */ - vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED | - DQUOT_LIMITS_ENABLED); + dquot_disable(sb, type, DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED); if (!inode) continue; iput(inode); @@ -939,8 +991,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) } /* Handle quota on quotactl */ -static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount) +static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) { unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; @@ -948,30 +999,24 @@ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) return -EINVAL; - if (remount) - return 0; /* Just ignore it has been handled in - * ocfs2_remount() */ - return vfs_quota_enable(sb_dqopt(sb)->files[type], type, - format_id, DQUOT_LIMITS_ENABLED); + return dquot_enable(sb_dqopt(sb)->files[type], type, + format_id, DQUOT_LIMITS_ENABLED); } /* Handle quota off quotactl */ -static int ocfs2_quota_off(struct super_block *sb, int type, int remount) +static int ocfs2_quota_off(struct super_block *sb, int type) { - if (remount) - return 0; /* Ignore now and handle later in - * ocfs2_remount() */ - return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); + return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); } -static struct quotactl_ops ocfs2_quotactl_ops = { - .quota_on = ocfs2_quota_on, +static const struct quotactl_ops ocfs2_quotactl_ops = { + .quota_on_meta = ocfs2_quota_on, .quota_off = ocfs2_quota_off, - .quota_sync = vfs_quota_sync, - .get_info = vfs_get_dqinfo, - .set_info = vfs_set_dqinfo, - .get_dqblk = vfs_get_dqblk, - .set_dqblk = vfs_set_dqblk, + .quota_sync = dquot_quota_sync, + .get_info = dquot_get_dqinfo, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, }; static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) @@ -982,10 +1027,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; - char nodestr[8]; + char nodestr[12]; struct ocfs2_blockcheck_stats stats; - mlog_entry("%p, %p, %i", sb, data, silent); + trace_ocfs2_fill_super(sb, data, silent); if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { status = -EINVAL; @@ -1008,31 +1053,22 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); bh = NULL; - if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) - parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; - + if (!ocfs2_check_set_options(sb, &parsed_options)) { + status = -EINVAL; + goto read_super_error; + } osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; osb->osb_commit_interval = parsed_options.commit_interval; - osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); - osb->local_alloc_bits = osb->local_alloc_default_bits; - if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA && - !OCFS2_HAS_RO_COMPAT_FEATURE(sb, - OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - status = -EINVAL; - mlog(ML_ERROR, "User quotas were requested, but this " - "filesystem does not have the feature enabled.\n"); - goto read_super_error; - } - if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA && - !OCFS2_HAS_RO_COMPAT_FEATURE(sb, - OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - status = -EINVAL; - mlog(ML_ERROR, "Group quotas were requested, but this " - "filesystem does not have the feature enabled.\n"); - goto read_super_error; - } + + ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt); + osb->osb_resv_level = parsed_options.resv_level; + osb->osb_dir_resv_level = parsed_options.resv_level; + if (parsed_options.dir_resv_level == -1) + osb->osb_dir_resv_level = parsed_options.resv_level; + else + osb->osb_dir_resv_level = parsed_options.dir_resv_level; status = ocfs2_verify_userspace_stack(osb, &parsed_options); if (status) @@ -1040,7 +1076,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) | ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, @@ -1069,15 +1105,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) "file system, but write access is " "unavailable.\n"); else - mlog_errno(status); + mlog_errno(status); goto read_super_error; } ocfs2_set_ro_flag(osb, 1); - printk(KERN_NOTICE "Readonly device detected. No cluster " - "services will be utilized for this mount. Recovery " - "will be skipped.\n"); + printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. " + "Cluster services will not be used for this mount. " + "Recovery will be skipped.\n", osb->dev_str); } if (!ocfs2_is_hard_readonly(osb)) { @@ -1122,19 +1158,19 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } status = ocfs2_mount_volume(sb); - if (osb->root_inode) - inode = igrab(osb->root_inode); - if (status < 0) goto read_super_error; + if (osb->root_inode) + inode = igrab(osb->root_inode); + if (!inode) { status = -EIO; mlog_errno(status); goto read_super_error; } - root = d_alloc_root(inode); + root = d_make_root(inode); if (!root) { status = -ENOMEM; mlog_errno(status); @@ -1170,7 +1206,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) mlog_errno(status); atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); - mlog_exit(status); return status; } } @@ -1182,66 +1217,95 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) wake_up(&osb->osb_mount_event); /* Start this when the mount is almost sure of being successful */ - ocfs2_orphan_scan_init(osb); + ocfs2_orphan_scan_start(osb); - mlog_exit(status); return status; read_super_error: brelse(bh); - if (inode) - iput(inode); - if (osb) { atomic_set(&osb->vol_state, VOLUME_DISABLED); wake_up(&osb->osb_mount_event); ocfs2_dismount_volume(sb, 1); } - mlog_exit(status); + if (status) + mlog_errno(status); return status; } -static int ocfs2_get_sb(struct file_system_type *fs_type, +static struct dentry *ocfs2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data, - struct vfsmount *mnt) + void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, - mnt); + return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", - .get_sb = ocfs2_get_sb, /* is this called when we mount - * the fs? */ - .kill_sb = kill_block_super, /* set to the generic one - * right now, but do we - * need to change that? */ + .mount = ocfs2_mount, + .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; +MODULE_ALIAS_FS("ocfs2"); + +static int ocfs2_check_set_options(struct super_block *sb, + struct mount_options *options) +{ + if (options->mount_opt & OCFS2_MOUNT_USRQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + mlog(ML_ERROR, "User quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + return 0; + } + if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + mlog(ML_ERROR, "Group quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + return 0; + } + if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL && + !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) { + mlog(ML_ERROR, "ACL support requested but extended attributes " + "feature is not enabled\n"); + return 0; + } + /* No ACL setting specified? Use XATTR feature... */ + if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL | + OCFS2_MOUNT_NO_POSIX_ACL))) { + if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) + options->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + else + options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; + } + return 1; +} static int ocfs2_parse_options(struct super_block *sb, char *options, struct mount_options *mopt, int is_remount) { - int status; + int status, user_stack = 0; char *p; + u32 tmp; - mlog_entry("remount: %d, options: \"%s\"\n", is_remount, - options ? options : "(none)"); + trace_ocfs2_parse_options(is_remount, options ? options : "(none)"); mopt->commit_interval = 0; - mopt->mount_opt = 0; + mopt->mount_opt = OCFS2_MOUNT_NOINTR; mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; mopt->slot = OCFS2_INVALID_SLOT; - mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; + mopt->localalloc_opt = -1; mopt->cluster_stack[0] = '\0'; + mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; + mopt->dir_resv_level = -1; if (!options) { status = 1; @@ -1261,7 +1325,10 @@ static int ocfs2_parse_options(struct super_block *sb, mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; break; case Opt_hb_none: - mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; + mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; + break; + case Opt_hb_global: + mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; break; case Opt_barrier: if (match_int(&args[0], &option)) { @@ -1332,7 +1399,7 @@ static int ocfs2_parse_options(struct super_block *sb, status = 0; goto bail; } - if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) + if (option >= 0) mopt->localalloc_opt = option; break; case Opt_localflocks: @@ -1367,45 +1434,61 @@ static int ocfs2_parse_options(struct super_block *sb, memcpy(mopt->cluster_stack, args[0].from, OCFS2_STACK_LABEL_LEN); mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; + /* + * Open code the memcmp here as we don't have + * an osb to pass to + * ocfs2_userspace_stack(). + */ + if (memcmp(mopt->cluster_stack, + OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + user_stack = 1; break; case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; case Opt_usrquota: - /* We check only on remount, otherwise features - * aren't yet initialized. */ - if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, - OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - mlog(ML_ERROR, "User quota requested but " - "filesystem feature is not set\n"); - status = 0; - goto bail; - } mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; break; case Opt_grpquota: - if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, - OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - mlog(ML_ERROR, "Group quota requested but " - "filesystem feature is not set\n"); - status = 0; - goto bail; - } mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; break; -#ifdef CONFIG_OCFS2_FS_POSIX_ACL + case Opt_coherency_buffered: + mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; + break; + case Opt_coherency_full: + mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; + break; case Opt_acl: mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; break; case Opt_noacl: + mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; break; -#else - case Opt_acl: - case Opt_noacl: - printk(KERN_INFO "ocfs2 (no)acl options not supported\n"); + case Opt_resv_level: + if (is_remount) + break; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option >= OCFS2_MIN_RESV_LEVEL && + option < OCFS2_MAX_RESV_LEVEL) + mopt->resv_level = option; + break; + case Opt_dir_resv_level: + if (is_remount) + break; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option >= OCFS2_MIN_RESV_LEVEL && + option < OCFS2_MAX_RESV_LEVEL) + mopt->dir_resv_level = option; break; -#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1415,23 +1498,38 @@ static int ocfs2_parse_options(struct super_block *sb, } } + if (user_stack == 0) { + /* Ensure only one heartbeat mode */ + tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | + OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE); + if (hweight32(tmp) != 1) { + mlog(ML_ERROR, "Invalid heartbeat mount options\n"); + status = 0; + goto bail; + } + } + status = 1; bail: - mlog_exit(status); return status; } -static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +static int ocfs2_show_options(struct seq_file *s, struct dentry *root) { - struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); + struct ocfs2_super *osb = OCFS2_SB(root->d_sb); unsigned long opts = osb->s_mount_opt; unsigned int local_alloc_megs; - if (opts & OCFS2_MOUNT_HB_LOCAL) - seq_printf(s, ",_netdev,heartbeat=local"); - else - seq_printf(s, ",heartbeat=none"); + if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { + seq_printf(s, ",_netdev"); + if (opts & OCFS2_MOUNT_HB_LOCAL) + seq_printf(s, ",%s", OCFS2_HB_LOCAL); + else + seq_printf(s, ",%s", OCFS2_HB_GLOBAL); + } else + seq_printf(s, ",%s", OCFS2_HB_NONE); if (opts & OCFS2_MOUNT_NOINTR) seq_printf(s, ",nointr"); @@ -1452,15 +1550,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->preferred_slot != OCFS2_INVALID_SLOT) seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); - if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) - seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); + seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); if (osb->osb_commit_interval) seq_printf(s, ",commit=%u", (unsigned) (osb->osb_commit_interval / HZ)); local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); - if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) + if (local_alloc_megs != ocfs2_la_default_mb(osb)) seq_printf(s, ",localalloc=%d", local_alloc_megs); if (opts & OCFS2_MOUNT_LOCALFLOCKS) @@ -1474,6 +1571,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_GRPQUOTA) seq_printf(s, ",grpquota"); + if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) + seq_printf(s, ",coherency=buffered"); + else + seq_printf(s, ",coherency=full"); + if (opts & OCFS2_MOUNT_NOUSERXATTR) seq_printf(s, ",nouser_xattr"); else @@ -1482,12 +1584,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_INODE64) seq_printf(s, ",inode64"); -#ifdef CONFIG_OCFS2_FS_POSIX_ACL if (opts & OCFS2_MOUNT_POSIX_ACL) seq_printf(s, ",acl"); else seq_printf(s, ",noacl"); -#endif + + if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL) + seq_printf(s, ",resv_level=%d", osb->osb_resv_level); + + if (osb->osb_dir_resv_level != osb->osb_resv_level) + seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); return 0; } @@ -1496,26 +1602,18 @@ static int __init ocfs2_init(void) { int status; - mlog_entry_void(); - - ocfs2_print_version(); - status = init_ocfs2_uptodate_cache(); - if (status < 0) { - mlog_errno(status); - goto leave; - } + if (status < 0) + goto out1; status = ocfs2_initialize_mem_caches(); - if (status < 0) { - mlog_errno(status); - goto leave; - } + if (status < 0) + goto out2; ocfs2_wq = create_singlethread_workqueue("ocfs2_wq"); if (!ocfs2_wq) { status = -ENOMEM; - goto leave; + goto out3; } ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); @@ -1524,34 +1622,30 @@ static int __init ocfs2_init(void) mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); } - status = ocfs2_quota_setup(); - if (status) - goto leave; - ocfs2_set_locking_protocol(); status = register_quota_format(&ocfs2_quota_format); -leave: - if (status < 0) { - ocfs2_quota_shutdown(); - ocfs2_free_mem_caches(); - exit_ocfs2_uptodate_cache(); - } - - mlog_exit(status); + if (status < 0) + goto out4; + status = register_filesystem(&ocfs2_fs_type); + if (!status) + return 0; - if (status >= 0) { - return register_filesystem(&ocfs2_fs_type); - } else - return -1; + unregister_quota_format(&ocfs2_quota_format); +out4: + destroy_workqueue(ocfs2_wq); + debugfs_remove(ocfs2_debugfs_root); +out3: + ocfs2_free_mem_caches(); +out2: + exit_ocfs2_uptodate_cache(); +out1: + mlog_errno(status); + return status; } static void __exit ocfs2_exit(void) { - mlog_entry_void(); - - ocfs2_quota_shutdown(); - if (ocfs2_wq) { flush_workqueue(ocfs2_wq); destroy_workqueue(ocfs2_wq); @@ -1566,22 +1660,14 @@ static void __exit ocfs2_exit(void) unregister_filesystem(&ocfs2_fs_type); exit_ocfs2_uptodate_cache(); - - mlog_exit_void(); } static void ocfs2_put_super(struct super_block *sb) { - mlog_entry("(0x%p)\n", sb); - - lock_kernel(); + trace_ocfs2_put_super(sb); ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); - - unlock_kernel(); - - mlog_exit_void(); } static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -1593,7 +1679,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) struct buffer_head *bh = NULL; struct inode *inode = NULL; - mlog_entry("(%p, %p)\n", dentry->d_sb, buf); + trace_ocfs2_statfs(dentry->d_sb, buf); osb = OCFS2_SB(dentry->d_sb); @@ -1627,6 +1713,10 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_files = numbits; buf->f_ffree = freebits; + buf->f_fsid.val[0] = crc32_le(0, osb->uuid_str, OCFS2_VOL_UUID_LEN) + & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = crc32_le(0, osb->uuid_str + OCFS2_VOL_UUID_LEN, + OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL; brelse(bh); @@ -1636,7 +1726,8 @@ bail: if (inode) iput(inode); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -1650,10 +1741,8 @@ static void ocfs2_inode_init_once(void *data) spin_lock_init(&oi->ip_lock); ocfs2_extent_map_init(&oi->vfs_inode); INIT_LIST_HEAD(&oi->ip_io_markers); - oi->ip_created_trans = 0; - oi->ip_last_trans = 0; oi->ip_dir_start_lookup = 0; - + mutex_init(&oi->ip_unaligned_aio); init_rwsem(&oi->ip_alloc_sem); init_rwsem(&oi->ip_xattr_sem); mutex_init(&oi->ip_io_mutex); @@ -1661,11 +1750,14 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + ocfs2_resv_init_once(&oi->ip_la_data_resv); + ocfs2_lock_res_init_once(&oi->ip_rw_lockres); ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres); - ocfs2_metadata_cache_init(&oi->vfs_inode); + ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), + &ocfs2_inode_caching_ops); inode_init_once(&oi->vfs_inode); } @@ -1705,6 +1797,11 @@ static int ocfs2_initialize_mem_caches(void) static void ocfs2_free_mem_caches(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); ocfs2_inode_cachep = NULL; @@ -1730,8 +1827,8 @@ static int ocfs2_get_sector(struct super_block *sb, *bh = sb_getblk(sb, block); if (!*bh) { - mlog_errno(-EIO); - return -EIO; + mlog_errno(-ENOMEM); + return -ENOMEM; } lock_buffer(*bh); if (!buffer_dirty(*bh)) @@ -1755,8 +1852,6 @@ static int ocfs2_mount_volume(struct super_block *sb) int unlock_super = 0; struct ocfs2_super *osb = OCFS2_SB(sb); - mlog_entry_void(); - if (ocfs2_is_hard_readonly(osb)) goto leave; @@ -1801,7 +1896,6 @@ leave: if (unlock_super) ocfs2_super_unlock(osb, 1); - mlog_exit(status); return status; } @@ -1809,9 +1903,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) { int tmp, hangup_needed = 0; struct ocfs2_super *osb = NULL; - char nodestr[8]; + char nodestr[12]; - mlog_entry("(0x%p)\n", sb); + trace_ocfs2_dismount_volume(sb); BUG_ON(!sb); osb = OCFS2_SB(sb); @@ -1824,6 +1918,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_disable_quotas(osb); + /* All dquots should be freed by now */ + WARN_ON(!llist_empty(&osb->dquot_drop_list)); + /* Wait for worker to be done with the work structure in osb */ + cancel_work_sync(&osb->dquot_drop_work); + ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); @@ -1835,6 +1934,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_sync_blockdev(sb); + ocfs2_purge_refcount_trees(osb); + /* No cluster connection means we've failed during mount, so skip * all the steps which depended on that to complete. */ if (osb->cconn) { @@ -1859,7 +1960,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) * If we failed before we got a uuid_str yet, we can't stop * heartbeat. Otherwise, do it. */ - if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) + if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str && + !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; if (osb->cconn) @@ -1911,6 +2013,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu return 0; } +/* Make sure entire volume is addressable by our journal. Requires + osb_clusters_at_boot to be valid and for the journal to have been + initialized by ocfs2_journal_init(). */ +static int ocfs2_journal_addressable(struct ocfs2_super *osb) +{ + int status = 0; + u64 max_block = + ocfs2_clusters_to_blocks(osb->sb, + osb->osb_clusters_at_boot) - 1; + + /* 32-bit block number is always OK. */ + if (max_block <= (u32)~0ULL) + goto out; + + /* Volume is "huge", so see if our journal is new enough to + support it. */ + if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, + OCFS2_FEATURE_COMPAT_JBD2_SB) && + jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_64BIT))) { + mlog(ML_ERROR, "The journal cannot address the entire volume. " + "Enable the 'block64' journal option with tunefs.ocfs2"); + status = -EFBIG; + goto out; + } + + out: + return status; +} + static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, int sector_size, @@ -1921,10 +2053,8 @@ static int ocfs2_initialize_super(struct super_block *sb, struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; struct ocfs2_journal *journal; - __le32 uuid_net_key; struct ocfs2_super *osb; - - mlog_entry_void(); + u64 total_blocks; osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL); if (!osb) { @@ -1935,6 +2065,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; + sb->s_d_op = &ocfs2_dentry_ops; sb->s_export_op = &ocfs2_export_ops; sb->s_qcop = &ocfs2_quotactl_ops; sb->dq_op = &ocfs2_quota_operations; @@ -1965,7 +2096,9 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->blocked_lock_count = 0; spin_lock_init(&osb->osb_lock); spin_lock_init(&osb->osb_xattr_lock); - ocfs2_init_inode_steal_slot(osb); + ocfs2_init_steal_slots(osb); + + mutex_init(&osb->system_file_mutex); atomic_set(&osb->alloc_stats.moves, 0); atomic_set(&osb->alloc_stats.local_data, 0); @@ -1981,6 +2114,16 @@ static int ocfs2_initialize_super(struct super_block *sb, snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); + osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); + if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { + mlog(ML_ERROR, "Invalid number of node slots (%u)\n", + osb->max_slots); + status = -EINVAL; + goto bail; + } + + ocfs2_orphan_scan_init(osb); + status = ocfs2_recovery_init(osb); if (status) { mlog(ML_ERROR, "Unable to initialize recovery state\n"); @@ -1989,7 +2132,6 @@ static int ocfs2_initialize_super(struct super_block *sb, } init_waitqueue_head(&osb->checkpoint_event); - atomic_set(&osb->needs_checkpoint, 0); osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; @@ -2004,6 +2146,12 @@ static int ocfs2_initialize_super(struct super_block *sb, init_waitqueue_head(&osb->osb_mount_event); + status = ocfs2_resmap_init(osb, &osb->osb_la_resmap); + if (status) { + mlog_errno(status); + goto bail; + } + osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); if (!osb->vol_label) { mlog(ML_ERROR, "unable to alloc vol label\n"); @@ -2011,15 +2159,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); - if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { - mlog(ML_ERROR, "Invalid number of node slots (%u)\n", - osb->max_slots); - status = -EINVAL; - goto bail; - } - mlog(0, "max_slots for this device: %u\n", osb->max_slots); - osb->slot_recovery_generations = kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), GFP_KERNEL); @@ -2039,6 +2178,8 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } + osb->osb_rf_lock_tree = RB_ROOT; + osb->s_feature_compat = le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); osb->s_feature_ro_compat = @@ -2060,11 +2201,12 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - if (ocfs2_userspace_stack(osb)) { - memcpy(osb->osb_cluster_stack, + if (ocfs2_clusterinfo_valid(osb)) { + osb->osb_stackflags = + OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; + strlcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, - OCFS2_STACK_LABEL_LEN); - osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; + OCFS2_STACK_LABEL_LEN + 1); if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { mlog(ML_ERROR, "couldn't mount because of an invalid " @@ -2073,6 +2215,9 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto bail; } + strlcpy(osb->osb_cluster_name, + OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, + OCFS2_CLUSTER_NAME_LEN + 1); } else { /* The empty string is identical with classic tools that * don't know about s_cluster_info. */ @@ -2108,14 +2253,13 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); journal->j_state = OCFS2_JOURNAL_FREE; - INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); - osb->dentry_lock_list = NULL; + INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); + init_llist_head(&osb->dquot_drop_list); /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); osb->s_clustersize = 1 << osb->s_clustersize_bits; - mlog(0, "clusterbits=%d\n", osb->s_clustersize_bits); if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE || osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) { @@ -2125,11 +2269,15 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) - > (u32)~0UL) { - mlog(ML_ERROR, "Volume might try to write to blocks beyond " - "what jbd can address in 32 bits.\n"); - status = -EINVAL; + total_blocks = ocfs2_clusters_to_blocks(osb->sb, + le32_to_cpu(di->i_clusters)); + + status = generic_check_addressable(osb->sb->s_blocksize_bits, + total_blocks); + if (status) { + mlog(ML_ERROR, "Volume too large " + "to mount safely on this system"); + status = -EFBIG; goto bail; } @@ -2140,21 +2288,18 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); - - strncpy(osb->vol_label, di->id2.i_super.s_label, 63); - osb->vol_label[63] = '\0'; + strlcpy(osb->vol_label, di->id2.i_super.s_label, + OCFS2_MAX_VOL_LABEL_LEN); osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno); osb->first_cluster_group_blkno = le64_to_cpu(di->id2.i_super.s_first_cluster_group); osb->fs_generation = le32_to_cpu(di->i_fs_generation); osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); - mlog(0, "vol_label: %s\n", osb->vol_label); - mlog(0, "uuid: %s\n", osb->uuid_str); - mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", - (unsigned long long)osb->root_blkno, - (unsigned long long)osb->system_dir_blkno); + trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str, + (unsigned long long)osb->root_blkno, + (unsigned long long)osb->system_dir_blkno, + osb->s_clustersize_bits); osb->osb_dlm_debug = ocfs2_new_dlm_debug(); if (!osb->osb_dlm_debug) { @@ -2184,18 +2329,20 @@ static int ocfs2_initialize_super(struct super_block *sb, } osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; + osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters; iput(inode); - osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; + osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0, + osb->s_feature_incompat) * 8; status = ocfs2_init_slot_info(osb); if (status < 0) { mlog_errno(status); goto bail; } + cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); bail: - mlog_exit(status); return status; } @@ -2211,8 +2358,6 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, { int status = -EAGAIN; - mlog_entry_void(); - if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { /* We have to do a raw check of the feature here */ @@ -2267,7 +2412,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, } out: - mlog_exit(status); + if (status && status != -EAGAIN) + mlog_errno(status); return status; } @@ -2280,8 +2426,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) * recover * ourselves. */ - mlog_entry_void(); - /* Init our journal object. */ status = ocfs2_journal_init(osb->journal, &dirty); if (status < 0) { @@ -2289,6 +2433,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) goto finally; } + /* Now that journal has been initialized, check to make sure + entire volume is addressable. */ + status = ocfs2_journal_addressable(osb); + if (status) + goto finally; + /* If the journal was unmounted cleanly then we don't want to * recover anything. Otherwise, journal_load will do that * dirty work for us :) */ @@ -2299,8 +2449,8 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) goto finally; } } else { - mlog(ML_NOTICE, "File system was not unmounted cleanly, " - "recovering volume.\n"); + printk(KERN_NOTICE "ocfs2: File system on device (%s) was not " + "unmounted cleanly, recovering it.\n", osb->dev_str); } local = ocfs2_mount_local(osb); @@ -2325,8 +2475,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) * ourselves as mounted. */ } - mlog(0, "Journal loaded.\n"); - status = ocfs2_load_local_alloc(osb); if (status < 0) { mlog_errno(status); @@ -2355,10 +2503,10 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) mlog_errno(status); finally: - if (local_alloc) - kfree(local_alloc); + kfree(local_alloc); - mlog_exit(status); + if (status) + mlog_errno(status); return status; } @@ -2370,8 +2518,6 @@ finally: */ static void ocfs2_delete_osb(struct ocfs2_super *osb) { - mlog_entry_void(); - /* This function assumes that the caller has the main osb resource */ ocfs2_free_slot_info(osb); @@ -2380,17 +2526,14 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to - * allocate osb->journal at the start of ocfs2_initalize_osb(), + * allocate osb->journal at the start of ocfs2_initialize_osb(), * we free it here. */ kfree(osb->journal); - if (osb->local_alloc_copy) - kfree(osb->local_alloc_copy); + kfree(osb->local_alloc_copy); kfree(osb->uuid_str); ocfs2_put_dlm_debug(osb->osb_dlm_debug); memset(osb, 0, sizeof(struct ocfs2_super)); - - mlog_exit_void(); } /* Put OCFS2 into a readonly state, or (if the user specifies it), @@ -2464,9 +2607,30 @@ void __ocfs2_abort(struct super_block* sb, /* Force a panic(). This stinks, but it's better than letting * things continue without having a proper hard readonly * here. */ - OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; + if (!ocfs2_mount_local(OCFS2_SB(sb))) + OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; ocfs2_handle_error(sb); } +/* + * Void signal blockers, because in-kernel sigprocmask() only fails + * when SIG_* is wrong. + */ +void ocfs2_block_signals(sigset_t *oldset) +{ + int rc; + sigset_t blocked; + + sigfillset(&blocked); + rc = sigprocmask(SIG_BLOCK, &blocked, oldset); + BUG_ON(rc); +} + +void ocfs2_unblock_signals(sigset_t *oldset) +{ + int rc = sigprocmask(SIG_SETMASK, oldset, NULL); + BUG_ON(rc); +} + module_init(ocfs2_init); module_exit(ocfs2_exit); |
