aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-04-03 12:13:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-03 12:13:56 -0700
commit78609a812e9afa87202631d128018361f68c44a9 (patch)
tree8c2d4b924ade170c69776d6cc39bec4dff3c8574
parent133e2a3164771454aa326859c2b293687189b553 (diff)
parent9140db04ef185f934acf2b1b15b3dd5e6a6bfc22 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits) ocfs2: recover orphans in offline slots during recovery and mount ocfs2: Pagecache usage optimization on ocfs2 ocfs2: fix rare stale inode errors when exporting via nfs ocfs2/dlm: Tweak mle_state output ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres() ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry ocfs2/dlm: Show the number of lockres/mles in dlm_state ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined ocfs2/dlm: Improve lockres counts ocfs2/dlm: Track number of mles ocfs2/dlm: Indent dlm_cleanup_master_list() ocfs2/dlm: Activate dlm->master_hash for master list entries ocfs2/dlm: Create and destroy the dlm->master_hash ocfs2/dlm: Refactor dlm_clean_master_list() ocfs2/dlm: Clean up struct dlm_lock_name ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list ocfs2: Optimize inode group allocation by recording last used group. ocfs2: Allocate inode groups from global_bitmap. ocfs2: Optimize inode allocation by remembering last group ocfs2: fix leaf start calculation in ocfs2_dx_dir_rebalance() ...
-rw-r--r--fs/ocfs2/alloc.c57
-rw-r--r--fs/ocfs2/alloc.h3
-rw-r--r--fs/ocfs2/aops.c23
-rw-r--r--fs/ocfs2/cluster/heartbeat.c96
-rw-r--r--fs/ocfs2/cluster/heartbeat.h3
-rw-r--r--fs/ocfs2/cluster/nodemanager.c9
-rw-r--r--fs/ocfs2/dir.c2806
-rw-r--r--fs/ocfs2/dir.h57
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h58
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c87
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c29
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c387
-rw-r--r--fs/ocfs2/dlm/dlmthread.c20
-rw-r--r--fs/ocfs2/dlmglue.c46
-rw-r--r--fs/ocfs2/dlmglue.h2
-rw-r--r--fs/ocfs2/export.c84
-rw-r--r--fs/ocfs2/inode.c48
-rw-r--r--fs/ocfs2/inode.h5
-rw-r--r--fs/ocfs2/journal.c173
-rw-r--r--fs/ocfs2/journal.h77
-rw-r--r--fs/ocfs2/localalloc.c86
-rw-r--r--fs/ocfs2/namei.c250
-rw-r--r--fs/ocfs2/ocfs2.h76
-rw-r--r--fs/ocfs2/ocfs2_fs.h136
-rw-r--r--fs/ocfs2/ocfs2_lockid.h4
-rw-r--r--fs/ocfs2/suballoc.c254
-rw-r--r--fs/ocfs2/suballoc.h4
-rw-r--r--fs/ocfs2/super.c188
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/ocfs2/xattr.h2
30 files changed, 4389 insertions, 689 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 19e3a96aa02..678a067d925 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -294,6 +294,55 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
};
+static void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
+ u64 blkno)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
+}
+
+static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ return le64_to_cpu(dx_root->dr_last_eb_blk);
+}
+
+static void ocfs2_dx_root_update_clusters(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 clusters)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ le32_add_cpu(&dx_root->dr_clusters, clusters);
+}
+
+static int ocfs2_dx_root_sanity_check(struct inode *inode,
+ struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
+
+ return 0;
+}
+
+static void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
+{
+ struct ocfs2_dx_root_block *dx_root = et->et_object;
+
+ et->et_root_el = &dx_root->dr_list;
+}
+
+static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
+ .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk,
+ .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk,
+ .eo_update_clusters = ocfs2_dx_root_update_clusters,
+ .eo_sanity_check = ocfs2_dx_root_sanity_check,
+ .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
+};
+
static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
@@ -339,6 +388,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
&ocfs2_xattr_value_et_ops);
}
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+ struct inode *inode,
+ struct buffer_head *bh)
+{
+ __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr,
+ NULL, &ocfs2_dx_root_et_ops);
+}
+
static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
u64 new_last_eb_blk)
{
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index cceff5c37f4..353254ba29e 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -75,6 +75,9 @@ struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct ocfs2_xattr_value_buf *vb);
+void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
+ struct inode *inode,
+ struct buffer_head *bh);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8e1709a679b..b2c52b3a148 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1956,15 +1956,16 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
}
const struct address_space_operations ocfs2_aops = {
- .readpage = ocfs2_readpage,
- .readpages = ocfs2_readpages,
- .writepage = ocfs2_writepage,
- .write_begin = ocfs2_write_begin,
- .write_end = ocfs2_write_end,
- .bmap = ocfs2_bmap,
- .sync_page = block_sync_page,
- .direct_IO = ocfs2_direct_IO,
- .invalidatepage = ocfs2_invalidatepage,
- .releasepage = ocfs2_releasepage,
- .migratepage = buffer_migrate_page,
+ .readpage = ocfs2_readpage,
+ .readpages = ocfs2_readpages,
+ .writepage = ocfs2_writepage,
+ .write_begin = ocfs2_write_begin,
+ .write_end = ocfs2_write_end,
+ .bmap = ocfs2_bmap,
+ .sync_page = block_sync_page,
+ .direct_IO = ocfs2_direct_IO,
+ .invalidatepage = ocfs2_invalidatepage,
+ .releasepage = ocfs2_releasepage,
+ .migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 04697ba7f73..4f85eceab37 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -33,6 +33,7 @@
#include <linux/random.h>
#include <linux/crc32.h>
#include <linux/time.h>
+#include <linux/debugfs.h>
#include "heartbeat.h"
#include "tcp.h"
@@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
static LIST_HEAD(o2hb_node_events);
static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
+#define O2HB_DEBUG_DIR "o2hb"
+#define O2HB_DEBUG_LIVENODES "livenodes"
+static struct dentry *o2hb_debug_dir;
+static struct dentry *o2hb_debug_livenodes;
+
static LIST_HEAD(o2hb_all_regions);
static struct o2hb_callback {
@@ -905,7 +911,77 @@ static int o2hb_thread(void *data)
return 0;
}
-void o2hb_init(void)
+#ifdef CONFIG_DEBUG_FS
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+ unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+ char *buf = NULL;
+ int i = -1;
+ int out = 0;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ goto bail;
+
+ o2hb_fill_node_map(map, sizeof(map));
+
+ while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
+ out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
+ out += snprintf(buf + out, PAGE_SIZE - out, "\n");
+
+ i_size_write(inode, out);
+
+ file->private_data = buf;
+
+ return 0;
+bail:
+ return -ENOMEM;
+}
+
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+ i_size_read(file->f_mapping->host));
+}
+#else
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+ return 0;
+}
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *ppos)
+{
+ return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static struct file_operations o2hb_debug_fops = {
+ .open = o2hb_debug_open,
+ .release = o2hb_debug_release,
+ .read = o2hb_debug_read,
+ .llseek = generic_file_llseek,
+};
+
+void o2hb_exit(void)
+{
+ if (o2hb_debug_livenodes)
+ debugfs_remove(o2hb_debug_livenodes);
+ if (o2hb_debug_dir)
+ debugfs_remove(o2hb_debug_dir);
+}
+
+int o2hb_init(void)
{
int i;
@@ -918,6 +994,24 @@ void o2hb_init(void)
INIT_LIST_HEAD(&o2hb_node_events);
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
+
+ o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
+ if (!o2hb_debug_dir) {
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
+ }
+
+ o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
+ S_IFREG|S_IRUSR,
+ o2hb_debug_dir, NULL,
+ &o2hb_debug_fops);
+ if (!o2hb_debug_livenodes) {
+ mlog_errno(-ENOMEM);
+ debugfs_remove(o2hb_debug_dir);
+ return -ENOMEM;
+ }
+
+ return 0;
}
/* if we're already in a callback then we're already serialized by the sem */
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index e511339886b..2f1649253b4 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
-void o2hb_init(void);
+void o2hb_exit(void);
+int o2hb_init(void);
int o2hb_check_node_heartbeating(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
int o2hb_check_local_node_heartbeating(void);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 70e8fa9e253..7ee6188bc79 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -881,6 +881,7 @@ static void __exit exit_o2nm(void)
o2cb_sys_shutdown();
o2net_exit();
+ o2hb_exit();
}
static int __init init_o2nm(void)
@@ -889,11 +890,13 @@ static int __init init_o2nm(void)
cluster_print_version();
- o2hb_init();
+ ret = o2hb_init();
+ if (ret)
+ goto out;
ret = o2net_init();
if (ret)
- goto out;
+ goto out_o2hb;
ret = o2net_register_hb_callbacks();
if (ret)
@@ -916,6 +919,8 @@ out_callbacks:
o2net_unregister_hb_callbacks();
out_o2net:
o2net_exit();
+out_o2hb:
+ o2hb_exit();
out:
return ret;
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f2c4098cf33..e71160cda11 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/quotaops.h>
+#include <linux/sort.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -58,6 +59,7 @@
#include "namei.h"
#include "suballoc.h"
#include "super.h"
+#include "sysfile.h"
#include "uptodate.h"
#include "buffer_head_io.h"
@@ -71,11 +73,6 @@ static unsigned char ocfs2_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ocfs2_extend_dir(struct ocfs2_super *osb,
- struct inode *dir,
- struct buffer_head *parent_fe_bh,
- unsigned int blocks_wanted,
- struct buffer_head **new_de_bh);
static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
@@ -83,22 +80,36 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
+static int ocfs2_dir_indexed(struct inode *inode);
/*
* These are distinct checks because future versions of the file system will
* want to have a trailing dirent structure independent of indexing.
*/
-static int ocfs2_dir_has_trailer(struct inode *dir)
+static int ocfs2_supports_dir_trailer(struct inode *dir)
{
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+
if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
- return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
+ return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir);
}
-static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
+/*
+ * "new' here refers to the point at which we're creating a new
+ * directory via "mkdir()", but also when we're expanding an inline
+ * directory. In either case, we don't yet have the indexing bit set
+ * on the directory, so the standard checks will fail in when metaecc
+ * is turned off. Only directory-initialization type functions should
+ * use this then. Everything else wants ocfs2_supports_dir_trailer()
+ */
+static int ocfs2_new_dir_wants_trailer(struct inode *dir)
{
- return ocfs2_meta_ecc(osb);
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+
+ return ocfs2_meta_ecc(osb) ||
+ ocfs2_supports_indexed_dirs(osb);
}
static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
@@ -130,7 +141,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir,
{
unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
- if (!ocfs2_dir_has_trailer(dir))
+ if (!ocfs2_supports_dir_trailer(dir))
return 0;
if (offset != toff)
@@ -140,7 +151,7 @@ static int ocfs2_skip_dir_trailer(struct inode *dir,
}
static void ocfs2_init_dir_trailer(struct inode *inode,
- struct buffer_head *bh)
+ struct buffer_head *bh, u16 rec_len)
{
struct ocfs2_dir_block_trailer *trailer;
@@ -150,6 +161,153 @@ static void ocfs2_init_dir_trailer(struct inode *inode,
cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
+ trailer->db_free_rec_len = cpu_to_le16(rec_len);
+}
+/*
+ * Link an unindexed block with a dir trailer structure into the index free
+ * list. This function will modify dirdata_bh, but assumes you've already
+ * passed it to the journal.
+ */
+static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
+ struct buffer_head *dx_root_bh,
+ struct buffer_head *dirdata_bh)
+{
+ int ret;
+ struct ocfs2_dx_root_block *dx_root;
+ struct ocfs2_dir_block_trailer *trailer;
+
+ ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
+ dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+
+ trailer->db_free_next = dx_root->dr_free_blk;
+ dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
+
+ ocfs2_journal_dirty(handle, dx_root_bh);
+
+out:
+ return ret;
+}
+
+static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
+{
+ return res->dl_prev_leaf_bh == NULL;
+}
+
+void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
+{
+ brelse(res->dl_dx_root_bh);
+ brelse(res->dl_leaf_bh);
+ brelse(res->dl_dx_leaf_bh);
+ brelse(res->dl_prev_leaf_bh);
+}
+
+static int ocfs2_dir_indexed(struct inode *inode)
+{
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
+ return 1;
+ return 0;
+}
+
+static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
+{
+ return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
+}
+
+/*
+ * Hashing code adapted from ext3
+ */
+#define DELTA 0x9E3779B9
+
+static void TEA_transform(__u32 buf[4], __u32 const in[])
+{
+ __u32 sum = 0;
+ __u32 b0 = buf[0], b1 = buf[1];
+ __u32 a = in[0], b = in[1], c = in[2], d = in[3];
+ int n = 16;
+
+ do {
+ sum += DELTA;
+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
+ } while (--n);
+
+ buf[0] += b0;
+ buf[1] += b1;
+}
+
+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+{
+ __u32 pad, val;
+ int i;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > num*4)
+ len = num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ val = msg[i] + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
+}
+
+static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
+ struct ocfs2_dx_hinfo *hinfo)
+{
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ const char *p;
+ __u32 in[8], buf[4];
+
+ /*
+ * XXX: Is this really necessary, if the index is never looked
+ * at by readdir? Is a hash value of '0' a bad idea?
+ */
+ if ((len == 1 && !strncmp(".", name, 1)) ||
+ (len == 2 && !strncmp("..", name, 2))) {
+ buf[0] = buf[1] = 0;
+ goto out;
+ }
+
+#ifdef OCFS2_DEBUG_DX_DIRS
+ /*
+ * This makes it very easy to debug indexing problems. We
+ * should never allow this to be selected without hand editing
+ * this file though.
+ */
+ buf[0] = buf[1] = len;
+ goto out;
+#endif
+
+ memcpy(buf, osb->osb_dx_seed, sizeof(buf));
+
+ p = name;
+ while (len > 0) {
+ str2hashbuf(p, len, in, 4);
+ TEA_transform(buf, in);
+ len -= 16;
+ p += 16;
+ }
+
+out:
+ hinfo->major_hash = buf[0];
+ hinfo->minor_hash = buf[1];
}
/*
@@ -312,6 +470,52 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
}
/*
+ * Validate a directory trailer.
+ *
+ * We check the trailer here rather than in ocfs2_validate_dir_block()
+ * because that function doesn't have the inode to test.
+ */
+static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
+{
+ int rc = 0;
+ struct ocfs2_dir_block_trailer *trailer;
+
+ trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
+ if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Invalid dirblock #%llu: "
+ "signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7,
+ trailer->db_signature);
+ goto out;
+ }
+ if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Directory block #%llu has an invalid "
+ "db_blkno of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ goto out;
+ }
+ if (le64_to_cpu(trailer->db_parent_dinode) !=
+ OCFS2_I(dir)->ip_blkno) {
+ rc = -EINVAL;
+ ocfs2_error(dir->i_sb,
+ "Directory block #%llu on dinode "
+ "#%llu has an invalid parent_dinode "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ goto out;
+ }
+out:
+ return rc;
+}
+
+/*
* This function forces all errors to -EIO for consistency with its
* predecessor, ocfs2_bread(). We haven't audited what returning the
* real error codes would do to callers. We log the real codes with
@@ -322,7 +526,6 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
{
int rc = 0;
struct buffer_head *tmp = *bh;
- struct ocfs2_dir_block_trailer *trailer;
rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
ocfs2_validate_dir_block);
@@ -331,42 +534,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
goto out;
}
- /*
- * We check the trailer here rather than in
- * ocfs2_validate_dir_block() because that function doesn't have
- * the inode to test.
- */
if (!(flags & OCFS2_BH_READAHEAD) &&
- ocfs2_dir_has_trailer(inode)) {
- trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
- if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Invalid dirblock #%llu: "
- "signature = %.*s\n",
- (unsigned long long)tmp->b_blocknr, 7,
- trailer->db_signature);
- goto out;
- }
- if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Directory block #%llu has an invalid "
- "db_blkno of %llu",
- (unsigned long long)tmp->b_blocknr,
- (unsigned long long)le64_to_cpu(trailer->db_blkno));
- goto out;
- }
- if (le64_to_cpu(trailer->db_parent_dinode) !=
- OCFS2_I(inode)->ip_blkno) {
- rc = -EINVAL;
- ocfs2_error(inode->i_sb,
- "Directory block #%llu on dinode "
- "#%llu has an invalid parent_dinode "
- "of %llu",
- (unsigned long long)tmp->b_blocknr,
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- (unsigned long long)le64_to_cpu(trailer->db_blkno));
+ ocfs2_supports_dir_trailer(inode)) {
+ rc = ocfs2_check_dir_trailer(inode, tmp);
+ if (rc) {
+ if (!*bh)
+ brelse(tmp);
+ mlog_errno(rc);
goto out;
}
}
@@ -379,6 +553,141 @@ out:
return rc ? -EIO : 0;
}
+/*
+ * Read the block at 'phys' which belongs to this directory
+ * inode. This function does no virtual->physical block translation -
+ * what's passed in is assumed to be a valid directory block.
+ */
+static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
+ struct buffer_head **bh)
+{
+ int ret;
+ struct buffer_head *tmp = *bh;
+
+ ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (ocfs2_supports_dir_trailer(dir)) {
+ ret = ocfs2_check_dir_trailer(dir, tmp);
+ if (ret) {
+ if (!*bh)
+ brelse(tmp);
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ if (!ret && !*bh)
+ *bh = tmp;
+out:
+ return ret;
+}
+
+static int ocfs2_validate_dx_root(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int ret;
+ struct ocfs2_dx_root_block *dx_root;
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
+
+ ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
+ if (ret) {
+ mlog(ML_ERROR,
+ "Checksum failed for dir index root block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return ret;
+ }
+
+ if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
+ ocfs2_error(sb,
+ "Dir Index Root # %llu has bad signature %.*s",
+ (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
+ 7, dx_root->dr_signature);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
+ struct buffer_head **dx_root_bh)
+{
+ int ret;
+ u64 blkno = le64_to_cpu(di->i_dx_root);
+ struct buffer_head *tmp = *dx_root_bh;
+
+ ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!ret && !*dx_root_bh)
+ *dx_root_bh = tmp;
+
+ return ret;
+}
+
+static int ocfs2_validate_dx_leaf(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int ret;
+ struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
+ if (ret) {
+ mlog(ML_ERROR,
+ "Checksum failed for dir index leaf block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ return ret;
+ }
+
+ if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
+ ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s",
+ 7, dx_leaf->dl_signature);
+ return -EROFS;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
+ struct buffer_head **dx_leaf_bh)
+{
+ int ret;
+ struct buffer_head *tmp = *dx_leaf_bh;
+
+ ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!ret && !*dx_leaf_bh)
+ *dx_leaf_bh = tmp;
+
+ return ret;
+}
+
+/*
+ * Read a series of dx_leaf blocks. This expects all buffer_head
+ * pointers to be NULL on function entry.
+ */
+static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
+ struct buffer_head **dx_leaf_bhs)
+{
+ int ret;
+
+ ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0,
+ ocfs2_validate_dx_leaf);
+ if (ret)
+ mlog_errno(ret);
+
+ return ret;
+}
+
static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@@ -480,39 +789,340 @@ cleanup_and_exit:
return ret;
}
+static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
+ struct ocfs2_extent_list *el,
+ u32 major_hash,
+ u32 *ret_cpos,
+ u64 *ret_phys_blkno,
+ unsigned int *ret_clen)
+{
+ int ret = 0, i, found;
+ struct buffer_head *eb_bh = NULL;
+ struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_rec *rec = NULL;
+
+ if (el->l_tree_depth) {
+ ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ eb = (struct ocfs2_extent_block *) eb_bh->b_data;
+ el = &eb->h_list;
+
+ if (el->l_tree_depth) {
+ ocfs2_error(inode->i_sb,
+ "Inode %lu has non zero tree depth in "
+ "btree tree block %llu\n", inode->i_ino,
+ (unsigned long long)eb_bh->b_blocknr);
+ ret = -EROFS;
+ goto out;
+ }
+ }
+
+ found = 0;
+ for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
+ rec = &el->l_recs[i];
+
+ if (le32_to_cpu(rec->e_cpos) <= major_hash) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
+ "record (%u, %u, 0) in btree", inode->i_ino,
+ le32_to_cpu(rec->e_cpos),
+ ocfs2_rec_clusters(el, rec));
+ ret = -EROFS;
+ goto out;
+ }
+
+ if (ret_phys_blkno)
+ *ret_phys_blkno = le64_to_cpu(rec->e_blkno);
+ if (ret_cpos)
+ *ret_cpos = le32_to_cpu(rec->e_cpos);
+ if (ret_clen)
+ *ret_clen = le16_to_cpu(rec->e_leaf_clusters);
+
+out:
+ brelse(eb_bh);
+ return ret;
+}
+
+/*
+ * Returns the block index, from the start of the cluster which this
+ * hash belongs too.
+ */
+static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
+ u32 minor_hash)
+{
+ return minor_hash & osb->osb_dx_mask;
+}
+
+static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
+ struct ocfs2_dx_hinfo *hinfo)
+{
+ return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
+}
+
+static int ocfs2_dx_dir_lookup(struct inode *inode,
+ struct ocfs2_extent_list *el,
+ struct ocfs2_dx_hinfo *hinfo,
+ u32 *ret_cpos,
+ u64 *ret_phys_blkno)
+{
+ int ret = 0;
+ unsigned int cend, uninitialized_var(clen);
+ u32 uninitialized_var(cpos);
+ u64 uninitialized_var(blkno);
+ u32 name_hash = hinfo->major_hash;
+
+ ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno,
+ &clen);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ cend = cpos + clen;
+ if (name_hash >= cend) {
+ /* We want the last cluster */
+ blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1);
+ cpos += clen