diff options
author | David Teigland <teigland@redhat.com> | 2006-01-16 16:50:04 +0000 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-01-16 16:50:04 +0000 |
commit | b3b94faa5fe5968827ba0640ee9fba4b3e7f736e (patch) | |
tree | 70bd6068b050d2c46e338484f8b03fae4365c6c3 /fs/gfs2/dir.c | |
parent | f7825dcf8c7301cfd3724eb40c5b443cc85ab7b8 (diff) |
[GFS2] The core of GFS2
This patch contains all the core files for GFS2.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/dir.c')
-rw-r--r-- | fs/gfs2/dir.c | 2157 |
1 files changed, 2157 insertions, 0 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c new file mode 100644 index 00000000000..6b1dc3dc3a2 --- /dev/null +++ b/fs/gfs2/dir.c @@ -0,0 +1,2157 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +/* +* Implements Extendible Hashing as described in: +* "Extendible Hashing" by Fagin, et al in +* __ACM Trans. on Database Systems__, Sept 1979. +* +* +* Here's the layout of dirents which is essentially the same as that of ext2 +* within a single block. The field de_name_len is the number of bytes +* actually required for the name (no null terminator). The field de_rec_len +* is the number of bytes allocated to the dirent. The offset of the next +* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is +* deleted, the preceding dirent inherits its allocated space, ie +* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained +* by adding de_rec_len to the current dirent, this essentially causes the +* deleted dirent to get jumped over when iterating through all the dirents. +* +* When deleting the first dirent in a block, there is no previous dirent so +* the field de_ino is set to zero to designate it as deleted. When allocating +* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the +* first dirent has (de_ino == 0) and de_rec_len is large enough, this first +* dirent is allocated. Otherwise it must go through all the 'used' dirents +* searching for one in which the amount of total space minus the amount of +* used space will provide enough space for the new dirent. +* +* There are two types of blocks in which dirents reside. In a stuffed dinode, +* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of +* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the +* beginning of the leaf block. The dirents reside in leaves when +* +* dip->i_di.di_flags & GFS2_DIF_EXHASH is true +* +* Otherwise, the dirents are "linear", within a single stuffed dinode block. +* +* When the dirents are in leaves, the actual contents of the directory file are +* used as an array of 64-bit block pointers pointing to the leaf blocks. The +* dirents are NOT in the directory file itself. There can be more than one block +* pointer in the array that points to the same leaf. In fact, when a directory +* is first converted from linear to exhash, all of the pointers point to the +* same leaf. +* +* When a leaf is completely full, the size of the hash table can be +* doubled unless it is already at the maximum size which is hard coded into +* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, +* but never before the maximum hash table size has been reached. +*/ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/completion.h> +#include <linux/buffer_head.h> +#include <linux/sort.h> +#include <asm/semaphore.h> + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +#define IS_LEAF 1 /* Hashed (leaf) directory */ +#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ + +#if 1 +#define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1) +#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1)) +#else +#define gfs2_disk_hash2offset(h) (((uint64_t)(h))) +#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)))) +#endif + +typedef int (*leaf_call_t) (struct gfs2_inode *dip, + uint32_t index, uint32_t len, uint64_t leaf_no, + void *data); + +/** + * int gfs2_filecmp - Compare two filenames + * @file1: The first filename + * @file2: The second filename + * @len_of_file2: The length of the second file + * + * This routine compares two filenames and returns 1 if they are equal. + * + * Returns: 1 if the files are the same, otherwise 0. + */ + +int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2) +{ + if (file1->len != len_of_file2) + return 0; + if (memcmp(file1->name, file2, file1->len)) + return 0; + return 1; +} + +/** + * dirent_first - Return the first dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * return first dirent whether bh points to leaf or stuffed dinode + * + * Returns: IS_LEAF, IS_DINODE, or -errno + */ + +static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent **dent) +{ + struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; + + if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { + if (gfs2_meta_check(dip->i_sbd, bh)) + return -EIO; + *dent = (struct gfs2_dirent *)(bh->b_data + + sizeof(struct gfs2_leaf)); + return IS_LEAF; + } else { + if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI)) + return -EIO; + *dent = (struct gfs2_dirent *)(bh->b_data + + sizeof(struct gfs2_dinode)); + return IS_DINODE; + } +} + +/** + * dirent_next - Next dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent **dent) +{ + struct gfs2_dirent *tmp, *cur; + char *bh_end; + uint32_t cur_rec_len; + + cur = *dent; + bh_end = bh->b_data + bh->b_size; + cur_rec_len = be32_to_cpu(cur->de_rec_len); + + if ((char *)cur + cur_rec_len >= bh_end) { + if ((char *)cur + cur_rec_len > bh_end) { + gfs2_consist_inode(dip); + return -EIO; + } + return -ENOENT; + } + + tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len); + + if ((char *)tmp + be32_to_cpu(tmp->de_rec_len) > bh_end) { + gfs2_consist_inode(dip); + return -EIO; + } + /* Only the first dent could ever have de_inum.no_addr == 0 */ + if (!tmp->de_inum.no_addr) { + gfs2_consist_inode(dip); + return -EIO; + } + + *dent = tmp; + + return 0; +} + +/** + * dirent_del - Delete a dirent + * @dip: The GFS2 inode + * @bh: The buffer + * @prev: The previous dirent + * @cur: The current dirent + * + */ + +static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent *prev, struct gfs2_dirent *cur) +{ + uint32_t cur_rec_len, prev_rec_len; + + if (!cur->de_inum.no_addr) { + gfs2_consist_inode(dip); + return; + } + + gfs2_trans_add_bh(dip->i_gl, bh); + + /* If there is no prev entry, this is the first entry in the block. + The de_rec_len is already as big as it needs to be. Just zero + out the inode number and return. */ + + if (!prev) { + cur->de_inum.no_addr = 0; /* No endianess worries */ + return; + } + + /* Combine this dentry with the previous one. */ + + prev_rec_len = be32_to_cpu(prev->de_rec_len); + cur_rec_len = be32_to_cpu(cur->de_rec_len); + + if ((char *)prev + prev_rec_len != (char *)cur) + gfs2_consist_inode(dip); + if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) + gfs2_consist_inode(dip); + + prev_rec_len += cur_rec_len; + prev->de_rec_len = cpu_to_be32(prev_rec_len); +} + +/** + * gfs2_dirent_alloc - Allocate a directory entry + * @dip: The GFS2 inode + * @bh: The buffer + * @name_len: The length of the name + * @dent_out: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh, + int name_len, struct gfs2_dirent **dent_out) +{ + struct gfs2_dirent *dent, *new; + unsigned int rec_len = GFS2_DIRENT_SIZE(name_len); + unsigned int entries = 0, offset = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = be16_to_cpu(leaf->lf_entries); + offset = sizeof(struct gfs2_leaf); + } else { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = be32_to_cpu(dinode->di_entries); + offset = sizeof(struct gfs2_dinode); + } + + if (!entries) { + if (dent->de_inum.no_addr) { + gfs2_consist_inode(dip); + return -EIO; + } + + gfs2_trans_add_bh(dip->i_gl, bh); + + dent->de_rec_len = bh->b_size - offset; + dent->de_rec_len = cpu_to_be32(dent->de_rec_len); + dent->de_name_len = name_len; + + *dent_out = dent; + return 0; + } + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = be32_to_cpu(dent->de_rec_len); + cur_name_len = dent->de_name_len; + + if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) { + gfs2_trans_add_bh(dip->i_gl, bh); + + if (dent->de_inum.no_addr) { + new = (struct gfs2_dirent *)((char *)dent + + GFS2_DIRENT_SIZE(cur_name_len)); + memset(new, 0, sizeof(struct gfs2_dirent)); + + new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len); + new->de_rec_len = cpu_to_be32(new->de_rec_len); + new->de_name_len = name_len; + + dent->de_rec_len = cur_rec_len - be32_to_cpu(new->de_rec_len); + dent->de_rec_len = cpu_to_be32(dent->de_rec_len); + + *dent_out = new; + return 0; + } + + dent->de_name_len = name_len; + + *dent_out = dent; + return 0; + } + } while (dirent_next(dip, bh, &dent) == 0); + + return -ENOSPC; +} + +/** + * dirent_fits - See if we can fit a entry in this buffer + * @dip: The GFS2 inode + * @bh: The buffer + * @name_len: The length of the name + * + * Returns: 1 if it can fit, 0 otherwise + */ + +static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh, + int name_len) +{ + struct gfs2_dirent *dent; + unsigned int rec_len = GFS2_DIRENT_SIZE(name_len); + unsigned int entries = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = be16_to_cpu(leaf->lf_entries); + } else { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = be32_to_cpu(dinode->di_entries); + } + + if (!entries) + return 1; + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = be32_to_cpu(dent->de_rec_len); + cur_name_len = dent->de_name_len; + + if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) + return 1; + } while (dirent_next(dip, bh, &dent) == 0); + + return 0; +} + +static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh, + struct qstr *filename, struct gfs2_dirent **dent_out, + struct gfs2_dirent **dent_prev) +{ + uint32_t hash; + struct gfs2_dirent *dent, *prev = NULL; + unsigned int entries = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = be16_to_cpu(leaf->lf_entries); + } else if (type == IS_DINODE) { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = be32_to_cpu(dinode->di_entries); + } + + hash = gfs2_disk_hash(filename->name, filename->len); + + do { + if (!dent->de_inum.no_addr) { + prev = dent; + continue; + } + + if (be32_to_cpu(dent->de_hash) == hash && + gfs2_filecmp(filename, (char *)(dent + 1), + dent->de_name_len)) { + *dent_out = dent; + if (dent_prev) + *dent_prev = prev; + + return 0; + } + + prev = dent; + } while (dirent_next(dip, bh, &dent) == 0); + + return -ENOENT; +} + +static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no, + struct buffer_head **bhp) +{ + int error; + + error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp); + if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF)) + error = -EIO; + + return error; +} + +/** + * get_leaf_nr - Get a leaf number associated with the index + * @dip: The GFS2 inode + * @index: + * @leaf_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index, + uint64_t *leaf_out) +{ + uint64_t leaf_no; + int error; + + error = gfs2_jdata_read_mem(dip, (char *)&leaf_no, + index * sizeof(uint64_t), + sizeof(uint64_t)); + if (error != sizeof(uint64_t)) + return (error < 0) ? error : -EIO; + + *leaf_out = be64_to_cpu(leaf_no); + + return 0; +} + +static int get_first_leaf(struct gfs2_inode *dip, uint32_t index, + struct buffer_head **bh_out) +{ + uint64_t leaf_no; + int error; + + error = get_leaf_nr(dip, index, &leaf_no); + if (!error) + error = get_leaf(dip, leaf_no, bh_out); + + return error; +} + +static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in, + struct buffer_head **bh_out) +{ + struct gfs2_leaf *leaf; + int error; + + leaf = (struct gfs2_leaf *)bh_in->b_data; + + if (!leaf->lf_next) + error = -ENOENT; + else + error = get_leaf(dip, be64_to_cpu(leaf->lf_next), bh_out); + + return error; +} + +static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_dirent **dent_out, + struct gfs2_dirent **dent_prev, + struct buffer_head **bh_out) +{ + struct buffer_head *bh = NULL, *bh_next; + uint32_t hsize, index; + uint32_t hash; + int error; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* Figure out the address of the leaf node. */ + + hash = gfs2_disk_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_first_leaf(dip, index, &bh_next); + if (error) + return error; + + /* Find the entry */ + + do { + brelse(bh); + + bh = bh_next; + + error = leaf_search(dip, bh, filename, dent_out, dent_prev); + switch (error) { + case 0: + *bh_out = bh; + return 0; + + case -ENOENT: + break; + + default: + brelse(bh); + return error; + } + + error = get_next_leaf(dip, bh, &bh_next); + } + while (!error); + + brelse(bh); + + return error; +} + +/** + * dir_make_exhash - Convert a stuffed directory into an ExHash directory + * @dip: The GFS2 inode + * + * Returns: 0 on success, error code otherwise + */ + +static int dir_make_exhash(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_dirent *dent; + struct buffer_head *bh, *dibh; + struct gfs2_leaf *leaf; + int y; + uint32_t x; + uint64_t *lp, bn; + int error; + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + /* Allocate a new block for the first leaf node */ + + bn = gfs2_alloc_meta(dip); + + /* Turn over a new leaf */ + + bh = gfs2_meta_new(dip->i_gl, bn); + gfs2_trans_add_bh(dip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + /* Fill in the leaf structure */ + + leaf = (struct gfs2_leaf *)bh->b_data; + + gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16)); + + leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); + leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries); + + /* Copy dirents */ + + gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, + sizeof(struct gfs2_dinode)); + + /* Find last entry */ + + x = 0; + dirent_first(dip, bh, &dent); + + do { + if (!dent->de_inum.no_addr) + continue; + if (++x == dip->i_di.di_entries) + break; + } + while (dirent_next(dip, bh, &dent) == 0); + + /* Adjust the last dirent's record length + (Remember that dent still points to the last entry.) */ + + dent->de_rec_len = be32_to_cpu(dent->de_rec_len) + + sizeof(struct gfs2_dinode) - + sizeof(struct gfs2_leaf); + dent->de_rec_len = cpu_to_be32(dent->de_rec_len); + + brelse(bh); + + /* We're done with the new leaf block, now setup the new + hash table. */ + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); + + for (x = sdp->sd_hash_ptrs; x--; lp++) + *lp = cpu_to_be64(bn); + + dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; + dip->i_di.di_blocks++; + dip->i_di.di_flags |= GFS2_DIF_EXHASH; + dip->i_di.di_payload_format = 0; + + for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; + dip->i_di.di_depth = y; + + gfs2_dinode_out(&dip->i_di, dibh->b_data); + + brelse(dibh); + + return 0; +} + +/** + * dir_split_leaf - Split a leaf block into two + * @dip: The GFS2 inode + * @index: + * @leaf_no: + * + * Returns: 0 on success, error code on failure + */ + +static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index, + uint64_t leaf_no) +{ + struct buffer_head *nbh, *obh, *dibh; + struct gfs2_leaf *nleaf, *oleaf; + struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new; + uint32_t start, len, half_len, divider; + uint64_t bn, *lp; + uint32_t name_len; + int x, moved = 0; + int error; + + /* Allocate the new leaf block */ + + bn = gfs2_alloc_meta(dip); + + /* Get the new leaf block */ + + nbh = gfs2_meta_new(dip->i_gl, bn); + gfs2_trans_add_bh(dip->i_gl, nbh); + gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); + gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header)); + + nleaf = (struct gfs2_leaf *)nbh->b_data; + + nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); + + /* Get the old leaf block */ + + error = get_leaf(dip, leaf_no, &obh); + if (error) + goto fail; + + gfs2_trans_add_bh(dip->i_gl, obh); + + oleaf = (struct gfs2_leaf *)obh->b_data; + + /* Compute the start and len of leaf pointers in the hash table. */ + + len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth)); + half_len = len >> 1; + if (!half_len) { + gfs2_consist_inode(dip); + error = -EIO; + goto fail_brelse; + } + + start = (index & ~(len - 1)); + + /* Change the pointers. + Don't bother distinguishing stuffed from non-stuffed. + This code is complicated enough already. */ + + lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL); + + error = gfs2_jdata_read_mem(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + /* Change the pointers */ + + for (x = 0; x < half_len; x++) + lp[x] = cpu_to_be64(bn); + + error = gfs2_jdata_write_mem(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + kfree(lp); + + /* Compute the divider */ + + divider = (start + half_len) << (32 - dip->i_di.di_depth); + + /* Copy the entries */ + + dirent_first(dip, obh, &dent); + + do { + next = dent; + if (dirent_next(dip, obh, &next)) + next = NULL; + + if (dent->de_inum.no_addr && + be32_to_cpu(dent->de_hash) < divider) { + name_len = dent->de_name_len; + + gfs2_dirent_alloc(dip, nbh, name_len, &new); + + new->de_inum = dent->de_inum; /* No endian worries */ + new->de_hash = dent->de_hash; /* No endian worries */ + new->de_type = dent->de_type; /* No endian worries */ + memcpy((char *)(new + 1), (char *)(dent + 1), + name_len); + + nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries)+1; + nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries); + + dirent_del(dip, obh, prev, dent); + + if (!oleaf->lf_entries) + gfs2_consist_inode(dip); + oleaf->lf_entries = be16_to_cpu(oleaf->lf_entries)-1; + oleaf->lf_entries = cpu_to_be16(oleaf->lf_entries); + + if (!prev) + prev = dent; + + moved = 1; + } else + prev = dent; + + dent = next; + } + while (dent); + + /* If none of the entries got moved into the new leaf, + artificially fill in the first entry. */ + + if (!moved) { + gfs2_dirent_alloc(dip, nbh, 0, &new); + new->de_inum.no_addr = 0; + } + + oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1; + oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth); + nleaf->lf_depth = oleaf->lf_depth; + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(dip->i_sbd, !error)) { + dip->i_di.di_blocks++; + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + } + + brelse(obh); + brelse(nbh); + + return error; + + fail_lpfree: + kfree(lp); + + fail_brelse: + brelse(obh); + + fail: + brelse(nbh); + return error; +} + +/** + * dir_double_exhash - Double size of ExHash table + * @dip: The GFS2 dinode + * + * Returns: 0 on success, error code on failure + */ + +static int dir_double_exhash(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct buffer_head *dibh; + uint32_t hsize; + uint64_t *buf; + uint64_t *from, *to; + uint64_t block; + int x; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* Allocate both the "from" and "to" buffers in one big chunk */ + + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); + + for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { + error = gfs2_jdata_read_mem(dip, (char *)buf, + block * sdp->sd_hash_bsize, + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + + from = buf; + to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize); + + for (x = sdp->sd_hash_ptrs; x--; from++) { + *to++ = *from; /* No endianess worries */ + *to++ = *from; + } + + error = gfs2_jdata_write_mem(dip, + (char *)buf + sdp->sd_hash_bsize, + block * sdp->sd_sb.sb_bsize, + sdp->sd_sb.sb_bsize); + if (error != sdp->sd_sb.sb_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + } + + kfree(buf); + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(sdp, !error)) { + dip->i_di.di_depth++; + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + } + + return error; + + fail: + kfree(buf); + + return error; +} + +/** + * compare_dents - compare directory entries by hash value + * @a: first dent + * @b: second dent + * + * When comparing the hash entries of @a to @b: + * gt: returns 1 + * lt: returns -1 + * eq: returns 0 + */ + +static int compare_dents(const void *a, const void *b) +{ + struct gfs2_dirent *dent_a, *dent_b; + uint32_t hash_a, hash_b; + int ret = 0; + + dent_a = *(struct gfs2_dirent **)a; + hash_a = dent_a->de_hash; + hash_a = be32_to_cpu(hash_a); + + dent_b = *(struct gfs2_dirent **)b; + hash_b = dent_b->de_hash; + hash_b = be32_to_cpu(hash_b); + + if (hash_a > hash_b) + ret = 1; + else if (hash_a < hash_b) + ret = -1; + else { + unsigned int len_a = dent_a->de_name_len; + unsigned int len_b = dent_b->de_name_len; + + if (len_a > len_b) + ret = 1; + else if (len_a < len_b) + ret = -1; + else + ret = memcmp((char *)(dent_a + 1), + (char *)(dent_b + 1), + len_a); + } + + return ret; +} + +/** + * do_filldir_main - read out directory entries + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @darr: an array of struct gfs2_dirent pointers to read + * @entries: the number of entries in darr + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Jump through some hoops to make sure that if there are hash collsions, + * they are read out at the beginning of a buffer. We want to minimize + * the possibility that they will fall into different readdir buffers or + * that someone will want to seek to that location. + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct gfs2_dirent **darr, uint32_t entries, + int *copied) +{ + struct gfs2_dirent *dent, *dent_next; + struct gfs2_inum inum; + uint64_t off, off_next; + unsigned int x, y; + int run = 0; + int error = 0; + + sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); + + dent_next = darr[0]; + off_next = be32_to_cpu(dent_next->de_hash); + off_next = gfs2_disk_hash2offset(off_next); + + for (x = 0, y = 1; x < entries; x++, y++) { + dent = dent_next; + off = off_next; + + if (y < entries) { + dent_next = darr[y]; + off_next = be32_to_cpu(dent_next->de_hash); + off_next = gfs2_disk_hash2offset(off_next); + + if (off < *offset) + continue; + *offset = off; + + if (off_next == off) { + if (*copied && !run) + return 1; + run = 1; + } else + run = 0; + } else { + if (off < *offset) + continue; + *offset = off; + } + + gfs2_inum_in(&inum, (char *)&dent->de_inum); + + error = filldir(opaque, (char *)(dent + 1), + dent->de_name_len, + off, &inum, + dent->de_type); + if (error) + return 1; + + *copied = 1; + } + + /* Increment the *offset by one, so the next time we come into the + do_filldir fxn, we get the next entry instead of the last one in the + current leaf */ + + (*offset)++; + + return 0; +} + +/** + * do_filldir_single - Read directory entries out of a single block + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the block + * @entries: the number of entries in the block + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct buffer_head *bh, uint32_t entries, + int *copied) +{ + struct gfs2_dirent **darr; + struct gfs2_dirent *de; + unsigned int e = 0; + int error; + + if (!entries) + return 0; + + darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL); + if (!darr) + return -ENOMEM; + + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + + if (e != entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + out: + kfree(darr); + + return error; +} + +/** + * do_filldir_multi - Read directory entries out of a linked leaf list + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the first leaf in the list + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct buffer_head *bh, int *copied) +{ + struct buffer_head **larr = NULL; + struct gfs2_dirent **darr; + struct gfs2_leaf *leaf; + struct buffer_head *tmp_bh; + struct gfs2_dirent *de; + unsigned int entries, e = 0; + unsigned int leaves = 0, l = 0; + unsigned int x; + uint64_t ln; + int error = 0; + + /* Count leaves and entries */ + + leaf = (struct gfs2_leaf *)bh->b_data; + entries = be16_to_cpu(leaf->lf_entries); + ln = leaf->lf_next; + + while (ln) { + ln = be64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + return error; + + leaf = (struct gfs2_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + entries += be16_to_cpu(leaf->lf_entries); + leaves++; + } + ln = leaf->lf_next; + + brelse(tmp_bh); + } + + if (!entries) + return 0; + + if (leaves) { + larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL); + if (!larr) + return -ENOMEM; + } + + darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL); + if (!darr) { + kfree(larr); + return -ENOMEM; + } + + leaf = (struct gfs2_leaf *)bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + } + ln = leaf->lf_next; + + while (ln) { + ln = be64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + goto out; + + leaf = (struct gfs2_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, tmp_bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, tmp_bh, &de) == 0); + + larr[l++] = tmp_bh; + + ln = leaf->lf_next; + } else { + ln = leaf->lf_next; + brelse(tmp_bh); + } + } + + if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) { + error = -EIO; + goto out; + } + if (e != entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + out: + kfree(darr); + for (x = 0; x < l; x++) + brelse(larr[x]); + kfree(larr); + + return error; +} + +/** + * dir_e_search - Search exhash (leaf) dir for inode matching name + * @dip: The GFS2 inode + * @filename: Filename string + * @inode: If non-NULL, function fills with formal inode # and block address + * @type: If non-NULL, function fills with DT_... dinode type + * + * Returns: + */ + +static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int *type) +{ + struct buffer_head *bh; + struct gfs2_dirent *dent; + int error; + + error = linked_leaf_search(dip, filename, &dent, NULL, &bh); + if (error) + return error; + + if (inum) + gfs2_inum_in(inum, (char *)&dent->de_inum); + if (type) + *type = dent->de_type; + + brelse(bh); + + return 0; +} + +static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int type) +{ + struct buffer_head *bh, *nbh, *dibh; + struct gfs2_leaf *leaf, *nleaf; + struct gfs2_dirent *dent; + uint32_t hsize, index; + uint32_t hash; + uint64_t leaf_no, bn; + int error; + + restart: + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* |