diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/stree.c |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/reiserfs/stree.c')
-rw-r--r-- | fs/reiserfs/stree.c | 2073 |
1 files changed, 2073 insertions, 0 deletions
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c new file mode 100644 index 00000000000..73ec5212178 --- /dev/null +++ b/fs/reiserfs/stree.c @@ -0,0 +1,2073 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +/* + * Written by Anatoly P. Pinchuk pap@namesys.botik.ru + * Programm System Institute + * Pereslavl-Zalessky Russia + */ + +/* + * This file contains functions dealing with S+tree + * + * B_IS_IN_TREE + * copy_item_head + * comp_short_keys + * comp_keys + * comp_short_le_keys + * le_key2cpu_key + * comp_le_keys + * bin_search + * get_lkey + * get_rkey + * key_in_buffer + * decrement_bcount + * decrement_counters_in_path + * reiserfs_check_path + * pathrelse_and_restore + * pathrelse + * search_by_key_reada + * search_by_key + * search_for_position_by_key + * comp_items + * prepare_for_direct_item + * prepare_for_direntry_item + * prepare_for_delete_or_cut + * calc_deleted_bytes_number + * init_tb_struct + * padd_item + * reiserfs_delete_item + * reiserfs_delete_solid_item + * reiserfs_delete_object + * maybe_indirect_to_direct + * indirect_to_direct_roll_back + * reiserfs_cut_from_item + * truncate_directory + * reiserfs_do_truncate + * reiserfs_paste_into_item + * reiserfs_insert_item + */ + +#include <linux/config.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/reiserfs_fs.h> +#include <linux/smp_lock.h> +#include <linux/buffer_head.h> +#include <linux/quotaops.h> + +/* Does the buffer contain a disk block which is in the tree. 
*/ +inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh) +{ + + RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT, + "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); + + return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); +} + +// +// to gets item head in le form +// +inline void copy_item_head(struct item_head * p_v_to, + const struct item_head * p_v_from) +{ + memcpy (p_v_to, p_v_from, IH_SIZE); +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. For key of items of the same + object this returns 0. + Returns: -1 if key1 < key2 + 0 if key1 == key2 + 1 if key1 > key2 */ +inline int comp_short_keys (const struct reiserfs_key * le_key, + const struct cpu_key * cpu_key) +{ + __u32 * p_s_le_u32, * p_s_cpu_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_le_u32 = (__u32 *)le_key; + p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key; + for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) { + if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 ) + return -1; + if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 ) + return 1; + } + + return 0; +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. + Compare keys using all 4 key fields. 
+ Returns: -1 if key1 < key2 0 + if key1 = key2 1 if key1 > key2 */ +static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key) +{ + int retval; + + retval = comp_short_keys (le_key, cpu_key); + if (retval) + return retval; + if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key)) + return -1; + if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key)) + return 1; + + if (cpu_key->key_length == 3) + return 0; + + /* this part is needed only when tail conversion is in progress */ + if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key)) + return -1; + + if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key)) + return 1; + + return 0; +} + + +inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2) +{ + __u32 * p_s_1_u32, * p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *)key1; + p_s_2_u32 = (__u32 *)key2; + for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { + if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) + return -1; + if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) ) + return 1; + } + return 0; +} + +inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) +{ + to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); + to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); + + // find out version of the key + to->version = le_key_version (from); + if (to->version == KEY_FORMAT_3_5) { + to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset); + to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness); + } else { + to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2); + to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2); + } +} + + + +// this does not say which one is bigger, it only 
// returns non-zero (the memcmp result, not necessarily 1) if keys
// are not equal, 0 otherwise
inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2)
{
    return memcmp (k1, k2, sizeof (struct reiserfs_key));
}

/**************************************************************************
 *  Binary search toolkit function                                        *
 *  Search for an item in the array by the item key                       *
 *  Returns:    ITEM_FOUND if found, ITEM_NOT_FOUND if not found;         *
 *        *p_n_pos = number of the searched element if found, else the    *
 *        number of the first element that is larger than p_v_key.        *
 **************************************************************************/
/* For those not familiar with binary search: n_lbound is the leftmost item that it
   could be, n_rbound the rightmost item that it could be.  We examine the item
   halfway between n_lbound and n_rbound, and that tells us either that we can increase
   n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound > n_rbound that
   there are no possible items, and we have not found it. With each examination we
   cut the number of possible items it could be by one more than half rounded down,
   or we find it. */
static inline int bin_search (
              const void * p_v_key, /* Key to search for (passed to comp_keys as a struct cpu_key). */
              const void * p_v_base,/* First item in the array. */
              int p_n_num,          /* Number of items in the array. */
              int p_n_width,        /* Item size in the array searched.
                                       Lest the reader be
                                       confused, note that this is crafted
                                       as a general function, and when it
                                       is applied specifically to the array
                                       of item headers in a node, p_n_width
                                       is actually the item header size not
                                       the item size. */
              int * p_n_pos         /* Number of the searched for element. */
            ) {
    int n_rbound, n_lbound, n_j;

    /* The loop header initialises both bounds and the first probe index;
       each case either narrows the range and re-probes, or returns. */
    for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 )
        switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) {
        case -1: n_lbound = n_j + 1; continue;
        case 1: n_rbound = n_j - 1; continue;
        case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */
        }

    /* bin_search did not find given key, it returns position of key,
       that is minimal and greater than the given one. */
    *p_n_pos = n_lbound;
    return ITEM_NOT_FOUND;
}

#ifdef CONFIG_REISERFS_CHECK
extern struct tree_balance * cur_tb;
#endif



/* Minimal possible key. It is never in the tree. */
const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}};

/* Maximal possible key. It is never in the tree. */
const struct reiserfs_key MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};


/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
   case we return a special key, either MIN_KEY or MAX_KEY.
   Every failed validity check below returns MAX_KEY; key_in_buffer() rejects a buffer whose left
   delimiting key compares greater than the searched key. */
static inline const struct reiserfs_key * get_lkey (
                    const struct path * p_s_chk_path,
                    const struct super_block * p_s_sb
                  ) {
    int n_position, n_path_offset = p_s_chk_path->path_length;
    struct buffer_head * p_s_parent;

    RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
            "PAP-5010: invalid offset in the path");

    /* While not higher in path than first element. */
    while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {

        RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
                "PAP-5020: parent is not uptodate");

        /* Parent at the path is not in the tree now. */
        if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
            return &MAX_KEY;
        /* Check whether position in the parent is correct. */
        if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
            return &MAX_KEY;
        /* Check whether parent at the path really points to the child. */
        if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
             PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
            return &MAX_KEY;
        /* Return delimiting key if position in the parent is not equal to zero. */
        if ( n_position )
            return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
    }
    /* Return MIN_KEY if we are in the root of the buffer tree. */
    if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
         SB_ROOT_BLOCK (p_s_sb) )
        return &MIN_KEY;
    return &MAX_KEY;
}


/* Get delimiting key of the buffer at the path and its right neighbor.
   Mirror image of get_lkey(): every failed validity check returns MIN_KEY, and MAX_KEY is
   returned only when the first path element is the root block. */
inline const struct reiserfs_key * get_rkey (
                    const struct path * p_s_chk_path,
                    const struct super_block * p_s_sb
                  ) {
    int n_position,
        n_path_offset = p_s_chk_path->path_length;
    struct buffer_head * p_s_parent;

    RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
            "PAP-5030: invalid offset in the path");

    while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {

        RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
                "PAP-5040: parent is not uptodate");

        /* Parent at the path is not in the tree now. */
        if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
            return &MIN_KEY;
        /* Check whether position in the parent is correct. */
        if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
            return &MIN_KEY;
        /* Check whether parent at the path really points to the child. */
        if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
             PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
            return &MIN_KEY;
        /* Return delimiting key if position in the parent is not the last one. */
        if ( n_position != B_NR_ITEMS(p_s_parent) )
            return B_N_PDELIM_KEY(p_s_parent, n_position);
    }
    /* Return MAX_KEY if we are in the root of the buffer tree. */
    if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
         SB_ROOT_BLOCK (p_s_sb) )
        return &MAX_KEY;
    return &MIN_KEY;
}


/* Check whether a key is contained in the tree rooted from a buffer at a path. */
/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY.
   Returns 1 when left key <= p_s_key < right key, 0 otherwise. */
static inline int key_in_buffer (
                      struct path * p_s_chk_path,     /* Path which should be checked.  */
                      const struct cpu_key * p_s_key, /* Key which should be checked.   */
                      struct super_block * p_s_sb     /* Super block pointer.           */
                      ) {

    RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
            p_s_chk_path->path_length > MAX_HEIGHT,
            "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
            p_s_key, p_s_chk_path->path_length);
    RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
            "PAP-5060: device must not be NODEV");

    if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
        /* left delimiting key is bigger than the key we look for */
        return 0;
    //  if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
    if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
        /* p_s_key must be less than right delimiting key */
        return 0;
    return 1;
}


/* Drop one reference on the buffer via put_bh(). A NULL pointer is ignored;
   a buffer whose b_count already reads zero triggers reiserfs_panic(). */
inline void decrement_bcount(
              struct buffer_head * p_s_bh
            ) {
    if ( p_s_bh ) {
        if ( atomic_read (&(p_s_bh->b_count)) ) {
            put_bh(p_s_bh) ;
            return;
        }
        reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh);
    }
}


/* Decrement b_count field of the all buffers in the path, walking from the
   last element back down, then mark the path empty by resetting path_length
   to ILLEGAL_PATH_ELEMENT_OFFSET. */
void decrement_counters_in_path (
              struct path * p_s_search_path
            ) {
    int n_path_offset = p_s_search_path->path_length;

    RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
            n_path_offset > EXTENDED_MAX_HEIGHT - 1,
            "PAP-5080: invalid path offset of %d", n_path_offset);

    while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
        struct buffer_head * bh;

        bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
        decrement_bcount (bh);
    }
    p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}


/* Debug check that a path has been released (path_length is back at
   ILLEGAL_PATH_ELEMENT_OFFSET). Always returns 0; the check itself lives
   entirely in the RFALSE assertion. */
int reiserfs_check_path(struct path *p) {
    RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
            "path not properly relsed") ;
    return 0 ;
}


/* Release all buffers in the path.
Restore dirty bits clean +** when preparing the buffer for the log +** +** only called from fix_nodes() +*/ +void pathrelse_and_restore ( + struct super_block *s, + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + + RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "clm-4000: invalid path offset"); + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, + n_path_offset)); + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + +/* Release all buffers in the path. */ +void pathrelse ( + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + + RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, + "PAP-5090: invalid path offset"); + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + + + +static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { + reiserfs_warning (NULL, "is_leaf: this should be caught earlier"); + return 0; + } + + nr = blkh_nr_item(blkh); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { + /* item number is too big or too small */ + reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh); + return 0; + } + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); + if (used_space != blocksize - blkh_free_space(blkh)) { + /* free space does not match to calculated amount of use space */ + reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh); + return 
0; + } + + // FIXME: it is_leaf will hit performance too much - we may have + // return 1 here + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i ++, ih ++) { + if ( le_ih_k_type(ih) == TYPE_ANY) { + reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih); + return 0; + } + if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { + reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih); + return 0; + } + if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { + reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih); + return 0; + } + if (prev_location - ih_location (ih) != ih_item_len (ih)) { + reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih); + return 0; + } + prev_location = ih_location (ih); + } + + // one may imagine much more checks + return 1; +} + + +/* returns 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + nr = blkh_level(blkh); + if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { + /* this level is not possible for internal nodes */ + reiserfs_warning (NULL, "is_internal: this should be caught earlier"); + return 0; + } + + nr = blkh_nr_item(blkh); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { + /* for internal which is not root we might check min number of keys */ + reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh); + return 0; + } + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - blkh_free_space(blkh)) { + reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh); + return 0; + } + + // one may imagine much more checks + return 1; +} + + +// make sure that bh contains formatted 
node of reiserfs tree of +// 'level'-th level +static int is_tree_node (struct buffer_head * bh, int level) +{ + if (B_LEVEL (bh) != level) { + reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d", + B_LEVEL (bh), level); + return 0; + } + if (level == DISK_LEAF_NODE_LEVEL) + return is_leaf (bh->b_data, bh->b_size, bh); + + return is_internal (bh->b_data, bh->b_size, bh); +} + + + +#define SEARCH_BY_KEY_READA 16 + +/* The function is NOT SCHEDULE-SAFE! */ +static void search_by_key_reada (struct super_block * s, + struct buffer_head **bh, + unsigned long *b, int num) +{ + int i,j; + + for (i = 0 ; i < num ; i++) { + bh[i] = sb_getblk (s, b[i]); + } + for (j = 0 ; j < i ; j++) { + /* + * note, this needs attention if we are getting rid of the BKL + * you have to make sure the prepared bit isn't set on this buffer + */ + if (!buffer_uptodate(bh[j])) + ll_rw_block(READA, 1, bh + j); + brelse(bh[j]); + } +} + +/************************************************************************** + * Algorithm SearchByKey * + * look for item in the Disk S+Tree by its key * + * Input: p_s_sb - super block * + * p_s_key - pointer to the key to search * + * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * + * p_s_search_path - path from the root to the needed leaf * + **************************************************************************/ + +/* This function fills up the path from the root to the leaf as it + descends the tree looking for the key. It uses reiserfs_bread to + try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each + node search_by_key finds using reiserfs_bread it then uses + bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking + through an internal node. 
If it is looking through a leaf node
   bin_search will find the position of the item which has key either
   equal to given key, or which is the maximal key less than the given
   key.  search_by_key returns a path that must be checked for the
   correctness of the top of the path but need not be checked for the
   correctness of the bottom of the path */
/* Returns the bin_search() result (ITEM_FOUND / ITEM_NOT_FOUND) obtained
   in the node at n_stop_level, or IO_ERROR when a node cannot be read or
   fails the is_tree_node() format check. On IO_ERROR the path has been
   released with pathrelse(). */
/* The function is NOT SCHEDULE-SAFE! */
int search_by_key (struct super_block * p_s_sb,
                   const struct cpu_key * p_s_key, /* Key to search. */
                   struct path * p_s_search_path,  /* This structure was
                                                      allocated and initialized
                                                      by the calling
                                                      function. It is filled up
                                                      by this function.  */
                   int n_stop_level /* How far down the tree to search. To
                                       stop at leaf level - set to
                                       DISK_LEAF_NODE_LEVEL */
                   ) {
    int n_block_number;
    int expected_level;
    struct buffer_head * p_s_bh;
    struct path_element * p_s_last_element;
    int n_node_level, n_retval;
    int right_neighbor_of_leaf_node;  /* only ever assigned 0 in this function */
    int fs_gen;
    struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
    unsigned long reada_blocks[SEARCH_BY_KEY_READA];
    int reada_count = 0;

#ifdef CONFIG_REISERFS_CHECK
    int n_repeat_counter = 0;
#endif

    PROC_INFO_INC( p_s_sb, search_by_key );

    /* As we add each node to a path we increase its count.  This means that
       we must be careful to release all nodes in a path before we either
       discard the path struct or re-use the path struct, as we do here. */

    decrement_counters_in_path(p_s_search_path);

    right_neighbor_of_leaf_node = 0;

    /* With each iteration of this loop we search through the items in the
       current node, and calculate the next current node(next path element)
       for the next iteration of this loop.. */
    n_block_number = SB_ROOT_BLOCK (p_s_sb);
    expected_level = -1;   /* -1 means "starting from the root"; set from SB_TREE_HEIGHT below */
    while ( 1 ) {

#ifdef CONFIG_REISERFS_CHECK
        if ( !(++n_repeat_counter % 50000) )
            reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
                              "there were %d iterations of while loop "
                              "looking for key %K",
                              current->comm, n_repeat_counter, p_s_key);
#endif

        /* prep path to have another element added to it. */
        p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length);
        fs_gen = get_generation (p_s_sb);

        /* Read the next tree node, and set the last element in the path to
           have a pointer to it. */
        if ((p_s_bh = p_s_last_element->pe_buffer =
             sb_getblk(p_s_sb, n_block_number)) ) {
            /* Kick off the read-ahead collected on the previous level only
               when the node we need is not already up to date. */
            if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
                search_by_key_reada (p_s_sb, reada_bh,
                                     reada_blocks, reada_count);
            }
            ll_rw_block(READ, 1, &p_s_bh);
            wait_on_buffer(p_s_bh);
            if (!buffer_uptodate(p_s_bh))
                goto io_error;
        } else {
            /* The label sits inside the else-branch so that a failed
               sb_getblk() and a failed read share the same cleanup.
               NOTE(review): on the read-failure path path_length is
               decremented before pathrelse(), so the buffer just obtained
               from sb_getblk() does not appear to be released here - confirm. */
io_error:
            p_s_search_path->path_length --;
            pathrelse(p_s_search_path);
            return IO_ERROR;
        }
        reada_count = 0;
        if (expected_level == -1)
            expected_level = SB_TREE_HEIGHT (p_s_sb);
        expected_level --;

        /* It is possible that schedule occurred. We must check whether the key
           to search is still in the tree rooted from the current buffer. If
           not then repeat search from the root. */
        if ( fs_changed (fs_gen, p_s_sb) &&
             (!B_IS_IN_TREE (p_s_bh) ||
              B_LEVEL(p_s_bh) != expected_level ||
              !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
            PROC_INFO_INC( p_s_sb, search_by_key_fs_changed );
            PROC_INFO_INC( p_s_sb, search_by_key_restarted );
            PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] );
            decrement_counters_in_path(p_s_search_path);

            /* Get the root block number so that we can repeat the search
               starting from the root. */
            n_block_number = SB_ROOT_BLOCK (p_s_sb);
            expected_level = -1;
            right_neighbor_of_leaf_node = 0;

            /* repeat search from the root */
            continue;
        }

        /* only check that the key is in the buffer if p_s_key is not
           equal to the MAX_KEY. Latter case is only possible in
           "finish_unfinished()" processing during mount. */
        RFALSE( comp_keys( &MAX_KEY, p_s_key ) &&
                ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
                "PAP-5130: key is not in the buffer");
#ifdef CONFIG_REISERFS_CHECK
        if ( cur_tb ) {
            print_cur_tb ("5140");
            reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!");
        }
#endif

        // make sure, that the node contents look like a node of
        // certain level
        if (!is_tree_node (p_s_bh, expected_level)) {
            reiserfs_warning (p_s_sb, "vs-5150: search_by_key: "
                              "invalid format found in block %ld. Fsck?",
                              p_s_bh->b_blocknr);
            pathrelse (p_s_search_path);
            return IO_ERROR;
        }

        /* ok, we have acquired next formatted node in the tree */
        n_node_level = B_LEVEL (p_s_bh);

        PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );

        RFALSE( n_node_level < n_stop_level,
                "vs-5152: tree level (%d) is less than stop level (%d)",
                n_node_level, n_stop_level);

        /* Leaves are searched by item head, internal nodes by key. */
        n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
                               B_NR_ITEMS(p_s_bh),
                               ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
                               &(p_s_last_element->pe_position));
        if (n_node_level == n_stop_level) {
            return n_retval;
        }

        /* we are not in the stop level */
        if (n_retval == ITEM_FOUND)
            /* item has been found, so we choose the pointer which is to the right of the found one */
            p_s_last_element->pe_position++;

        /* if item was not found we choose the position which is to
           the left of the found item. This requires no code,
           bin_search did it already.*/

        /* So we have chosen a position in the current node which is
           an internal node.  Now we calculate child block number by
           position in the node. */
        n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);

        /* if we are going to read leaf nodes, try for read ahead as well */
        if ((p_s_search_path->reada & PATH_READA) &&
            n_node_level == DISK_LEAF_NODE_LEVEL + 1)
        {
            int pos = p_s_last_element->pe_position;
            int limit = B_NR_ITEMS(p_s_bh);
            struct reiserfs_key *le_key;

            /* Backward read-ahead walks positions down towards 0,
               forward read-ahead walks up towards the item count. */
            if (p_s_search_path->reada & PATH_READA_BACK)
                limit = 0;
            while(reada_count < SEARCH_BY_KEY_READA) {
                if (pos == limit)
                    break;
                reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
                if (p_s_search_path->reada & PATH_READA_BACK)
                    pos--;
                else
                    pos++;

                /*
                 * check to make sure we're in the same object
                 * NOTE(review): in the forward direction pos can equal
                 * B_NR_ITEMS(p_s_bh) here before the key is read - confirm
                 * that this index is valid for B_N_PDELIM_KEY.
                 */
                le_key = B_N_PDELIM_KEY(p_s_bh, pos);
                if (le32_to_cpu(le_key->k_objectid) !=
                    p_s_key->on_disk_key.k_objectid)
                {
                    break;
                }
            }
        }
    }
}


/* Form the path to an item and position in this item which contains
   file byte defined by p_s_key. If there is no such item
   corresponding to the key, we point the path to the item with
   maximal key less than p_s_key, and *p_n_pos_in_item is set to one
   past the last entry/byte in the item.  If searching for entry in a
   directory item, and it is not found, *p_n_pos_in_item is set to one
   entry more than the entry with maximal key which is less than the
   sought key.

   Note that if there is no entry in this same node which is one more,
   then we point to an imaginary entry.  for direct items, the
   position is in units of bytes, for indirect items the position is
   in units of blocknr entries, for directory items the position is in
   units of directory entries.

   Returns IO_ERROR, POSITION_FOUND, POSITION_NOT_FOUND or FILE_NOT_FOUND;
   for directory keys the result of search_by_entry_key() is returned. */

/* The function is NOT SCHEDULE-SAFE! */
int search_for_position_by_key (struct super_block * p_s_sb,       /* Pointer to the super block.          */
                                const struct cpu_key * p_cpu_key,  /* Key to search (cpu variable)         */
                                struct path * p_s_search_path      /* Filled up by this function.          */
    ) {
    struct item_head * p_le_ih; /* pointer to on-disk structure */
    int n_blk_size;
    loff_t item_offset, offset;
    struct reiserfs_dir_entry de;
    int retval;

    /* If searching for directory entry. */
    if ( is_direntry_cpu_key (p_cpu_key) )
        return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de);

    /* If not searching for directory entry. */

    /* If item is found. */
    retval = search_item (p_s_sb, p_cpu_key, p_s_search_path);
    if (retval == IO_ERROR)
        return retval;
    if ( retval == ITEM_FOUND )  {

        RFALSE( ! ih_item_len(
                    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
                                   PATH_LAST_POSITION(p_s_search_path))),
                "PAP-5165: item length equals zero");

        pos_in_item(p_s_search_path) = 0;
        return POSITION_FOUND;
    }

    RFALSE( ! PATH_LAST_POSITION(p_s_search_path),
            "PAP-5170: position equals zero");

    /* Item is not found. Set path to the previous item. */
    p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path));
    n_blk_size = p_s_sb->s_blocksize;

    /* The previous item belongs to a different object. */
    if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) {
        return FILE_NOT_FOUND;
    }

    // FIXME: quite ugly this far

    item_offset = le_ih_k_offset (p_le_ih);
    offset = cpu_key_k_offset (p_cpu_key);

    /* Needed byte is contained in the item pointed to by the path.*/
    if (item_offset <= offset &&
        item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) {
        pos_in_item (p_s_search_path) = offset - item_offset;
        if ( is_indirect_le_ih(p_le_ih) ) {
            /* indirect items are addressed in block pointers, not bytes */
            pos_in_item (p_s_search_path) /= n_blk_size;
        }
        return POSITION_FOUND;
    }

    /* Needed byte is not contained in the item pointed to by the
       path. Set pos_in_item out of the item. */
    if ( is_indirect_le_ih (p_le_ih) )
        pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
    else
        pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );

    return POSITION_NOT_FOUND;
}


/* Compare given item and item pointed to by the path.
*/ +int comp_items (const struct item_head * stored_ih, const struct path * p_s_path) +{ + struct buffer_head * p_s_bh; + struct item_head * ih; + + /* Last buffer at the path is not in the tree. */ + if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) + return 1; + + /* Last path position is invalid. */ + if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) + return 1; + + /* we need only to know, whether it is the same item */ + ih = get_ih (p_s_path); + return memcmp (stored_ih, ih, IH_SIZE); +} + + +/* unformatted nodes are not logged anymore, ever. This is safe +** now +*/ +#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) + +// block can not be forgotten as it is in I/O or held by someone +#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) + + + +// prepare for delete or cut of direct item +static inline int prepare_for_direct_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + loff_t round_len; + + + if ( new_file_length == max_reiserfs_offset (inode) ) { + /* item has to be deleted */ + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; + } + + // new file gets truncated + if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) { + // + round_len = ROUND_UP (new_file_length); + /* this was n_new_file_length < le_ih ... */ + if ( round_len < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. */ + pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); + *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); + + return M_CUT; /* Cut from this item. */ + } + + + // old file: items may have any length + + if ( new_file_length < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete this item. 
*/ + } + /* Calculate first position and size for cutting from item. */ + *cut_size = -(ih_item_len(le_ih) - + (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih))); + return M_CUT; /* Cut from this item. */ +} + + +static inline int prepare_for_direntry_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + if (le_ih_k_offset (le_ih) == DOT_OFFSET && + new_file_length == max_reiserfs_offset (inode)) { + RFALSE( ih_entry_count (le_ih) != 2, + "PAP-5220: incorrect empty directory item (%h)", le_ih); + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ + } + + if ( ih_entry_count (le_ih) == 1 ) { + /* Delete the directory item such as there is one record only + in this item*/ + *cut_size = -(IH_SIZE + ih_item_len(le_ih)); + return M_DELETE; + } + + /* Cut one record from the directory item. */ + *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path))); + return M_CUT; +} + + +/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. + If the path points to an indirect item, remove some number of its unformatted nodes. + In case of file truncate calculate whether this item must be deleted/truncated or last + unformatted node of this item will be converted to a direct item. + This function returns a determination of what balance mode the calling function should employ. */ +static char prepare_for_delete_or_cut( + struct reiserfs_transaction_handle *th, + struct inode * inode, + struct path * p_s_path, + const struct cpu_key * p_s_item_key, + int * p_n_removed, /* Number of unformatted nodes which were removed + from end of the file. */ + int * p_n_cut_size, + unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. 
*/ + ) { + struct super_block * p_s_sb = inode->i_sb; + struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path); + struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path); + + BUG_ON (!th->t_trans_id); + + /* Stat_data item. */ + if ( is_statdata_le_ih (p_le_ih) ) { + + RFALSE( n_new_file_length != max_reiserfs_offset (inode), + "PAP-5210: mode must be M_DELETE"); + + *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); + return M_DELETE; + } + + + /* Directory item. */ + if ( is_direntry_le_ih (p_le_ih) ) + return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + /* Direct item. */ + if ( is_direct_le_ih (p_le_ih) ) + return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + + /* Case of an indirect item. */ + { + int n_unfm_number, /* Number of the item unformatted nodes. */ + n_counter, |