diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/inode.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/reiserfs/inode.c')
-rw-r--r-- | fs/reiserfs/inode.c | 2846 |
1 files changed, 2846 insertions, 0 deletions
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c new file mode 100644 index 00000000000..7543031396f --- /dev/null +++ b/fs/reiserfs/inode.c @@ -0,0 +1,2846 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#include <linux/config.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/reiserfs_fs.h> +#include <linux/reiserfs_acl.h> +#include <linux/reiserfs_xattr.h> +#include <linux/smp_lock.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <asm/uaccess.h> +#include <asm/unaligned.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/writeback.h> +#include <linux/quotaops.h> + +extern int reiserfs_default_io_size; /* default io size devuned in super.c */ + +static int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to); +static int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to); + +void reiserfs_delete_inode (struct inode * inode) +{ + /* We need blocks for transaction + (user+group) quota update (possibly delete) */ + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS; + struct reiserfs_transaction_handle th ; + + reiserfs_write_lock(inode->i_sb); + + /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ + if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + down (&inode->i_sem); + + reiserfs_delete_xattrs (inode); + + if (journal_begin(&th, inode->i_sb, jbegin_count)) { + up (&inode->i_sem); + goto out; + } + reiserfs_update_inode_transaction(inode) ; + + if (reiserfs_delete_object (&th, inode)) { + up (&inode->i_sem); + goto out; + } + + /* Do quota update inside a transaction for journaled quotas. We must do that + * after delete_object so that quota updates go into the same transaction as + * stat data deletion */ + DQUOT_FREE_INODE(inode); + + if (journal_end(&th, inode->i_sb, jbegin_count)) { + up (&inode->i_sem); + goto out; + } + + up (&inode->i_sem); + + /* all items of file are deleted, so we can remove "save" link */ + remove_save_link (inode, 0/* not truncate */); /* we can't do anything + * about an error here */ + } else { + /* no object items are in the tree */ + ; + } +out: + clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ + inode->i_blocks = 0; + reiserfs_write_unlock(inode->i_sb); +} + +static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, + loff_t offset, int type, int length ) +{ + key->version = version; + + key->on_disk_key.k_dir_id = dirid; + key->on_disk_key.k_objectid = objectid; + set_cpu_key_k_offset (key, offset); + set_cpu_key_k_type (key, type); + key->key_length = length; +} + + +/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set + offset and type of key */ +void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset, + int type, int length ) +{ + _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), + le32_to_cpu (INODE_PKEY (inode)->k_objectid), + offset, type, length); +} + + +// +// when key is 0, do not set version and short key +// +inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key, + int version, + loff_t offset, int type, int length, + int entry_count/*or ih_free_space*/) +{ + if (key) { + ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); + ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); + } + put_ih_version( ih, version ); + set_le_ih_k_offset (ih, offset); + set_le_ih_k_type (ih, type); + put_ih_item_len( ih, length ); + /* set_ih_free_space (ih, 0);*/ + // for directory items it is entry count, for directs and stat + // datas - 0xffff, for indirects - 0 + put_ih_entry_count( ih, entry_count ); +} + +// +// FIXME: we might cache recently accessed indirect item + +// Ugh. Not too eager for that.... +// I cut the code until such time as I see a convincing argument (benchmark). +// I don't want a bloated inode struct..., and I don't like code complexity.... + +/* cutting the code is fine, since it really isn't in use yet and is easy +** to add back in. But, Vladimir has a really good idea here. Think +** about what happens for reading a file. For each page, +** The VFS layer calls reiserfs_readpage, who searches the tree to find +** an indirect item. This indirect item has X number of pointers, where +** X is a big number if we've done the block allocation right. But, +** we only use one or two of these pointers during each call to readpage, +** needlessly researching again later on. +** +** The size of the cache could be dynamic based on the size of the file. +** +** I'd also like to see us cache the location the stat data item, since +** we are needlessly researching for that frequently. +** +** --chris +*/ + +/* If this page has a file tail in it, and +** it was read in by get_block_create_0, the page data is valid, +** but tail is still sitting in a direct item, and we can't write to +** it. So, look through this page, and check all the mapped buffers +** to make sure they have valid block numbers. Any that don't need +** to be unmapped, so that block_prepare_write will correctly call +** reiserfs_get_block to convert the tail into an unformatted node +*/ +static inline void fix_tail_page_for_writing(struct page *page) { + struct buffer_head *head, *next, *bh ; + + if (page && page_has_buffers(page)) { + head = page_buffers(page) ; + bh = head ; + do { + next = bh->b_this_page ; + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + reiserfs_unmap_buffer(bh) ; + } + bh = next ; + } while (bh != head) ; + } +} + +/* reiserfs_get_block does not need to allocate a block only if it has been + done already or non-hole position has been found in the indirect item */ +static inline int allocation_needed (int retval, b_blocknr_t allocated, + struct item_head * ih, + __u32 * item, int pos_in_item) +{ + if (allocated) + return 0; + if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && + get_block_num(item, pos_in_item)) + return 0; + return 1; +} + +static inline int indirect_item_found (int retval, struct item_head * ih) +{ + return (retval == POSITION_FOUND) && is_indirect_le_ih (ih); +} + + +static inline void set_block_dev_mapped (struct buffer_head * bh, + b_blocknr_t block, struct inode * inode) +{ + map_bh(bh, inode->i_sb, block); +} + + +// +// files which were created in the earlier version can not be longer, +// than 2 gb +// +static int file_capable (struct inode * inode, long block) +{ + if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file. + block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb + return 1; + + return 0; +} + +/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, + struct inode *inode, struct path *path) { + struct super_block *s = th->t_super ; + int len = th->t_blocks_allocated ; + int err; + + BUG_ON (!th->t_trans_id); + BUG_ON (!th->t_refcount); + + /* we cannot restart while nested */ + if (th->t_refcount > 1) { + return 0 ; + } + pathrelse(path) ; + reiserfs_update_sd(th, inode) ; + err = journal_end(th, s, len) ; + if (!err) { + err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ; + if (!err) + reiserfs_update_inode_transaction(inode) ; + } + return err; +} + +// it is called by get_block when create == 0. Returns block number +// for 'block'-th logical block of file. When it hits direct item it +// returns 0 (being called from bmap) or read direct item into piece +// of page (bh_result) + +// Please improve the english/clarity in the comment above, as it is +// hard to understand. + +static int _get_block_create_0 (struct inode * inode, long block, + struct buffer_head * bh_result, + int args) +{ + INITIALIZE_PATH (path); + struct cpu_key key; + struct buffer_head * bh; + struct item_head * ih, tmp_ih; + int fs_gen ; + int blocknr; + char * p = NULL; + int chars; + int ret ; + int done = 0 ; + unsigned long offset ; + + // prepare the key to look for the 'block'-th block of file + make_cpu_key (&key, inode, + (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); + +research: + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + // We do not return -ENOENT if there is a hole but page is uptodate, because it means + // That there is some MMAPED data associated with it that is yet to be written to disk. + if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { + return -ENOENT ; + } + return 0 ; + } + + // + bh = get_last_bh (&path); + ih = get_ih (&path); + if (is_indirect_le_ih (ih)) { + __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); + + /* FIXME: here we could cache indirect item or part of it in + the inode to avoid search_by_key in case of subsequent + access to file */ + blocknr = get_block_num(ind_item, path.pos_in_item) ; + ret = 0 ; + if (blocknr) { + map_bh(bh_result, inode->i_sb, blocknr); + if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { + set_buffer_boundary(bh_result); + } + } else + // We do not return -ENOENT if there is a hole but page is uptodate, because it means + // That there is some MMAPED data associated with it that is yet to be written to disk. + if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { + ret = -ENOENT ; + } + + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + return ret ; + } + + // requested data are in direct item(s) + if (!(args & GET_BLOCK_READ_DIRECT)) { + // we are called by bmap. FIXME: we can not map block of file + // when it is stored in direct item(s) + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + return -ENOENT; + } + + /* if we've got a direct item, and the buffer or page was uptodate, + ** we don't want to pull data off disk again. skip to the + ** end, where we map the buffer and return + */ + if (buffer_uptodate(bh_result)) { + goto finished ; + } else + /* + ** grab_tail_page can trigger calls to reiserfs_get_block on up to date + ** pages without any buffers. If the page is up to date, we don't want + ** read old data off disk. Set the up to date bit on the buffer instead + ** and jump to the end + */ + if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { + set_buffer_uptodate(bh_result); + goto finished ; + } + + // read file tail into part of page + offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; + fs_gen = get_generation(inode->i_sb) ; + copy_item_head (&tmp_ih, ih); + + /* we only want to kmap if we are reading the tail into the page. + ** this is not the common case, so we don't kmap until we are + ** sure we need to. But, this means the item might move if + ** kmap schedules + */ + if (!p) { + p = (char *)kmap(bh_result->b_page) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } + } + p += offset ; + memset (p, 0, inode->i_sb->s_blocksize); + do { + if (!is_direct_le_ih (ih)) { + BUG (); + } + /* make sure we don't read more bytes than actually exist in + ** the file. This can happen in odd cases where i_size isn't + ** correct, and when direct item padding results in a few + ** extra bytes at the end of the direct item + */ + if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) + break ; + if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { + chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item; + done = 1 ; + } else { + chars = ih_item_len(ih) - path.pos_in_item; + } + memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); + + if (done) + break ; + + p += chars; + + if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) + // we done, if read direct item is not the last item of + // node FIXME: we could try to check right delimiting key + // to see whether direct item continues in the right + // neighbor or rely on i_size + break; + + // update key to look for the next piece + set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) + // we read something from tail, even if now we got IO_ERROR + break; + bh = get_last_bh (&path); + ih = get_ih (&path); + } while (1); + + flush_dcache_page(bh_result->b_page) ; + kunmap(bh_result->b_page) ; + +finished: + pathrelse (&path); + /* this buffer has valid data, but isn't valid for io. mapping it to + * block #0 tells the rest of reiserfs it just has a tail in it + */ + map_bh(bh_result, inode->i_sb, 0); + set_buffer_uptodate (bh_result); + return 0; +} + + +// this is called to create file map. So, _get_block_create_0 will not +// read direct item +static int reiserfs_bmap (struct inode * inode, sector_t block, + struct buffer_head * bh_result, int create) +{ + if (!file_capable (inode, block)) + return -EFBIG; + + reiserfs_write_lock(inode->i_sb); + /* do not read the direct item */ + _get_block_create_0 (inode, block, bh_result, 0) ; + reiserfs_write_unlock(inode->i_sb); + return 0; +} + +/* special version of get_block that is only used by grab_tail_page right +** now. It is sent to block_prepare_write, and when you try to get a +** block past the end of the file (or a block from a hole) it returns +** -ENOENT instead of a valid buffer. block_prepare_write expects to +** be able to do i/o on the buffers returned, unless an error value +** is also returned. +** +** So, this allows block_prepare_write to be used for reading a single block +** in a page. Where it does not produce a valid page for holes, or past the +** end of the file. This turns out to be exactly what we need for reading +** tails for conversion. +** +** The point of the wrapper is forcing a certain value for create, even +** though the VFS layer is calling this function with create==1. If you +** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, +** don't use this function. +*/ +static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, + struct buffer_head * bh_result, int create) { + return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; +} + +/* This is special helper for reiserfs_get_block in case we are executing + direct_IO request. */ +static int reiserfs_get_blocks_direct_io(struct inode *inode, + sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, + int create) +{ + int ret ; + + bh_result->b_page = NULL; + + /* We set the b_size before reiserfs_get_block call since it is + referenced in convert_tail_for_hole() that may be called from + reiserfs_get_block() */ + bh_result->b_size = (1 << inode->i_blkbits); + + ret = reiserfs_get_block(inode, iblock, bh_result, + create | GET_BLOCK_NO_DANGLE) ; + if (ret) + goto out; + + /* don't allow direct io onto tail pages */ + if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* make sure future calls to the direct io funcs for this offset + ** in the file fail by unmapping the buffer + */ + clear_buffer_mapped(bh_result); + ret = -EINVAL ; + } + /* Possible unpacked tail. Flush the data before pages have + disappeared */ + if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { + int err; + lock_kernel(); + err = reiserfs_commit_for_inode(inode); + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + unlock_kernel(); + if (err < 0) + ret = err; + } +out: + return ret ; +} + + +/* +** helper function for when reiserfs_get_block is called for a hole +** but the file tail is still in a direct item +** bh_result is the buffer head for the hole +** tail_offset is the offset of the start of the tail in the file +** +** This calls prepare_write, which will start a new transaction +** you should not be in a transaction, or have any paths held when you +** call this. +*/ +static int convert_tail_for_hole(struct inode *inode, + struct buffer_head *bh_result, + loff_t tail_offset) { + unsigned long index ; + unsigned long tail_end ; + unsigned long tail_start ; + struct page * tail_page ; + struct page * hole_page = bh_result->b_page ; + int retval = 0 ; + + if ((tail_offset & (bh_result->b_size - 1)) != 1) + return -EIO ; + + /* always try to read until the end of the block */ + tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; + tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; + + index = tail_offset >> PAGE_CACHE_SHIFT ; + /* hole_page can be zero in case of direct_io, we are sure + that we cannot get here if we write with O_DIRECT into + tail page */ + if (!hole_page || index != hole_page->index) { + tail_page = grab_cache_page(inode->i_mapping, index) ; + retval = -ENOMEM; + if (!tail_page) { + goto out ; + } + } else { + tail_page = hole_page ; + } + + /* we don't have to make sure the conversion did not happen while + ** we were locking the page because anyone that could convert + ** must first take i_sem. + ** + ** We must fix the tail page for writing because it might have buffers + ** that are mapped, but have a block number of 0. This indicates tail + ** data that has been read directly into the page, and block_prepare_write + ** won't trigger a get_block in this case. + */ + fix_tail_page_for_writing(tail_page) ; + retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); + if (retval) + goto unlock ; + + /* tail conversion might change the data in the page */ + flush_dcache_page(tail_page) ; + + retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ; + +unlock: + if (tail_page != hole_page) { + unlock_page(tail_page) ; + page_cache_release(tail_page) ; + } +out: + return retval ; +} + +static inline int _allocate_block(struct reiserfs_transaction_handle *th, + long block, + struct inode *inode, + b_blocknr_t *allocated_block_nr, + struct path * path, + int flags) { + BUG_ON (!th->t_trans_id); + +#ifdef REISERFS_PREALLOCATE + if (!(flags & GET_BLOCK_NO_ISEM)) { + return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); + } +#endif + return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block); +} + +int reiserfs_get_block (struct inode * inode, sector_t block, + struct buffer_head * bh_result, int create) +{ + int repeat, retval = 0; + b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int + INITIALIZE_PATH(path); + int pos_in_item; + struct cpu_key key; + struct buffer_head * bh, * unbh = NULL; + struct item_head * ih, tmp_ih; + __u32 * item; + int done; + int fs_gen; + struct reiserfs_transaction_handle *th = NULL; + /* space reserved in transaction batch: + . 3 balancings in direct->indirect conversion + . 1 block involved into reiserfs_update_sd() + XXX in practically impossible worst case direct2indirect() + can incur (much) more than 3 balancings. + quota update for user, group */ + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; + int version; + int dangle = 1; + loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; + + /* bad.... */ + reiserfs_write_lock(inode->i_sb); + version = get_inode_item_key_version (inode); + + if (block < 0) { + reiserfs_write_unlock(inode->i_sb); + return -EIO; + } + + if (!file_capable (inode, block)) { + reiserfs_write_unlock(inode->i_sb); + return -EFBIG; + } + + /* if !create, we aren't changing the FS, so we don't need to + ** log anything, so we don't need to start a transaction + */ + if (!(create & GET_BLOCK_CREATE)) { + int ret ; + /* find number of block-th logical block of the file */ + ret = _get_block_create_0 (inode, block, bh_result, + create | GET_BLOCK_READ_DIRECT) ; + reiserfs_write_unlock(inode->i_sb); + return ret; + } + /* + * if we're already in a transaction, make sure to close + * any new transactions we start in this func + */ + if ((create & GET_BLOCK_NO_DANGLE) || + reiserfs_transaction_running(inode->i_sb)) + dangle = 0; + + /* If file is of such a size, that it might have a tail and tails are enabled + ** we should mark it as possibly needing tail packing on close + */ + if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) || + (have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) ) + REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; + + /* set the key of the first byte in the 'block'-th block of file */ + make_cpu_key (&key, inode, new_offset, + TYPE_ANY, 3/*key length*/); + if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { +start_trans: + th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); + if (!th) { + retval = -ENOMEM; + goto failure; + } + reiserfs_update_inode_transaction(inode) ; + } + research: + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + + bh = get_last_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + + fs_gen = get_generation (inode->i_sb); + copy_item_head (&tmp_ih, ih); + + if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { + /* we have to allocate block for the unformatted node */ + if (!th) { + pathrelse(&path) ; + goto start_trans; + } + + repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create); + + if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { + /* restart the transaction to give the journal a chance to free + ** some blocks. releases the path, so we have to go back to + ** research if we succeed on the second try + */ + SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; + retval = restart_transaction(th, inode, &path) ; + if (retval) + goto failure; + repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create); + + if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { + goto research ; + } + if (repeat == QUOTA_EXCEEDED) + retval = -EDQUOT; + else + retval = -ENOSPC; + goto failure; + } + + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } + } + + if (indirect_item_found (retval, ih)) { + b_blocknr_t unfm_ptr; + /* 'block'-th block is in the file already (there is + corresponding cell in some indirect item). But it may be + zero unformatted node pointer (hole) */ + unfm_ptr = get_block_num (item, pos_in_item); + if (unfm_ptr == 0) { + /* use allocated block to plug the hole */ + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + set_buffer_new(bh_result); + if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb)) + reiserfs_add_ordered_list(inode, bh_result); + put_block_num(item, pos_in_item, allocated_block_nr) ; + unfm_ptr = allocated_block_nr; + journal_mark_dirty (th, inode->i_sb, bh); + reiserfs_update_sd(th, inode) ; + } + set_block_dev_mapped(bh_result, unfm_ptr, inode); + pathrelse (&path); + retval = 0; + if (!dangle && th) + retval = reiserfs_end_persistent_transaction(th); + + reiserfs_write_unlock(inode->i_sb); + + /* the item was found, so new blocks were not added to the file + ** there is no need to make sure the inode is updated with this + ** transaction + */ + return retval; + } + + if (!th) { + pathrelse(&path) ; + goto start_trans; + } + + /* desired position is not found or is in the direct item. We have + to append file with holes up to 'block'-th block converting + direct items to indirect one if necessary */ + done = 0; + do { + if (is_statdata_le_ih (ih)) { + __u32 unp = 0; + struct cpu_key tmp_key; + + /* indirect item has to be inserted */ + make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, + UNFM_P_SIZE, 0/* free_space */); + + if (cpu_key_k_offset (&key) == 1) { + /* we are going to add 'block'-th block to the file. Use + allocated block for that */ + unp = cpu_to_le32 (allocated_block_nr); + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + set_buffer_new(bh_result); + done = 1; + } + tmp_key = key; // ;) + set_cpu_key_k_offset (&tmp_key, 1); + PATH_LAST_POSITION(&path) ++; + + retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp); + if (retval) { + reiserfs_free_block (th, inode, allocated_block_nr, 1); + goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST + } + //mark_tail_converted (inode); + } else if (is_direct_le_ih (ih)) { + /* direct item has to be converted */ + loff_t tail_offset; + + tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; + if (tail_offset == cpu_key_k_offset (&key)) { + /* direct item we just found fits into block we have + to map. Convert it into unformatted node: use + bh_result for the conversion */ + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + unbh = bh_result; + done = 1; + } else { + /* we have to padd file tail stored in direct item(s) + up to block size and convert it to unformatted + node. FIXME: this should also get into page cache */ + + pathrelse(&path) ; + /* + * ugly, but we can only end the transaction if + * we aren't nested + */ + BUG_ON (!th->t_refcount); + if (th->t_refcount == 1) { + retval = reiserfs_end_persistent_transaction(th); + th = NULL; + if (retval) + goto failure; + } + + retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; + if (retval) { + if ( retval != -ENOSPC ) + reiserfs_warning (inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d", inode->i_ino, retval) ; + if (allocated_block_nr) { + /* the bitmap, the super, and the stat data == 3 */ + if (!th) + th = reiserfs_persistent_transaction(inode->i_sb,3); + if (th) + reiserfs_free_block (th,inode,allocated_block_nr,1); + } + goto failure ; + } + goto research ; + } + retval = direct2indirect (th, inode, &path, unbh, tail_offset); + if (retval) { + reiserfs_unmap_buffer(unbh); + reiserfs_free_block (th, inode, allocated_block_nr, 1); + goto failure; + } + /* it is important the set_buffer_uptodate is done after + ** the direct2indirect. The buffer might contain valid + ** data newer than the data on disk (read by readpage, changed, + ** and then sent here by writepage). direct2indirect needs + ** to know if unbh was already up to date, so it can decide + ** if the data in unbh needs to be replaced with data from + ** the disk + */ + set_buffer_uptodate (unbh); + + /* unbh->b_page == NULL in case of DIRECT_IO request, this means + buffer will disappear shortly, so it should not be added to + */ + if ( unbh->b_page ) { + /* we've converted the tail, so we must + ** flush unbh before the transaction commits + */ + reiserfs_add_tail_list(inode, unbh) ; + + /* mark it dirty now to prevent commit_write from adding + ** this buffer to the inode's dirty buffer list + */ + /* + * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). + * It's still atomic, but it sets the page dirty too, + * which makes it eligible for writeback at any time by the + * VM (which was also the case with __mark_buffer_dirty()) + */ + mark_buffer_dirty(unbh) ; + } + } else { + /* append indirect item with holes if needed, when appending + pointer to 'block'-th block use block, which is already + allocated */ + struct cpu_key tmp_key; + unp_t unf_single=0; // We use this in case we need to allocate only + // one block which is a fastpath + unp_t *un; + __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE; + __u64 blocks_needed; + + RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, + "vs-804: invalid position for append"); + /* indirect item has to be appended, set up key of that position */ + make_cpu_key (&tmp_key, inode, + le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), + //pos_in_item * inode->i_sb->s_blocksize, + TYPE_INDIRECT, 3);// key type is unimportant + + blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits); + RFALSE( blocks_needed < 0, "green-805: invalid offset"); + + if ( blocks_needed == 1 ) { + un = &unf_single; + } else { + un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE, + GFP_ATOMIC); // We need to avoid scheduling. + if ( !un) { + un = &unf_single; + blocks_needed = 1; + max_to_insert = 0; + } else + memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert)); + } + if ( blocks_needed <= max_to_insert) { + /* we are going to add target block to the file. Use allocated + block for that */ + un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr); + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + set_buffer_new(bh_result); + done = 1; + } else { + /* paste hole to the indirect item */ + /* If kmalloc failed, max_to_insert becomes zero and it means we + only have space for one block */ + blocks_needed=max_to_insert?max_to_insert:1; + } + retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed); + + if (blocks_needed != 1) + kfree(un); + + if (retval) { + reiserfs_free_block (th, inode, allocated_block_nr, 1); + goto failure; + } + if (!done) { + /* We need to mark new file size in case this function will be + interrupted/aborted later on. And we may do this only for + holes. */ + inode->i_size += inode->i_sb->s_blocksize * blocks_needed; + } + } + + if (done == 1) + break; + + /* this loop could log more blocks than we had originally asked + ** for. So, we have to allow the transaction to end if it is + ** too big or too full. Update the inode so things are + ** consistent if we crash before the function returns + ** + ** release the path so that anybody waiting on the path before + ** ending their transaction will be able to continue. + */ + if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + retval = restart_transaction(th, inode, &path) ; + if (retval) + goto failure; + } + /* inserting indirect pointers for a hole can take a + ** long time. reschedule if needed + */ + cond_resched(); + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + if (retval == POSITION_FOUND) { + reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: " + "%K should not be found", &key); + retval = -EEXIST; + if (allocated_block_nr) + reiserfs_free_block (th, inode, allocated_block_nr, 1); + pathrelse(&path) ; + goto failure; + } + bh = get_last_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + } while (1); + + + retval = 0; + + failure: + if (th && (!dangle || (retval && !th->t_trans_id))) { + int err; + if (th->t_trans_id) + reiserfs_update_sd(th, inode); + err = reiserfs_end_persistent_transaction(th); + if (err) + retval = err; + } + + reiserfs_write_unlock(inode->i_sb); + reiserfs_check_path(&path) ; + return retval; +} + +static int +reiserfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); +} + +/* Compute real number of used bytes by file + * Following three functions can go away when we'll have enough space in stat item + */ +static int real_space_diff(struct inode *inode, int sd_size) +{ + int bytes; + loff_t blocksize = inode->i_sb->s_blocksize ; + + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) + return sd_size ; + + /* End of file is also in full block with indirect reference, so round + ** up to the next block. + ** + ** there is just no way to know if the tail is actually packed + ** on the file, so we have to assume it isn't. When we pack the + ** tail, we add 4 bytes to pretend there really is an unformatted + ** node pointer + */ + bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size; + return bytes ; +} + +static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, + int sd_size) +{ + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { + return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ; + } + return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9); +} + +/* Compute number of blocks used by file in ReiserFS counting */ +static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) +{ + loff_t bytes = inode_get_bytes(inode) ; + loff_t real_space = real_space_diff(inode, sd_size) ; + + /* keeps fsck and non-quota versions of reiserfs happy */ + if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { + bytes += (loff_t)511 ;< |