diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/journal.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/reiserfs/journal.c')
-rw-r--r-- | fs/reiserfs/journal.c | 3876 |
1 files changed, 3876 insertions, 0 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c new file mode 100644 index 00000000000..c9ad3a7849f --- /dev/null +++ b/fs/reiserfs/journal.c @@ -0,0 +1,3876 @@ +/* +** Write ahead logging implementation copyright Chris Mason 2000 +** +** The background commits make this code very interelated, and +** overly complex. I need to rethink things a bit....The major players: +** +** journal_begin -- call with the number of blocks you expect to log. +** If the current transaction is too +** old, it will block until the current transaction is +** finished, and then start a new one. +** Usually, your transaction will get joined in with +** previous ones for speed. +** +** journal_join -- same as journal_begin, but won't block on the current +** transaction regardless of age. Don't ever call +** this. Ever. There are only two places it should be +** called from, and they are both inside this file. +** +** journal_mark_dirty -- adds blocks into this transaction. clears any flags +** that might make them get sent to disk +** and then marks them BH_JDirty. Puts the buffer head +** into the current transaction hash. +** +** journal_end -- if the current transaction is batchable, it does nothing +** otherwise, it could do an async/synchronous commit, or +** a full flush of all log and real blocks in the +** transaction. +** +** flush_old_commits -- if the current transaction is too old, it is ended and +** commit blocks are sent to disk. Forces commit blocks +** to disk for all backgrounded commits that have been +** around too long. +** -- Note, if you call this as an immediate flush from +** from within kupdate, it will ignore the immediate flag +*/ + +#include <linux/config.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +#include <linux/time.h> +#include <asm/semaphore.h> + +#include <linux/vmalloc.h> +#include <linux/reiserfs_fs.h> + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/buffer_head.h> +#include <linux/workqueue.h> +#include <linux/writeback.h> +#include <linux/blkdev.h> + + +/* gets a struct reiserfs_journal_list * from a list head */ +#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ + j_list)) +#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ + j_working_list)) + +/* the number of mounted filesystems. This is used to decide when to +** start and kill the commit workqueue +*/ +static int reiserfs_mounted_fs_count; + +static struct workqueue_struct *commit_wq; + +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit + structs at 4k */ +#define BUFNR 64 /*read ahead */ + +/* cnode stat bits. Move these into reiserfs_fs.h */ + +#define BLOCK_FREED 2 /* this block was freed, and can't be written. */ +#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ + +#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ +#define BLOCK_DIRTIED 5 + + +/* journal list state bits */ +#define LIST_TOUCHED 1 +#define LIST_DIRTY 2 +#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ + +/* flags for do_journal_end */ +#define FLUSH_ALL 1 /* flush commit and real blocks */ +#define COMMIT_NOW 2 /* end and commit this transaction */ +#define WAIT 4 /* wait for the log blocks to hit the disk*/ + +static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; +static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; +static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; +static int can_dirty(struct reiserfs_journal_cnode *cn) ; +static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks); +static int release_journal_dev( struct super_block *super, + struct reiserfs_journal *journal ); +static int dirty_one_transaction(struct super_block *s, + struct reiserfs_journal_list *jl); +static void flush_async_commits(void *p); +static void queue_log_writer(struct super_block *s); + +/* values for join in do_journal_begin_r */ +enum { + JBEGIN_REG = 0, /* regular journal begin */ + JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ + JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ +}; + +static int do_journal_begin_r(struct reiserfs_transaction_handle *th, + struct super_block * p_s_sb, + unsigned long nblocks,int join); + +static void init_journal_hash(struct super_block *p_s_sb) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; +} + +/* +** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to +** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for +** more details. +*/ +static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { + if (bh) { + clear_buffer_dirty(bh); + clear_buffer_journal_test(bh); + } + return 0 ; +} + +static void disable_barrier(struct super_block *s) +{ + REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); + printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s)); +} + +static struct reiserfs_bitmap_node * +allocate_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_bitmap_node *bn ; + static int id; + + bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ; + if (!bn) { + return NULL ; + } + bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ; + if (!bn->data) { + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; + return NULL ; + } + bn->id = id++ ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + INIT_LIST_HEAD(&bn->list) ; + return bn ; +} + +static struct reiserfs_bitmap_node * +get_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + struct reiserfs_bitmap_node *bn = NULL; + struct list_head *entry = journal->j_bitmap_nodes.next ; + + journal->j_used_bitmap_nodes++ ; +repeat: + + if(entry != &journal->j_bitmap_nodes) { + bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; + list_del(entry) ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + journal->j_free_bitmap_nodes-- ; + return bn ; + } + bn = allocate_bitmap_node(p_s_sb) ; + if (!bn) { + yield(); + goto repeat ; + } + return bn ; +} +static inline void free_bitmap_node(struct super_block *p_s_sb, + struct reiserfs_bitmap_node *bn) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + journal->j_used_bitmap_nodes-- ; + if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { + reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; + } else { + list_add(&bn->list, &journal->j_bitmap_nodes) ; + journal->j_free_bitmap_nodes++ ; + } +} + +static void allocate_bitmap_nodes(struct super_block *p_s_sb) { + int i ; + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + struct reiserfs_bitmap_node *bn = NULL ; + for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { + bn = allocate_bitmap_node(p_s_sb) ; + if (bn) { + list_add(&bn->list, &journal->j_bitmap_nodes) ; + journal->j_free_bitmap_nodes++ ; + } else { + break ; // this is ok, we'll try again when more are needed + } + } +} + +static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, + struct reiserfs_list_bitmap *jb) { + int bmap_nr = block / (p_s_sb->s_blocksize << 3) ; + int bit_nr = block % (p_s_sb->s_blocksize << 3) ; + + if (!jb->bitmaps[bmap_nr]) { + jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; + } + set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ; + return 0 ; +} + +static void cleanup_bitmap_list(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb) { + int i; + if (jb->bitmaps == NULL) + return; + + for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { + if (jb->bitmaps[i]) { + free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; + jb->bitmaps[i] = NULL ; + } + } +} + +/* +** only call this on FS unmount. +*/ +static int free_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array) { + int i ; + struct reiserfs_list_bitmap *jb ; + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + cleanup_bitmap_list(p_s_sb, jb) ; + vfree(jb->bitmaps) ; + jb->bitmaps = NULL ; + } + return 0; +} + +static int free_bitmap_nodes(struct super_block *p_s_sb) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + struct list_head *next = journal->j_bitmap_nodes.next ; + struct reiserfs_bitmap_node *bn ; + + while(next != &journal->j_bitmap_nodes) { + bn = list_entry(next, struct reiserfs_bitmap_node, list) ; + list_del(next) ; + reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; + reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; + next = journal->j_bitmap_nodes.next ; + journal->j_free_bitmap_nodes-- ; + } + + return 0 ; +} + +/* +** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. +** jb_array is the array to be filled in. +*/ +int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array, + int bmap_nr) { + int i ; + int failed = 0 ; + struct reiserfs_list_bitmap *jb ; + int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; + + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + jb->bitmaps = vmalloc( mem ) ; + if (!jb->bitmaps) { + reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ; + failed = 1; + break ; + } + memset(jb->bitmaps, 0, mem) ; + } + if (failed) { + free_list_bitmaps(p_s_sb, jb_array) ; + return -1 ; + } + return 0 ; +} + +/* +** find an available list bitmap. If you can't find one, flush a commit list +** and try again +*/ +static struct reiserfs_list_bitmap * +get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + int i,j ; + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + struct reiserfs_list_bitmap *jb = NULL ; + + for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { + i = journal->j_list_bitmap_index ; + journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; + jb = journal->j_list_bitmap + i ; + if (journal->j_list_bitmap[i].journal_list) { + flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ; + if (!journal->j_list_bitmap[i].journal_list) { + break ; + } + } else { + break ; + } + } + if (jb->journal_list) { /* double check to make sure if flushed correctly */ + return NULL ; + } + jb->journal_list = jl ; + return jb ; +} + +/* +** allocates a new chunk of X nodes, and links them all together as a list. +** Uses the cnode->next and cnode->prev pointers +** returns NULL on failure +*/ +static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { + struct reiserfs_journal_cnode *head ; + int i ; + if (num_cnodes <= 0) { + return NULL ; + } + head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + if (!head) { + return NULL ; + } + memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + head[0].prev = NULL ; + head[0].next = head + 1 ; + for (i = 1 ; i < num_cnodes; i++) { + head[i].prev = head + (i - 1) ; + head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ + } + head[num_cnodes -1].next = NULL ; + return head ; +} + +/* +** pulls a cnode off the free list, or returns NULL on failure +*/ +static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { + struct reiserfs_journal_cnode *cn ; + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + + reiserfs_check_lock_depth(p_s_sb, "get_cnode") ; + + if (journal->j_cnode_free <= 0) { + return NULL ; + } + journal->j_cnode_used++ ; + journal->j_cnode_free-- ; + cn = journal->j_cnode_free_list ; + if (!cn) { + return cn ; + } + if (cn->next) { + cn->next->prev = NULL ; + } + journal->j_cnode_free_list = cn->next ; + memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; + return cn ; +} + +/* +** returns a cnode to the free list +*/ +static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + + reiserfs_check_lock_depth(p_s_sb, "free_cnode") ; + + journal->j_cnode_used-- ; + journal->j_cnode_free++ ; + /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ + cn->next = journal->j_cnode_free_list ; + if (journal->j_cnode_free_list) { + journal->j_cnode_free_list->prev = cn ; + } + cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ + journal->j_cnode_free_list = cn ; +} + +static void clear_prepared_bits(struct buffer_head *bh) { + clear_buffer_journal_prepared (bh); + clear_buffer_journal_restore_dirty (bh); +} + +/* utility function to force a BUG if it is called without the big +** kernel lock held. caller is the string printed just before calling BUG() +*/ +void reiserfs_check_lock_depth(struct super_block *sb, char *caller) { +#ifdef CONFIG_SMP + if (current->lock_depth < 0) { + reiserfs_panic (sb, "%s called without kernel lock held", caller) ; + } +#else + ; +#endif +} + +/* return a cnode with same dev, block number and size in table, or null if not found */ +static inline struct reiserfs_journal_cnode * +get_journal_hash_dev(struct super_block *sb, + struct reiserfs_journal_cnode **table, + long bl) +{ + struct reiserfs_journal_cnode *cn ; + cn = journal_hash(table, sb, bl) ; + while(cn) { + if (cn->blocknr == bl && cn->sb == sb) + return cn ; + cn = cn->hnext ; + } + return (struct reiserfs_journal_cnode *)0 ; +} + +/* +** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated +** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever +** being overwritten by a replay after crashing. +** +** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting +** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make +** sure you never write the block without logging it. +** +** next_zero_bit is a suggestion about the next block to try for find_forward. +** when bl is rejected because it is set in a journal list bitmap, we search +** for the next zero bit in the bitmap that rejected bl. Then, we return that +** through next_zero_bit for find_forward to try. +** +** Just because we return something in next_zero_bit does not mean we won't +** reject it on the next call to reiserfs_in_journal +** +*/ +int reiserfs_in_journal(struct super_block *p_s_sb, + int bmap_nr, int bit_nr, int search_all, + b_blocknr_t *next_zero_bit) { + struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); + struct reiserfs_journal_cnode *cn ; + struct reiserfs_list_bitmap *jb ; + int i ; + unsigned long bl; + + *next_zero_bit = 0 ; /* always start this at zero. */ + + PROC_INFO_INC( p_s_sb, journal.in_journal ); + /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. + ** if we crash before the transaction that freed it commits, this transaction won't + ** have committed either, and the block will never be written + */ + if (search_all) { + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap ); + jb = journal->j_list_bitmap + i ; + if (jb->journal_list && jb->bitmaps[bmap_nr] && + test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { + *next_zero_bit = find_next_zero_bit((unsigned long *) + (jb->bitmaps[bmap_nr]->data), + p_s_sb->s_blocksize << 3, bit_nr+1) ; + return 1 ; + } + } + } + + bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; + /* is it in any old transactions? */ + if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { + return 1; + } + + /* is it in the current transaction. This should never happen */ + if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { + BUG(); + return 1; + } + + PROC_INFO_INC( p_s_sb, journal.in_journal_reusable ); + /* safe for reuse */ + return 0 ; +} + +/* insert cn into table +*/ +static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { + struct reiserfs_journal_cnode *cn_orig ; + + cn_orig = journal_hash(table, cn->sb, cn->blocknr) ; + cn->hnext = cn_orig ; + cn->hprev = NULL ; + if (cn_orig) { + cn_orig->hprev = cn ; + } + journal_hash(table, cn->sb, cn->blocknr) = cn ; +} + +/* lock the current transaction */ +inline static void lock_journal(struct super_block *p_s_sb) { + PROC_INFO_INC( p_s_sb, journal.lock_journal ); + down(&SB_JOURNAL(p_s_sb)->j_lock); +} + +/* unlock the current transaction */ +inline static void unlock_journal(struct super_block *p_s_sb) { + up(&SB_JOURNAL(p_s_sb)->j_lock); +} + +static inline void get_journal_list(struct reiserfs_journal_list *jl) +{ + jl->j_refcount++; +} + +static inline void put_journal_list(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + if (jl->j_refcount < 1) { + reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id, + jl->j_refcount); + } + if (--jl->j_refcount == 0) + reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); +} + +/* +** this used to be much more involved, and I'm keeping it just in case things get ugly again. +** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a +** transaction. +*/ +static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + + struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; + if (jb) { + cleanup_bitmap_list(p_s_sb, jb) ; + } + jl->j_list_bitmap->journal_list = NULL ; + jl->j_list_bitmap = NULL ; +} + +static int journal_list_still_alive(struct super_block *s, + unsigned long trans_id) +{ + struct reiserfs_journal *journal = SB_JOURNAL (s); + struct list_head *entry = &journal->j_journal_list; + struct reiserfs_journal_list *jl; + + if (!list_empty(entry)) { + jl = JOURNAL_LIST_ENTRY(entry->next); + if (jl->j_trans_id <= trans_id) { + return 1; + } + } + return 0; +} + +static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { + char b[BDEVNAME_SIZE]; + + if (buffer_journaled(bh)) { + reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk", + bh->b_blocknr, bdevname(bh->b_bdev, b)) ; + } + if (uptodate) + set_buffer_uptodate(bh) ; + else + clear_buffer_uptodate(bh) ; + unlock_buffer(bh) ; + put_bh(bh) ; +} + +static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) { + if (uptodate) + set_buffer_uptodate(bh) ; + else + clear_buffer_uptodate(bh) ; + unlock_buffer(bh) ; + put_bh(bh) ; +} + +static void submit_logged_buffer(struct buffer_head *bh) { + get_bh(bh) ; + bh->b_end_io = reiserfs_end_buffer_io_sync ; + clear_buffer_journal_new (bh); + clear_buffer_dirty(bh) ; + if (!test_clear_buffer_journal_test (bh)) + BUG(); + if (!buffer_uptodate(bh)) + BUG(); + submit_bh(WRITE, bh) ; +} + +static void submit_ordered_buffer(struct buffer_head *bh) { + get_bh(bh) ; + bh->b_end_io = reiserfs_end_ordered_io; + clear_buffer_dirty(bh) ; + if (!buffer_uptodate(bh)) + BUG(); + submit_bh(WRITE, bh) ; +} + +static int submit_barrier_buffer(struct buffer_head *bh) { + get_bh(bh) ; + bh->b_end_io = reiserfs_end_ordered_io; + clear_buffer_dirty(bh) ; + if (!buffer_uptodate(bh)) + BUG(); + return submit_bh(WRITE_BARRIER, bh) ; +} + +static void check_barrier_completion(struct super_block *s, + struct buffer_head *bh) { + if (buffer_eopnotsupp(bh)) { + clear_buffer_eopnotsupp(bh); + disable_barrier(s); + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + } +} + +#define CHUNK_SIZE 32 +struct buffer_chunk { + struct buffer_head *bh[CHUNK_SIZE]; + int nr; +}; + +static void write_chunk(struct buffer_chunk *chunk) { + int i; + for (i = 0; i < chunk->nr ; i++) { + submit_logged_buffer(chunk->bh[i]) ; + } + chunk->nr = 0; +} + +static void write_ordered_chunk(struct buffer_chunk *chunk) { + int i; + for (i = 0; i < chunk->nr ; i++) { + submit_ordered_buffer(chunk->bh[i]) ; + } + chunk->nr = 0; +} + +static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, + spinlock_t *lock, + void (fn)(struct buffer_chunk *)) +{ + int ret = 0; + if (chunk->nr >= CHUNK_SIZE) + BUG(); + chunk->bh[chunk->nr++] = bh; + if (chunk->nr >= CHUNK_SIZE) { + ret = 1; + if (lock) + spin_unlock(lock); + fn(chunk); + if (lock) + spin_lock(lock); + } + return ret; +} + + +static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); +static struct reiserfs_jh *alloc_jh(void) { + struct reiserfs_jh *jh; + while(1) { + jh = kmalloc(sizeof(*jh), GFP_NOFS); + if (jh) { + atomic_inc(&nr_reiserfs_jh); + return jh; + } + yield(); + } +} + +/* + * we want to free the jh when the buffer has been written + * and waited on + */ +void reiserfs_free_jh(struct buffer_head *bh) { + struct reiserfs_jh *jh; + + jh = bh->b_private; + if (jh) { + bh->b_private = NULL; + jh->bh = NULL; + list_del_init(&jh->list); + kfree(jh); + if (atomic_read(&nr_reiserfs_jh) <= 0) + BUG(); + atomic_dec(&nr_reiserfs_jh); + put_bh(bh); + } +} + +static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, + int tail) +{ + struct reiserfs_jh *jh; + + if (bh->b_private) { + spin_lock(&j->j_dirty_buffers_lock); + if (!bh->b_private) { + spin_unlock(&j->j_dirty_buffers_lock); + goto no_jh; + } + jh = bh->b_private; + list_del_init(&jh->list); + } else { +no_jh: + get_bh(bh); + jh = alloc_jh(); + spin_lock(&j->j_dirty_buffers_lock); + /* buffer must be locked for __add_jh, should be able to have + * two adds at the same time + */ + if (bh->b_private) + BUG(); + jh->bh = bh; + bh->b_private = jh; + } + jh->jl = j->j_current_jl; + if (tail) + list_add_tail(&jh->list, &jh->jl->j_tail_bh_list); + else { + list_add_tail(&jh->list, &jh->jl->j_bh_list); + } + spin_unlock(&j->j_dirty_buffers_lock); + return 0; +} + +int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) { + return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); +} +int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) { + return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); +} + +#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) +static int write_ordered_buffers(spinlock_t *lock, + struct reiserfs_journal *j, + struct reiserfs_journal_list *jl, + struct list_head *list) +{ + struct buffer_head *bh; + struct reiserfs_jh *jh; + int ret = j->j_errno; + struct buffer_chunk chunk; + struct list_head tmp; + INIT_LIST_HEAD(&tmp); + + chunk.nr = 0; + spin_lock(lock); + while(!list_empty(list)) { + jh = JH_ENTRY(list->next); + bh = jh->bh; + get_bh(bh); + if (test_set_buffer_locked(bh)) { + if (!buffer_dirty(bh)) { + list_del_init(&jh->list); + list_add(&jh->list, &tmp); + goto loop_next; + } + spin_unlock(lock); + if (chunk.nr) + write_ordered_chunk(&chunk); + wait_on_buffer(bh); + cond_resched(); + spin_lock(lock); + goto loop_next; + } + if (buffer_dirty(bh)) { + list_del_init(&jh->list); + list_add(&jh->list, &tmp); + add_to_chunk(&chunk, bh, lock, write_ordered_chunk); + } else { + reiserfs_free_jh(bh); + unlock_buffer(bh); + } +loop_next: + put_bh(bh); + cond_resched_lock(lock); + } + if (chunk.nr) { + spin_unlock(lock); + write_ordered_chunk(&chunk); + spin_lock(lock); + } + while(!list_empty(&tmp)) { + jh = JH_ENTRY(tmp.prev); + bh = jh->bh; + get_bh(bh); + reiserfs_free_jh(bh); + + if (buffer_locked(bh)) { + spin_unlock(lock); + wait_on_buffer(bh); + spin_lock(lock); + } + if (!buffer_uptodate(bh)) { + ret = -EIO; + } + put_bh(bh); + cond_resched_lock(lock); + } + spin_unlock(lock); + return ret; +} + +static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) { + struct reiserfs_journal *journal = SB_JOURNAL (s); + struct reiserfs_journal_list *other_jl; + struct reiserfs_journal_list *first_jl; + struct list_head *entry; + unsigned long trans_id = jl->j_trans_id; + unsigned long other_trans_id; + unsigned long first_trans_id; + +find_first: + /* + * first we walk backwards to find the oldest uncommitted transation + */ + first_jl = jl; + entry = jl->j_list.prev; + while(1) { + other_jl = JOURNAL_LIST_ENTRY(entry); + if (entry == &journal->j_journal_list || + atomic_read(&other_jl->j_older_commits_done)) + break; + + first_jl = other_jl; + entry = other_jl->j_list.prev; + } + + /* if we didn't find any older uncommitted transactions, return now */ + if (first_jl == jl) { + return 0; + } + + first_trans_id = first_jl->j_trans_id; + + entry = &first_jl->j_list; + while(1) { + other_jl = JOURNAL_LIST_ENTRY(entry); + other_trans_id = other_jl->j_trans_id; + + if (other_trans_id < trans_id) { + if (atomic_read(&other_jl->j_commit_left) != 0) { + flush_commit_list(s, other_jl, 0); + + /* list we were called with is gone, return */ + if (!journal_list_still_alive(s, trans_id)) + return 1; + + /* the one we just flushed is gone, this means all + * older lists are also gone, so first_jl is no longer + * valid either. Go back to the beginning. + */ + if (!journal_list_still_alive(s, other_trans_id)) { + goto find_first; + } + } + entry = entry->next; + if (entry == &journal->j_journal_list) + return 0; + } else { + return 0; + } + } + return 0; +} +int reiserfs_async_progress_wait(struct super_block *s) { + DEFINE_WAIT(wait); + struct reiserfs_journal *j = SB_JOURNAL(s); + if (atomic_read(&j->j_async_throttle)) + blk_congestion_wait(WRITE, HZ/10); + return 0; +} + +/* +** if this journal list still has commit blocks unflushed, send them to disk. +** +** log areas must be flushed in order (transaction 2 can't commit before transaction 1) +** Before the commit block can by written, every other log block must be safely on disk +** +*/ +static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { + int i; + int bn ; + struct buffer_head *tbh = NULL ; + unsigned long trans_id = jl->j_trans_id; + struct reiserfs_journal *journal = SB_JOURNAL (s); + int barrier = 0; + int retval = 0; + + reiserfs_check_lock_depth(s, "flush_commit_list") ; + + if (atomic_read(&jl->j_older_commits_done)) { + return 0 ; + } + + /* before we can put our commit blocks on disk, we have to make sure everyone older than + ** us is on disk too + */ + BUG_ON (jl->j_len <= 0); + BUG_ON (trans_id == journal->j_trans_id); + + get_journal_list(jl); + if (flushall) { + if (flush_older_commits(s, jl) == 1) { + /* list disappeared during flush_older_commits. return */ + goto put_jl; + } + } + + /* make sure nobody is trying to flush this one at the same time */ + down(&jl->j_commit_lock); + if (!journal_list_still_alive(s, trans_id)) { + up(&jl->j_commit_lock); + goto put_jl; + } + BUG_ON (jl->j_trans_id == 0); + + /* this commit is done, exit */ + if (atomic_read(&(jl->j_commit_left)) <= 0) { + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1) ; + } + up(&jl->j_commit_lock); + goto put_jl; + } + + if (!list_empty(&jl->j_bh_list)) { + unlock_kernel(); + write_ordered_buffers(&journal->j_dirty_buffers_lock, + journal, jl, &jl->j_bh_list); + lock_kernel(); + } + BUG_ON (!list_empty(&jl->j_bh_list)); + /* + * for the description block and all the log blocks, submit any buffers + * that haven't already reached the disk + */ + atomic_inc(&journal->j_async_throttle); + for (i = 0 ; i < (jl->j_len + 1) ; i++) { + bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) % + SB_ONDISK_JOURNAL_SIZE(s); + tbh = journal_find_get_block(s, bn) ; + if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ + ll_rw_block(WRITE, 1, &tbh) ; + put_bh(tbh) ; + } + atomic_dec(&journal->j_async_throttle); + + /* wait on everything written so far before writing the commit + * if we are in barrier mode, send the commit down now + */ + barrier = reiserfs_barrier_flush(s); + if (barrier) { + int ret; + lock_buffer(jl->j_commit_bh); + ret = submit_barrier_buffer(jl->j_commit_bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(jl->j_commit_bh); + disable_barrier(s); + barrier = 0; + } + } + for (i = 0 ; i < (jl->j_len + 1) ; i++) { + bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + + (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ; + tbh = journal_find_get_block(s, bn) ; + wait_on_buffer(tbh) ; + // since we're using ll_rw_blk above, it might have skipped over + // a locked buffer. Double check here + // + if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ + sync_dirty_buffer(tbh); + if (unlikely (!buffer_uptodate(tbh))) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning(s, "journal-601, buffer write failed") ; +#endif + retval = -EIO; + } + put_bh(tbh) ; /* once for journal_find_get_block */ + put_bh(tbh) ; /* once due to original getblk in do_journal_end */ + atomic_dec(&(jl->j_commit_left)) ; + } + + BUG_ON (atomic_read(&(jl->j_commit_left)) != 1); + + if (!barrier) { + if (buffer_dirty(jl->j_commit_bh)) + BUG(); + mark_buffer_dirty(jl->j_commit_bh) ; + sync_dirty_buffer(jl->j_commit_bh) ; + } else + wait_on_buffer(jl->j_commit_bh); + + check_barrier_completion(s, jl->j_commit_bh); + + /* If there was a write error in the journal - we can't commit this + * transaction - it will be invalid and, if successful, will just end + * up propogating the write error out to the filesystem. */ + if (unlikely (!buffer_uptodate(jl->j_commit_bh))) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning(s, "journal-615: buffer write failed") ; +#endif + retval = -EIO; + } + bforget(jl->j_commit_bh) ; + if (journal->j_last_commit_id != 0 && + (jl->j_trans_id - journal->j_last_commit_id) != 1) { + reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", + journal->j_last_commit_id, + jl->j_trans_id); + } + journal->j_last_commit_id = jl->j_trans_id; + + /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ + cleanup_freed_for_journal_list(s, jl) ; + + retval = retval ? retval : journal->j_errno; + + /* mark the metadata dirty */ + if (!retval) + dirty_one_transaction(s, jl); + atomic_dec(&(jl->j_commit_left)) ; + + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1) ; + } + up(&jl->j_commit_lock); +put_jl: + put_journal_list(s, jl); + + if (retval) + reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); + return retval; +} + +/* +** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or +** returns NULL if it can't find anything +*/ +static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { + struct super_block *sb = cn->sb; + b_blocknr_t blocknr = cn->blocknr ; + + cn = cn->hprev ; + while(cn) { + if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { + return cn->jlist ; + } + cn = cn->hprev ; + } + return NULL ; +} + +static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, |