aboutsummaryrefslogtreecommitdiff
path: root/fs/reiserfs/journal.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/journal.c
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/reiserfs/journal.c')
-rw-r--r--fs/reiserfs/journal.c3876
1 files changed, 3876 insertions, 0 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
new file mode 100644
index 00000000000..c9ad3a7849f
--- /dev/null
+++ b/fs/reiserfs/journal.c
@@ -0,0 +1,3876 @@
+/*
+** Write ahead logging implementation copyright Chris Mason 2000
+**
+** The background commits make this code very interrelated, and
+** overly complex. I need to rethink things a bit....The major players:
+**
+** journal_begin -- call with the number of blocks you expect to log.
+** If the current transaction is too
+** old, it will block until the current transaction is
+** finished, and then start a new one.
+** Usually, your transaction will get joined in with
+** previous ones for speed.
+**
+** journal_join -- same as journal_begin, but won't block on the current
+** transaction regardless of age. Don't ever call
+** this. Ever. There are only two places it should be
+** called from, and they are both inside this file.
+**
+** journal_mark_dirty -- adds blocks into this transaction. clears any flags
+** that might make them get sent to disk
+** and then marks them BH_JDirty. Puts the buffer head
+** into the current transaction hash.
+**
+** journal_end -- if the current transaction is batchable, it does nothing
+** otherwise, it could do an async/synchronous commit, or
+** a full flush of all log and real blocks in the
+** transaction.
+**
+** flush_old_commits -- if the current transaction is too old, it is ended and
+** commit blocks are sent to disk. Forces commit blocks
+** to disk for all backgrounded commits that have been
+** around too long.
+** -- Note, if you call this as an immediate flush
+** from within kupdate, it will ignore the immediate flag
+*/
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/time.h>
+#include <asm/semaphore.h>
+
+#include <linux/vmalloc.h>
+#include <linux/reiserfs_fs.h>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+
+
+/* gets a struct reiserfs_journal_list * from a list head.
+** JOURNAL_LIST_ENTRY resolves through the j_list member,
+** JOURNAL_WORK_ENTRY through the j_working_list member.
+*/
+#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+ j_list))
+#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+ j_working_list))
+
+/* the number of mounted filesystems. This is used to decide when to
+** start and kill the commit workqueue
+*/
+static int reiserfs_mounted_fs_count;
+
+/* the commit workqueue referred to above.
+** NOTE(review): its creation and teardown are not visible in this part of the file.
+*/
+static struct workqueue_struct *commit_wq;
+
+#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
+ structs at 4k */
+#define BUFNR 64 /* read ahead */
+
+/* cnode state bits. Move these into reiserfs_fs.h
+** NOTE(review): the values are consecutive integers (2..5), so these are
+** presumably bit numbers for set_bit()/test_bit() rather than masks -- confirm
+** against the users of these constants.
+*/
+
+#define BLOCK_FREED 2 /* this block was freed, and can't be written. */
+#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */
+
+#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */
+#define BLOCK_DIRTIED 5
+
+
+/* journal list state bits
+** NOTE(review): 1/2/4 look like OR-able masks, but the users are not visible
+** here -- confirm whether these are masks or bit numbers.
+*/
+#define LIST_TOUCHED 1
+#define LIST_DIRTY 2
+#define LIST_COMMIT_PENDING 4 /* someone will commit this list */
+
+/* flags for do_journal_end (passed in its `flags' argument) */
+#define FLUSH_ALL 1 /* flush commit and real blocks */
+#define COMMIT_NOW 2 /* end and commit this transaction */
+#define WAIT 4 /* wait for the log blocks to hit the disk */
+
+static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ;
+static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ;
+static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ;
+static int can_dirty(struct reiserfs_journal_cnode *cn) ;
+static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks);
+static int release_journal_dev( struct super_block *super,
+ struct reiserfs_journal *journal );
+static int dirty_one_transaction(struct super_block *s,
+ struct reiserfs_journal_list *jl);
+static void flush_async_commits(void *p);
+static void queue_log_writer(struct super_block *s);
+
+/* values for join in do_journal_begin_r */
+enum {
+ JBEGIN_REG = 0, /* regular journal begin */
+ JBEGIN_JOIN = 1, /* join the running transaction if at all possible */
+ JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */
+};
+
+static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
+ struct super_block * p_s_sb,
+ unsigned long nblocks,int join);
+
+/* Empty every bucket of the journal's j_hash_table by zeroing the whole table. */
+static void init_journal_hash(struct super_block *p_s_sb) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    size_t table_bytes = JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *);
+
+    memset(jrnl->j_hash_table, 0, table_bytes);
+}
+
+/*
+** Drops the dirty bit and the journal_test bit on bh; a NULL bh is ignored.
+** It exists because refile_buffer must not be allowed to schedule after a
+** block has been freed -- see remove_from_transaction and journal_mark_freed.
+** Always returns 0.
+*/
+static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
+    if (!bh)
+        return 0;
+
+    clear_buffer_dirty(bh);
+    clear_buffer_journal_test(bh);
+    return 0;
+}
+
+/*
+ * Clear the REISERFS_BARRIER_FLUSH bit in the superblock's mount options
+ * and log that flush barriers are now off for this device.
+ */
+static void disable_barrier(struct super_block *s)
+{
+    REISERFS_SB(s)->s_mount_opt =
+        REISERFS_SB(s)->s_mount_opt & ~(1 << REISERFS_BARRIER_FLUSH);
+    printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s));
+}
+
+/*
+ * Allocate one bitmap node plus a zeroed, block-sized data buffer for it.
+ * Each node gets the next value of a function-local id counter.
+ * Returns NULL if either allocation fails (nothing is leaked in that case).
+ */
+static struct reiserfs_bitmap_node *
+allocate_bitmap_node(struct super_block *p_s_sb) {
+    static int id;
+    struct reiserfs_bitmap_node *node;
+
+    node = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb);
+    if (node == NULL)
+        return NULL;
+
+    node->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
+    if (node->data == NULL) {
+        /* give the half-built node back before reporting failure */
+        reiserfs_kfree(node, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+        return NULL;
+    }
+
+    node->id = id++;
+    memset(node->data, 0, p_s_sb->s_blocksize);
+    INIT_LIST_HEAD(&node->list);
+    return node;
+}
+
+/*
+ * Hand out a zeroed bitmap node, never returning NULL.
+ *
+ * A node is taken from the journal's j_bitmap_nodes free list when one is
+ * available; otherwise a fresh one is allocated. If allocation fails the
+ * function yields and tries again until it succeeds.
+ *
+ * The free list is re-examined on every retry: other tasks may run during
+ * yield() and return nodes to the list, and those should be picked up
+ * instead of retrying only the allocation.
+ */
+static struct reiserfs_bitmap_node *
+get_bitmap_node(struct super_block *p_s_sb) {
+    struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+    struct reiserfs_bitmap_node *bn = NULL;
+    struct list_head *entry;
+
+    journal->j_used_bitmap_nodes++ ;
+repeat:
+    /* sample the list head each time round, not just once on entry */
+    entry = journal->j_bitmap_nodes.next ;
+    if (entry != &journal->j_bitmap_nodes) {
+        bn = list_entry(entry, struct reiserfs_bitmap_node, list) ;
+        list_del(entry) ;
+        /* recycled nodes still carry old bits; clear them before reuse */
+        memset(bn->data, 0, p_s_sb->s_blocksize) ;
+        journal->j_free_bitmap_nodes-- ;
+        return bn ;
+    }
+    bn = allocate_bitmap_node(p_s_sb) ;
+    if (!bn) {
+        yield();
+        goto repeat ;
+    }
+    return bn ;
+}
+/*
+ * Release a bitmap node obtained from get_bitmap_node().
+ * The node is parked on the journal's free list unless that list already
+ * holds more than REISERFS_MAX_BITMAP_NODES entries, in which case the node
+ * and its data buffer are freed outright.
+ */
+static inline void free_bitmap_node(struct super_block *p_s_sb,
+                                    struct reiserfs_bitmap_node *bn) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+
+    jrnl->j_used_bitmap_nodes--;
+
+    if (jrnl->j_free_bitmap_nodes <= REISERFS_MAX_BITMAP_NODES) {
+        list_add(&bn->list, &jrnl->j_bitmap_nodes);
+        jrnl->j_free_bitmap_nodes++;
+        return;
+    }
+
+    reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
+    reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+}
+
+/*
+ * Pre-fill the journal's bitmap-node free list with up to
+ * REISERFS_MIN_BITMAP_NODES nodes. Stops quietly at the first allocation
+ * failure; more nodes are allocated later when they are needed.
+ */
+static void allocate_bitmap_nodes(struct super_block *p_s_sb) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    struct reiserfs_bitmap_node *node;
+    int count;
+
+    for (count = 0; count < REISERFS_MIN_BITMAP_NODES; count++) {
+        node = allocate_bitmap_node(p_s_sb);
+        if (!node)
+            break;  /* this is ok, we'll try again when more are needed */
+
+        list_add(&node->list, &jrnl->j_bitmap_nodes);
+        jrnl->j_free_bitmap_nodes++;
+    }
+}
+
+/*
+ * Mark `block' in the list bitmap jb. The bitmap node that covers the block
+ * is fetched on demand via get_bitmap_node(). Always returns 0.
+ */
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+                                  struct reiserfs_list_bitmap *jb) {
+    /* each bitmap node covers s_blocksize * 8 blocks */
+    int bmap_nr = block / (p_s_sb->s_blocksize << 3);
+    int bit_nr = block % (p_s_sb->s_blocksize << 3);
+    struct reiserfs_bitmap_node **slot = &jb->bitmaps[bmap_nr];
+
+    if (*slot == NULL)
+        *slot = get_bitmap_node(p_s_sb);
+
+    set_bit(bit_nr, (unsigned long *)(*slot)->data);
+    return 0;
+}
+
+/*
+ * Give every bitmap node held by jb back via free_bitmap_node() and clear
+ * the corresponding slots. Does nothing when jb has no slot array.
+ */
+static void cleanup_bitmap_list(struct super_block *p_s_sb,
+                                struct reiserfs_list_bitmap *jb) {
+    int slot;
+
+    if (!jb->bitmaps)
+        return;
+
+    for (slot = 0; slot < SB_BMAP_NR(p_s_sb); slot++) {
+        if (!jb->bitmaps[slot])
+            continue;
+        free_bitmap_node(p_s_sb, jb->bitmaps[slot]);
+        jb->bitmaps[slot] = NULL;
+    }
+}
+
+/*
+** Tears down all JOURNAL_NUM_BITMAPS list bitmaps in jb_array: detaches the
+** journal list, releases the bitmap nodes and vfree()s the slot array.
+** Only call this on FS unmount. Always returns 0.
+*/
+static int free_list_bitmaps(struct super_block *p_s_sb,
+                             struct reiserfs_list_bitmap *jb_array) {
+    struct reiserfs_list_bitmap *cur;
+    int idx;
+
+    for (idx = 0; idx < JOURNAL_NUM_BITMAPS; idx++) {
+        cur = &jb_array[idx];
+        cur->journal_list = NULL;
+        cleanup_bitmap_list(p_s_sb, cur);
+        vfree(cur->bitmaps);
+        cur->bitmaps = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Drain the journal's bitmap-node free list, freeing each node and its data
+ * buffer and keeping j_free_bitmap_nodes in step. Always returns 0.
+ */
+static int free_bitmap_nodes(struct super_block *p_s_sb) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    struct list_head *head = &jrnl->j_bitmap_nodes;
+    struct reiserfs_bitmap_node *node;
+
+    while (head->next != head) {
+        node = list_entry(head->next, struct reiserfs_bitmap_node, list);
+        list_del(&node->list);
+        reiserfs_kfree(node->data, p_s_sb->s_blocksize, p_s_sb);
+        reiserfs_kfree(node, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+        jrnl->j_free_bitmap_nodes--;
+    }
+
+    return 0;
+}
+
+/*
+** Allocates the slot arrays for JOURNAL_NUM_BITMAPS list bitmaps.
+** jb_array is the array to fill in; each entry gets a zeroed, vmalloc'd
+** array of bmap_nr bitmap-node pointers.
+**
+** Returns 0 on success. On the first allocation failure a warning is logged,
+** free_list_bitmaps() is run over the whole array and -1 is returned.
+** NOTE(review): entries after the failing index are not initialised here
+** before free_list_bitmaps() visits them -- presumably the caller passes a
+** zeroed array; confirm.
+*/
+int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
+                                   struct reiserfs_list_bitmap *jb_array,
+                                   int bmap_nr) {
+    int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
+    struct reiserfs_list_bitmap *cur;
+    int idx;
+
+    for (idx = 0; idx < JOURNAL_NUM_BITMAPS; idx++) {
+        cur = &jb_array[idx];
+        cur->journal_list = NULL;
+        cur->bitmaps = vmalloc(mem);
+        if (cur->bitmaps == NULL) {
+            reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists");
+            free_list_bitmaps(p_s_sb, jb_array);
+            return -1;
+        }
+        memset(cur->bitmaps, 0, mem);
+    }
+    return 0;
+}
+
+/*
+** Claims a list bitmap for jl. Slots are tried round-robin starting at
+** j_list_bitmap_index; a slot that is still owned by another journal list
+** gets that list's commits flushed in the hope that this frees it. At most
+** JOURNAL_NUM_BITMAPS * 3 slots are tried.
+**
+** Returns the claimed bitmap, or NULL if the last slot tried is still owned.
+*/
+static struct reiserfs_list_bitmap *
+get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    struct reiserfs_list_bitmap *jb = NULL;
+    int attempt;
+    int slot;
+
+    for (attempt = 0; attempt < (JOURNAL_NUM_BITMAPS * 3); attempt++) {
+        slot = jrnl->j_list_bitmap_index;
+        jrnl->j_list_bitmap_index = (slot + 1) % JOURNAL_NUM_BITMAPS;
+        jb = &jrnl->j_list_bitmap[slot];
+
+        if (!jb->journal_list)
+            break;      /* free slot */
+
+        flush_commit_list(p_s_sb, jb->journal_list, 1);
+        if (!jb->journal_list)
+            break;      /* the flush released it */
+    }
+
+    /* double check to make sure it flushed correctly */
+    if (jb->journal_list)
+        return NULL;
+
+    jb->journal_list = jl;
+    return jb;
+}
+
+/*
+** Allocates one zeroed vmalloc chunk of num_cnodes cnodes and threads them
+** into a doubly linked list through ->next / ->prev, in array order.
+** Returns the first node, or NULL if num_cnodes <= 0 or vmalloc fails.
+*/
+static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) {
+    struct reiserfs_journal_cnode *nodes;
+    int last;
+    int idx;
+
+    if (num_cnodes <= 0)
+        return NULL;
+
+    nodes = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
+    if (nodes == NULL)
+        return NULL;
+    memset(nodes, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
+
+    last = num_cnodes - 1;
+    for (idx = 0; idx <= last; idx++) {
+        nodes[idx].prev = (idx == 0) ? NULL : &nodes[idx - 1];
+        nodes[idx].next = (idx == last) ? NULL : &nodes[idx + 1];
+    }
+    return nodes;
+}
+
+/*
+** Pulls a cnode off the journal's free list and returns it zeroed.
+** Returns NULL when j_cnode_free says nothing is left, or when the free
+** list head is unexpectedly NULL.
+**
+** The used/free counters are only adjusted once a node has actually been
+** taken, so a NULL return leaves the accounting untouched.
+**
+** Must be called with the big kernel lock held (checked via
+** reiserfs_check_lock_depth).
+*/
+static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) {
+    struct reiserfs_journal_cnode *cn ;
+    struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+
+    reiserfs_check_lock_depth(p_s_sb, "get_cnode") ;
+
+    if (journal->j_cnode_free <= 0) {
+        return NULL ;
+    }
+    cn = journal->j_cnode_free_list ;
+    if (!cn) {
+        /* counter and list disagree; do not make the counters worse */
+        return NULL ;
+    }
+    journal->j_cnode_used++ ;
+    journal->j_cnode_free-- ;
+    if (cn->next) {
+        cn->next->prev = NULL ;
+    }
+    journal->j_cnode_free_list = cn->next ;
+    memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ;
+    return cn ;
+}
+
+/*
+** Pushes cn onto the front of the journal's cnode free list and updates the
+** used/free counters. The node's contents are not cleared here; get_cnode()
+** zeroes a node when it hands it out again.
+** Must be called with the big kernel lock held.
+*/
+static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    struct reiserfs_journal_cnode *old_head;
+
+    reiserfs_check_lock_depth(p_s_sb, "free_cnode") ;
+
+    jrnl->j_cnode_used--;
+    jrnl->j_cnode_free++;
+
+    old_head = jrnl->j_cnode_free_list;
+    cn->next = old_head;
+    if (old_head)
+        old_head->prev = cn;
+    cn->prev = NULL;    /* cn is the new head, so it has no predecessor */
+    jrnl->j_cnode_free_list = cn;
+}
+
+/* Clear both the journal_prepared and the journal_restore_dirty flags on bh. */
+static void clear_prepared_bits(struct buffer_head *bh)
+{
+    clear_buffer_journal_prepared(bh);
+    clear_buffer_journal_restore_dirty(bh);
+}
+
+/*
+** Sanity check that the caller holds the big kernel lock. On SMP builds a
+** negative current->lock_depth triggers reiserfs_panic() with `caller' in
+** the message; on non-SMP builds this is a no-op.
+*/
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller) {
+#ifdef CONFIG_SMP
+    if (current->lock_depth >= 0)
+        return;
+    reiserfs_panic (sb, "%s called without kernel lock held", caller) ;
+#endif
+}
+
+/*
+** Walk the hash chain for (sb, bl) in `table' and return the first cnode
+** whose super block and block number both match, or NULL if there is none.
+*/
+static inline struct reiserfs_journal_cnode *
+get_journal_hash_dev(struct super_block *sb,
+                     struct reiserfs_journal_cnode **table,
+                     long bl)
+{
+    struct reiserfs_journal_cnode *walk;
+
+    for (walk = journal_hash(table, sb, bl); walk; walk = walk->hnext) {
+        if (walk->blocknr == bl && walk->sb == sb)
+            return walk;
+    }
+    return NULL;
+}
+
+/*
+** Answers 'can this block be reallocated yet?'. Returns 1 when the block
+** (given as bitmap number + bit number) must not be reused, 0 when it is
+** safe for reuse.
+**
+** With search_all set, the block is rejected if it is set in any list bitmap
+** that belongs to a journal list (i.e. it was freed by a transaction that
+** could still be replayed after a crash), or if it is found in the hash of
+** older transactions. Without search_all only the current transaction is
+** consulted; the caller is then expected to log the block before writing
+** it. Since deleting a block removes it from the current transaction,
+** finding it there is treated as a bug.
+**
+** next_zero_bit is a hint for find_forward: it is reset to 0 on entry and,
+** when a list bitmap rejects the block, set to the next zero bit after
+** bit_nr in that same bitmap. A returned hint may still be rejected by the
+** next call.
+*/
+int reiserfs_in_journal(struct super_block *p_s_sb,
+                        int bmap_nr, int bit_nr, int search_all,
+                        b_blocknr_t *next_zero_bit) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL (p_s_sb);
+    struct reiserfs_list_bitmap *jb;
+    unsigned long bl;
+    int idx;
+
+    *next_zero_bit = 0;     /* always start this at zero. */
+    bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
+
+    PROC_INFO_INC( p_s_sb, journal.in_journal );
+
+    /* If we aren't doing a search_all, this is a metablock, and it will be
+    ** logged before use. If we crash before the transaction that freed it
+    ** commits, this transaction won't have committed either, and the block
+    ** will never be written.
+    */
+    if (search_all) {
+        for (idx = 0; idx < JOURNAL_NUM_BITMAPS; idx++) {
+            PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap );
+            jb = &jrnl->j_list_bitmap[idx];
+            if (!jb->journal_list || !jb->bitmaps[bmap_nr])
+                continue;
+            if (!test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data))
+                continue;
+
+            *next_zero_bit = find_next_zero_bit((unsigned long *)
+                                                (jb->bitmaps[bmap_nr]->data),
+                                                p_s_sb->s_blocksize << 3, bit_nr+1);
+            return 1;
+        }
+
+        /* is it in any old transactions? */
+        if (get_journal_hash_dev(p_s_sb, jrnl->j_list_hash_table, bl))
+            return 1;
+    }
+
+    /* is it in the current transaction. This should never happen */
+    if (get_journal_hash_dev(p_s_sb, jrnl->j_hash_table, bl)) {
+        BUG();
+        return 1;
+    }
+
+    PROC_INFO_INC( p_s_sb, journal.in_journal_reusable );
+    /* safe for reuse */
+    return 0;
+}
+
+/*
+** Insert cn at the head of its hash chain in `table'; the bucket is chosen
+** from cn->sb and cn->blocknr.
+*/
+static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) {
+    struct reiserfs_journal_cnode *old_head = journal_hash(table, cn->sb, cn->blocknr);
+
+    cn->hprev = NULL;
+    cn->hnext = old_head;
+    if (old_head)
+        old_head->hprev = cn;
+
+    journal_hash(table, cn->sb, cn->blocknr) = cn;
+}
+
+/* Lock the current transaction: bump the lock_journal stat, then take the journal's j_lock semaphore. */
+static inline void lock_journal(struct super_block *p_s_sb)
+{
+    struct reiserfs_journal *jrnl;
+
+    PROC_INFO_INC( p_s_sb, journal.lock_journal );
+    jrnl = SB_JOURNAL(p_s_sb);
+    down(&jrnl->j_lock);
+}
+
+/* Unlock the current transaction by releasing the journal's j_lock semaphore. */
+static inline void unlock_journal(struct super_block *p_s_sb)
+{
+    struct reiserfs_journal *jrnl = SB_JOURNAL(p_s_sb);
+
+    up(&jrnl->j_lock);
+}
+
+/* Take a reference on jl; pair with put_journal_list(). */
+static inline void get_journal_list(struct reiserfs_journal_list *jl)
+{
+    jl->j_refcount += 1;
+}
+
+/*
+ * Drop one reference on jl and free it when the count reaches zero.
+ * A count already below 1 on entry is reported through reiserfs_panic().
+ */
+static inline void put_journal_list(struct super_block *s,
+                                    struct reiserfs_journal_list *jl)
+{
+    if (jl->j_refcount < 1) {
+        reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id,
+                        jl->j_refcount);
+    }
+
+    jl->j_refcount--;
+    if (jl->j_refcount != 0)
+        return;
+
+    reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+}
+
+/*
+** Called by flush_commit_list to drop the record of blocks freed during a
+** transaction: releases the bitmap nodes of jl's list bitmap and breaks the
+** link between the journal list and that bitmap in both directions.
+**
+** A journal list without a list bitmap is left alone; the original code
+** tested for that case but then dereferenced the NULL pointer anyway.
+*/
+static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
+
+    struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ;
+
+    if (!jb) {
+        /* nothing attached, so nothing to release or unlink */
+        return ;
+    }
+    cleanup_bitmap_list(p_s_sb, jb) ;
+    jb->journal_list = NULL ;
+    jl->j_list_bitmap = NULL ;
+}
+
+/*
+ * Returns 1 if the first entry on the journal's j_journal_list has a
+ * transaction id <= trans_id, which the callers take to mean that the list
+ * for trans_id has not been freed yet. Returns 0 otherwise, including when
+ * the journal list is empty.
+ */
+static int journal_list_still_alive(struct super_block *s,
+                                    unsigned long trans_id)
+{
+    struct reiserfs_journal *jrnl = SB_JOURNAL (s);
+    struct list_head *head = &jrnl->j_journal_list;
+    struct reiserfs_journal_list *first;
+
+    if (list_empty(head))
+        return 0;
+
+    first = JOURNAL_LIST_ENTRY(head->next);
+    return (first->j_trans_id <= trans_id) ? 1 : 0;
+}
+
+/*
+ * b_end_io handler installed by submit_logged_buffer(). Warns if the buffer
+ * is still marked journaled, records the IO result in the uptodate bit, then
+ * unlocks the buffer and drops the reference taken at submit time.
+ */
+static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) {
+    char name_buf[BDEVNAME_SIZE];
+
+    if (buffer_journaled(bh)) {
+        reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk",
+                         bh->b_blocknr, bdevname(bh->b_bdev, name_buf)) ;
+    }
+
+    if (!uptodate)
+        clear_buffer_uptodate(bh);
+    else
+        set_buffer_uptodate(bh);
+
+    unlock_buffer(bh);
+    put_bh(bh);
+}
+
+/*
+ * b_end_io handler for ordered and barrier writes: records the IO result in
+ * the uptodate bit, unlocks the buffer and drops the submit-time reference.
+ */
+static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) {
+    if (!uptodate)
+        clear_buffer_uptodate(bh);
+    else
+        set_buffer_uptodate(bh);
+
+    unlock_buffer(bh);
+    put_bh(bh);
+}
+
+/*
+ * Queue a logged buffer for writing. Takes a reference for the IO, installs
+ * reiserfs_end_buffer_io_sync as completion handler and clears the
+ * journal_new and dirty bits. It is a BUG if the journal_test bit was not
+ * set or if the buffer is not uptodate.
+ */
+static void submit_logged_buffer(struct buffer_head *bh) {
+    get_bh(bh);
+    bh->b_end_io = reiserfs_end_buffer_io_sync;
+    clear_buffer_journal_new (bh);
+    clear_buffer_dirty(bh);
+
+    /* the test-and-clear must always run, so keep it out of BUG_ON() */
+    if (!test_clear_buffer_journal_test (bh)) {
+        BUG();
+    }
+    if (!buffer_uptodate(bh)) {
+        BUG();
+    }
+    submit_bh(WRITE, bh);
+}
+
+/*
+ * Queue an ordered-data buffer for writing: take a reference for the IO,
+ * install reiserfs_end_ordered_io, clear the dirty bit and submit a WRITE.
+ * The buffer must be uptodate.
+ */
+static void submit_ordered_buffer(struct buffer_head *bh) {
+    get_bh(bh);
+    bh->b_end_io = reiserfs_end_ordered_io;
+    clear_buffer_dirty(bh);
+    if (!buffer_uptodate(bh)) {
+        BUG();
+    }
+    submit_bh(WRITE, bh);
+}
+
+/*
+ * Same preparation as submit_ordered_buffer(), but the buffer goes out as a
+ * WRITE_BARRIER request. Returns whatever submit_bh() returns, so the caller
+ * can react to the barrier being rejected.
+ */
+static int submit_barrier_buffer(struct buffer_head *bh) {
+    int status;
+
+    get_bh(bh);
+    bh->b_end_io = reiserfs_end_ordered_io;
+    clear_buffer_dirty(bh);
+    if (!buffer_uptodate(bh)) {
+        BUG();
+    }
+    status = submit_bh(WRITE_BARRIER, bh);
+    return status;
+}
+
+/*
+ * If bh came back flagged "operation not supported", fall back to a plain
+ * write: clear the flag, disable barriers for the filesystem, then re-dirty
+ * the buffer and write it synchronously. Otherwise there is nothing to do.
+ */
+static void check_barrier_completion(struct super_block *s,
+                                     struct buffer_head *bh) {
+    if (!buffer_eopnotsupp(bh))
+        return;
+
+    clear_buffer_eopnotsupp(bh);
+    disable_barrier(s);
+    set_buffer_uptodate(bh);
+    set_buffer_dirty(bh);
+    sync_dirty_buffer(bh);
+}
+
+/* capacity of a struct buffer_chunk; add_to_chunk() flushes when it is reached */
+#define CHUNK_SIZE 32
+/* a small batch of buffer heads collected for submission in one go */
+struct buffer_chunk {
+ struct buffer_head *bh[CHUNK_SIZE];
+ int nr; /* number of valid entries in bh[] */
+};
+
+/* Submit every buffer collected in chunk as a logged buffer, then mark the chunk empty. */
+static void write_chunk(struct buffer_chunk *chunk) {
+    int idx = 0;
+
+    while (idx < chunk->nr) {
+        submit_logged_buffer(chunk->bh[idx]);
+        idx++;
+    }
+    chunk->nr = 0;
+}
+
+/* Submit every buffer collected in chunk as an ordered buffer, then mark the chunk empty. */
+static void write_ordered_chunk(struct buffer_chunk *chunk) {
+    int idx = 0;
+
+    while (idx < chunk->nr) {
+        submit_ordered_buffer(chunk->bh[idx]);
+        idx++;
+    }
+    chunk->nr = 0;
+}
+
+/*
+ * Append bh to chunk. When that fills the chunk, fn(chunk) is called to
+ * flush it; if a lock was passed in, it is dropped around the call and
+ * re-taken afterwards.
+ *
+ * Returns 1 if the chunk was flushed (so the lock, if any, was released for
+ * a while), 0 otherwise. Calling this on an already full chunk is a BUG.
+ */
+static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
+                        spinlock_t *lock,
+                        void (fn)(struct buffer_chunk *))
+{
+    if (chunk->nr >= CHUNK_SIZE) {
+        BUG();
+    }
+
+    chunk->bh[chunk->nr] = bh;
+    chunk->nr++;
+    if (chunk->nr < CHUNK_SIZE)
+        return 0;
+
+    if (lock)
+        spin_unlock(lock);
+    fn(chunk);
+    if (lock)
+        spin_lock(lock);
+    return 1;
+}
+
+
+static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
+/*
+ * Allocate a reiserfs_jh with GFP_NOFS, yielding and retrying until the
+ * allocation succeeds; never returns NULL. Each success is counted in
+ * nr_reiserfs_jh.
+ */
+static struct reiserfs_jh *alloc_jh(void) {
+    struct reiserfs_jh *jh;
+
+    for (;;) {
+        jh = kmalloc(sizeof(*jh), GFP_NOFS);
+        if (!jh) {
+            yield();
+            continue;
+        }
+        atomic_inc(&nr_reiserfs_jh);
+        return jh;
+    }
+}
+
+/*
+ * Detach and free the reiserfs_jh hanging off bh->b_private, if any.
+ * We want to free the jh once the buffer has been written and waited on.
+ * Also drops the nr_reiserfs_jh count and one reference on bh.
+ */
+void reiserfs_free_jh(struct buffer_head *bh) {
+    struct reiserfs_jh *jh = bh->b_private;
+
+    if (!jh)
+        return;
+
+    bh->b_private = NULL;
+    jh->bh = NULL;
+    list_del_init(&jh->list);
+    kfree(jh);
+
+    /* the counter must still account for the jh that was just freed */
+    if (atomic_read(&nr_reiserfs_jh) <= 0) {
+        BUG();
+    }
+    atomic_dec(&nr_reiserfs_jh);
+    put_bh(bh);
+}
+
+/*
+ * Put bh on one of the current journal list's ordered-buffer lists.
+ *
+ * j    - journal whose j_current_jl receives the buffer
+ * bh   - buffer to track; its b_private points at the reiserfs_jh
+ * tail - non-zero: queue on j_tail_bh_list, zero: queue on j_bh_list
+ *
+ * If bh already has a jh it is unlinked from whatever list it is on and
+ * reused; otherwise a new jh is allocated and a reference on bh is taken.
+ * All list manipulation happens under j->j_dirty_buffers_lock.
+ * Always returns 0.
+ */
+static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
+ int tail)
+{
+ struct reiserfs_jh *jh;
+
+ if (bh->b_private) {
+ spin_lock(&j->j_dirty_buffers_lock);
+ /* recheck under the lock: the jh may have been freed in the meantime */
+ if (!bh->b_private) {
+ spin_unlock(&j->j_dirty_buffers_lock);
+ goto no_jh;
+ }
+ jh = bh->b_private;
+ list_del_init(&jh->list);
+ } else {
+no_jh:
+ /* reference and allocation are done before taking the spinlock */
+ get_bh(bh);
+ jh = alloc_jh();
+ spin_lock(&j->j_dirty_buffers_lock);
+ /* buffer must be locked for __add_jh, so two adds should not be
+ * able to happen at the same time
+ */
+ if (bh->b_private)
+ BUG();
+ jh->bh = bh;
+ bh->b_private = jh;
+ }
+ /* both paths arrive here holding j_dirty_buffers_lock */
+ jh->jl = j->j_current_jl;
+ if (tail)
+ list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
+ else {
+ list_add_tail(&jh->list, &jh->jl->j_bh_list);
+ }
+ spin_unlock(&j->j_dirty_buffers_lock);
+ return 0;
+}
+
+/* Track bh on the tail list (j_tail_bh_list) of the current journal list of inode's filesystem. */
+int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL(inode->i_sb);
+
+    return __add_jh(jrnl, bh, 1);
+}
+/* Track bh on the ordered list (j_bh_list) of the current journal list of inode's filesystem. */
+int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) {
+    struct reiserfs_journal *jrnl = SB_JOURNAL(inode->i_sb);
+
+    return __add_jh(jrnl, bh, 0);
+}
+
+#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
+/*
+ * Write out and wait for every buffer tracked on `list'.
+ *
+ * lock - spinlock protecting `list'; taken here, dropped around blocking
+ *        calls and released before returning
+ * j    - journal; only j->j_errno is read, as the initial return value
+ * jl   - not referenced by this function
+ * list - list of reiserfs_jh entries (linked through jh->list)
+ *
+ * Returns -EIO if any buffer that went through the wait phase is not
+ * uptodate afterwards, otherwise the value j->j_errno had on entry.
+ */
+static int write_ordered_buffers(spinlock_t *lock,
+ struct reiserfs_journal *j,
+ struct reiserfs_journal_list *jl,
+ struct list_head *list)
+{
+ struct buffer_head *bh;
+ struct reiserfs_jh *jh;
+ int ret = j->j_errno;
+ struct buffer_chunk chunk;
+ struct list_head tmp;
+ INIT_LIST_HEAD(&tmp);
+
+ chunk.nr = 0;
+ spin_lock(lock);
+ /* phase 1: move entries from `list' to `tmp', submitting dirty buffers */
+ while(!list_empty(list)) {
+ jh = JH_ENTRY(list->next);
+ bh = jh->bh;
+ get_bh(bh);
+ /* non-zero means somebody else already holds the buffer lock */
+ if (test_set_buffer_locked(bh)) {
+ if (!buffer_dirty(bh)) {
+ /* locked and clean: just wait for it in phase 3 */
+ list_del_init(&jh->list);
+ list_add(&jh->list, &tmp);
+ goto loop_next;
+ }
+ /* locked and dirty: flush what we have, wait, then retry this
+ * entry (it is still at the head of `list')
+ */
+ spin_unlock(lock);
+ if (chunk.nr)
+ write_ordered_chunk(&chunk);
+ wait_on_buffer(bh);
+ cond_resched();
+ spin_lock(lock);
+ goto loop_next;
+ }
+ /* we hold the buffer lock from here on */
+ if (buffer_dirty(bh)) {
+ list_del_init(&jh->list);
+ list_add(&jh->list, &tmp);
+ /* may drop and re-take `lock' if the chunk fills up */
+ add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
+ } else {
+ /* clean and unlocked: nothing to write, stop tracking it */
+ reiserfs_free_jh(bh);
+ unlock_buffer(bh);
+ }
+loop_next:
+ put_bh(bh);
+ cond_resched_lock(lock);
+ }
+ /* phase 2: submit whatever is left in the partially filled chunk */
+ if (chunk.nr) {
+ spin_unlock(lock);
+ write_ordered_chunk(&chunk);
+ spin_lock(lock);
+ }
+ /* phase 3: wait for everything on `tmp' and collect IO errors */
+ while(!list_empty(&tmp)) {
+ jh = JH_ENTRY(tmp.prev);
+ bh = jh->bh;
+ get_bh(bh);
+ /* unlinks jh from `tmp', so the loop makes progress */
+ reiserfs_free_jh(bh);
+
+ if (buffer_locked(bh)) {
+ spin_unlock(lock);
+ wait_on_buffer(bh);
+ spin_lock(lock);
+ }
+ if (!buffer_uptodate(bh)) {
+ ret = -EIO;
+ }
+ put_bh(bh);
+ cond_resched_lock(lock);
+ }
+ spin_unlock(lock);
+ return ret;
+}
+
+/*
+ * Flush the commit blocks of every journal list older than jl that has not
+ * yet been marked j_older_commits_done.
+ *
+ * Returns 1 if jl itself disappeared while older lists were being flushed
+ * (the caller must not touch it any more), 0 otherwise.
+ */
+static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
+ struct reiserfs_journal *journal = SB_JOURNAL (s);
+ struct reiserfs_journal_list *other_jl;
+ struct reiserfs_journal_list *first_jl;
+ struct list_head *entry;
+ unsigned long trans_id = jl->j_trans_id;
+ unsigned long other_trans_id;
+ unsigned long first_trans_id;
+
+find_first:
+ /*
+ * first we walk backwards to find the oldest uncommitted transaction
+ */
+ first_jl = jl;
+ entry = jl->j_list.prev;
+ while(1) {
+ other_jl = JOURNAL_LIST_ENTRY(entry);
+ /* stop at the list head or at the first list already fully committed */
+ if (entry == &journal->j_journal_list ||
+ atomic_read(&other_jl->j_older_commits_done))
+ break;
+
+ first_jl = other_jl;
+ entry = other_jl->j_list.prev;
+ }
+
+ /* if we didn't find any older uncommitted transactions, return now */
+ if (first_jl == jl) {
+ return 0;
+ }
+
+ /* NOTE(review): first_trans_id is assigned here but never read afterwards */
+ first_trans_id = first_jl->j_trans_id;
+
+ /* now walk forward from the oldest one, flushing each list older than jl */
+ entry = &first_jl->j_list;
+ while(1) {
+ other_jl = JOURNAL_LIST_ENTRY(entry);
+ other_trans_id = other_jl->j_trans_id;
+
+ if (other_trans_id < trans_id) {
+ if (atomic_read(&other_jl->j_commit_left) != 0) {
+ flush_commit_list(s, other_jl, 0);
+
+ /* list we were called with is gone, return */
+ if (!journal_list_still_alive(s, trans_id))
+ return 1;
+
+ /* the one we just flushed is gone, this means all
+ * older lists are also gone, so first_jl is no longer
+ * valid either. Go back to the beginning.
+ */
+ if (!journal_list_still_alive(s, other_trans_id)) {
+ goto find_first;
+ }
+ }
+ entry = entry->next;
+ if (entry == &journal->j_journal_list)
+ return 0;
+ } else {
+ /* reached jl (or something newer): nothing older is left */
+ return 0;
+ }
+ }
+ return 0;
+}
+/*
+ * Throttle the caller while commit log blocks are being submitted: if the
+ * journal's j_async_throttle count is non-zero, wait for write congestion
+ * to ease (for at most HZ/10 jiffies). Always returns 0.
+ *
+ * The wait-queue entry the original declared with DEFINE_WAIT() was never
+ * used and has been dropped.
+ */
+int reiserfs_async_progress_wait(struct super_block *s) {
+    struct reiserfs_journal *j = SB_JOURNAL(s);
+
+    if (atomic_read(&j->j_async_throttle))
+        blk_congestion_wait(WRITE, HZ/10);
+    return 0;
+}
+
+/*
+** if this journal list still has commit blocks unflushed, send them to disk.
+**
+** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
+** Before the commit block can be written, every other log block must be safely on disk
+**
+** s        - super block of the filesystem
+** jl       - journal list to commit; a reference is held on it for the
+**            duration of the call
+** flushall - non-zero: flush all older commits first and mark jl as
+**            j_older_commits_done when finished
+**
+** Returns 0 on success (including "nothing left to do"), or a negative
+** error, in which case reiserfs_abort() has been called.
+** Must be called with the big kernel lock held.
+*/
+static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) {
+ int i;
+ int bn ;
+ struct buffer_head *tbh = NULL ;
+ unsigned long trans_id = jl->j_trans_id;
+ struct reiserfs_journal *journal = SB_JOURNAL (s);
+ int barrier = 0;
+ int retval = 0;
+
+ reiserfs_check_lock_depth(s, "flush_commit_list") ;
+
+ if (atomic_read(&jl->j_older_commits_done)) {
+ return 0 ;
+ }
+
+ /* before we can put our commit blocks on disk, we have to make sure everyone older than
+ ** us is on disk too
+ */
+ BUG_ON (jl->j_len <= 0);
+ BUG_ON (trans_id == journal->j_trans_id);
+
+ get_journal_list(jl);
+ if (flushall) {
+ if (flush_older_commits(s, jl) == 1) {
+ /* list disappeared during flush_older_commits. return */
+ goto put_jl;
+ }
+ }
+
+ /* make sure nobody is trying to flush this one at the same time */
+ down(&jl->j_commit_lock);
+ if (!journal_list_still_alive(s, trans_id)) {
+ up(&jl->j_commit_lock);
+ goto put_jl;
+ }
+ BUG_ON (jl->j_trans_id == 0);
+
+ /* this commit is done, exit */
+ if (atomic_read(&(jl->j_commit_left)) <= 0) {
+ if (flushall) {
+ atomic_set(&(jl->j_older_commits_done), 1) ;
+ }
+ up(&jl->j_commit_lock);
+ goto put_jl;
+ }
+
+ /* write the ordered data buffers first; the big kernel lock is dropped
+ * while doing so. The return value of write_ordered_buffers() is not
+ * checked here.
+ */
+ if (!list_empty(&jl->j_bh_list)) {
+ unlock_kernel();
+ write_ordered_buffers(&journal->j_dirty_buffers_lock,
+ journal, jl, &jl->j_bh_list);
+ lock_kernel();
+ }
+ BUG_ON (!list_empty(&jl->j_bh_list));
+ /*
+ * for the description block and all the log blocks, submit any buffers
+ * that haven't already reached the disk
+ */
+ atomic_inc(&journal->j_async_throttle);
+ for (i = 0 ; i < (jl->j_len + 1) ; i++) {
+ /* the log area is circular, hence the modulo on the journal size */
+ bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) %
+ SB_ONDISK_JOURNAL_SIZE(s);
+ /* NOTE(review): tbh is used without a NULL check; presumably the lookup
+ * cannot fail for log blocks still referenced from do_journal_end -- confirm
+ */
+ tbh = journal_find_get_block(s, bn) ;
+ if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
+ ll_rw_block(WRITE, 1, &tbh) ;
+ put_bh(tbh) ;
+ }
+ atomic_dec(&journal->j_async_throttle);
+
+ /* wait on everything written so far before writing the commit
+ * if we are in barrier mode, send the commit down now
+ */
+ barrier = reiserfs_barrier_flush(s);
+ if (barrier) {
+ int ret;
+ lock_buffer(jl->j_commit_bh);
+ ret = submit_barrier_buffer(jl->j_commit_bh);
+ if (ret == -EOPNOTSUPP) {
+ /* barriers are not supported: fall back to the non-barrier path below */
+ set_buffer_uptodate(jl->j_commit_bh);
+ disable_barrier(s);
+ barrier = 0;
+ }
+ }
+ for (i = 0 ; i < (jl->j_len + 1) ; i++) {
+ bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
+ (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ;
+ tbh = journal_find_get_block(s, bn) ;
+ wait_on_buffer(tbh) ;
+ // since we're using ll_rw_blk above, it might have skipped over
+ // a locked buffer. Double check here
+ //
+ if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
+ sync_dirty_buffer(tbh);
+ if (unlikely (!buffer_uptodate(tbh))) {
+#ifdef CONFIG_REISERFS_CHECK
+ reiserfs_warning(s, "journal-601, buffer write failed") ;
+#endif
+ retval = -EIO;
+ }
+ put_bh(tbh) ; /* once for journal_find_get_block */
+ put_bh(tbh) ; /* once due to original getblk in do_journal_end */
+ atomic_dec(&(jl->j_commit_left)) ;
+ }
+
+ /* only the commit block itself should be outstanding now */
+ BUG_ON (atomic_read(&(jl->j_commit_left)) != 1);
+
+ if (!barrier) {
+ if (buffer_dirty(jl->j_commit_bh))
+ BUG();
+ mark_buffer_dirty(jl->j_commit_bh) ;
+ sync_dirty_buffer(jl->j_commit_bh) ;
+ } else
+ wait_on_buffer(jl->j_commit_bh);
+
+ check_barrier_completion(s, jl->j_commit_bh);
+
+ /* If there was a write error in the journal - we can't commit this
+ * transaction - it will be invalid and, if successful, will just end
+ * up propagating the write error out to the filesystem. */
+ if (unlikely (!buffer_uptodate(jl->j_commit_bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+ reiserfs_warning(s, "journal-615: buffer write failed") ;
+#endif
+ retval = -EIO;
+ }
+ bforget(jl->j_commit_bh) ;
+ /* commits are expected to complete in transaction-id order; warn if not */
+ if (journal->j_last_commit_id != 0 &&
+ (jl->j_trans_id - journal->j_last_commit_id) != 1) {
+ reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
+ journal->j_last_commit_id,
+ jl->j_trans_id);
+ }
+ journal->j_last_commit_id = jl->j_trans_id;
+
+ /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
+ cleanup_freed_for_journal_list(s, jl) ;
+
+ /* a local IO error takes precedence over a previously recorded journal error */
+ retval = retval ? retval : journal->j_errno;
+
+ /* mark the metadata dirty */
+ if (!retval)
+ dirty_one_transaction(s, jl);
+ atomic_dec(&(jl->j_commit_left)) ;
+
+ if (flushall) {
+ atomic_set(&(jl->j_older_commits_done), 1) ;
+ }
+ up(&jl->j_commit_lock);
+put_jl:
+ /* drop the reference taken by get_journal_list() above */
+ put_journal_list(s, jl);
+
+ if (retval)
+ reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
+ return retval;
+}
+
+/*
+** flush_journal_list frequently needs to find a newer transaction for a
+** given block. Starting from cn's hash predecessor, walk the hprev chain and
+** return the journal list of the first cnode that has the same super block
+** and block number and a non-NULL jlist. Returns NULL if there is none.
+*/
+static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) {
+    struct super_block *sb = cn->sb;
+    b_blocknr_t blocknr = cn->blocknr ;
+    struct reiserfs_journal_cnode *walk;
+
+    for (walk = cn->hprev; walk; walk = walk->hprev) {
+        if (walk->sb != sb || walk->blocknr != blocknr)
+            continue;
+        if (walk->jlist)
+            return walk->jlist;
+    }
+    return NULL;
+}
+
+static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **,