Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/journal.c
1 files changed, 3876 insertions, 0 deletions
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
new file mode 100644
index 00000000000..c9ad3a7849f
--- /dev/null
+++ b/fs/reiserfs/journal.c
@@ -0,0 +1,3876 @@
+/*
+** Write ahead logging implementation copyright Chris Mason 2000
+**
+** The background commits make this code very interelated, and 
+** overly complex.  I need to rethink things a bit....The major players:
+**
+** journal_begin -- call with the number of blocks you expect to log.  
+**                  If the current transaction is too
+** 		    old, it will block until the current transaction is 
+** 		    finished, and then start a new one.
+**		    Usually, your transaction will get joined in with 
+**                  previous ones for speed.
+**
+** journal_join  -- same as journal_begin, but won't block on the current 
+**                  transaction regardless of age.  Don't ever call
+**                  this.  Ever.  There are only two places it should be 
+**                  called from, and they are both inside this file.
+**
+** journal_mark_dirty -- adds blocks into this transaction.  clears any flags 
+**                       that might make them get sent to disk
+**                       and then marks them BH_JDirty.  Puts the buffer head 
+**                       into the current transaction hash.  
+**
+** journal_end -- if the current transaction is batchable, it does nothing
+**                   otherwise, it could do an async/synchronous commit, or
+**                   a full flush of all log and real blocks in the 
+**                   transaction.
+**
+** flush_old_commits -- if the current transaction is too old, it is ended and 
+**                      commit blocks are sent to disk.  Forces commit blocks 
+**                      to disk for all backgrounded commits that have been 
+**                      around too long.
+**		     -- Note, if you call this as an immediate flush from 
+**		        from within kupdate, it will ignore the immediate flag
+*/
+
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/time.h>
+#include <asm/semaphore.h>
+
+#include <linux/vmalloc.h>
+#include <linux/reiserfs_fs.h>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+
+
+/* gets a struct reiserfs_journal_list * from a list head */
+#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_list))
+#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
+                               j_working_list))
+
+/* the number of mounted filesystems.  This is used to decide when to
+** start and kill the commit workqueue
+*/
+static int reiserfs_mounted_fs_count;
+
+static struct workqueue_struct *commit_wq;
+
+#define JOURNAL_TRANS_HALF 1018   /* must be correct to keep the desc and commit
+				     structs at 4k */
+#define BUFNR 64 /*read ahead */
+
+/* cnode stat bits.  Move these into reiserfs_fs.h */
+
+#define BLOCK_FREED 2		/* this block was freed, and can't be written.  */
+#define BLOCK_FREED_HOLDER 3    /* this block was freed during this transaction, and can't be written */
+
+#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
+#define BLOCK_DIRTIED 5
+
+
+/* journal list state bits */
+#define LIST_TOUCHED 1
+#define LIST_DIRTY   2
+#define LIST_COMMIT_PENDING  4		/* someone will commit this list */
+
+/* flags for do_journal_end */
+#define FLUSH_ALL   1		/* flush commit and real blocks */
+#define COMMIT_NOW  2		/* end and commit this transaction */
+#define WAIT        4		/* wait for the log blocks to hit the disk*/
+
+static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ;
+static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ;
+static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall)  ;
+static int can_dirty(struct reiserfs_journal_cnode *cn) ;
+static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks);
+static int release_journal_dev( struct super_block *super,
+				struct reiserfs_journal *journal );
+static int dirty_one_transaction(struct super_block *s,
+                                 struct reiserfs_journal_list *jl);
+static void flush_async_commits(void *p);
+static void queue_log_writer(struct super_block *s);
+
+/* values for join in do_journal_begin_r */
+enum {
+    JBEGIN_REG = 0, /* regular journal begin */
+    JBEGIN_JOIN = 1, /* join the running transaction if at all possible */
+    JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */
+};
+
+static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
+                             struct super_block * p_s_sb,
+			     unsigned long nblocks,int join);
+
+static void init_journal_hash(struct super_block *p_s_sb) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
+}
+
+/*
+** clears BH_Dirty and sticks the buffer on the clean list.  Called because I can't allow refile_buffer to
+** make schedule happen after I've freed a block.  Look at remove_from_transaction and journal_mark_freed for
+** more details.
+*/
+static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
+  if (bh) {
+    clear_buffer_dirty(bh);
+    clear_buffer_journal_test(bh);
+  }
+  return 0 ;
+}
+
+static void disable_barrier(struct super_block *s)
+{
+    REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
+    printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s));
+}
+
+static struct reiserfs_bitmap_node *
+allocate_bitmap_node(struct super_block *p_s_sb) {
+  struct reiserfs_bitmap_node *bn ;
+  static int id;
+
+  bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ;
+  if (!bn) {
+    return NULL ;
+  }
+  bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ;
+  if (!bn->data) {
+    reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
+    return NULL ;
+  }
+  bn->id = id++ ;
+  memset(bn->data, 0, p_s_sb->s_blocksize) ;
+  INIT_LIST_HEAD(&bn->list) ;
+  return bn ;
+}
+
+static struct reiserfs_bitmap_node *
+get_bitmap_node(struct super_block *p_s_sb) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  struct reiserfs_bitmap_node *bn = NULL;
+  struct list_head *entry = journal->j_bitmap_nodes.next ;
+
+  journal->j_used_bitmap_nodes++ ;
+repeat:
+
+  if(entry != &journal->j_bitmap_nodes) {
+    bn = list_entry(entry, struct reiserfs_bitmap_node, list) ;
+    list_del(entry) ;
+    memset(bn->data, 0, p_s_sb->s_blocksize) ;
+    journal->j_free_bitmap_nodes-- ;
+    return bn ;
+  }
+  bn = allocate_bitmap_node(p_s_sb) ;
+  if (!bn) {
+    yield();
+    goto repeat ;
+  }
+  return bn ;
+}
+static inline void free_bitmap_node(struct super_block *p_s_sb,
+                                    struct reiserfs_bitmap_node *bn) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  journal->j_used_bitmap_nodes-- ;
+  if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
+    reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ;
+    reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
+  } else {
+    list_add(&bn->list, &journal->j_bitmap_nodes) ;
+    journal->j_free_bitmap_nodes++ ;
+  }
+}
+
+static void allocate_bitmap_nodes(struct super_block *p_s_sb) {
+  int i ;
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  struct reiserfs_bitmap_node *bn = NULL ;
+  for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) {
+    bn = allocate_bitmap_node(p_s_sb) ;
+    if (bn) {
+      list_add(&bn->list, &journal->j_bitmap_nodes) ;
+      journal->j_free_bitmap_nodes++ ;
+    } else {
+      break ; // this is ok, we'll try again when more are needed 
+    }
+  }
+}
+
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+                                  struct reiserfs_list_bitmap *jb) {
+  int bmap_nr = block / (p_s_sb->s_blocksize << 3) ;
+  int bit_nr = block % (p_s_sb->s_blocksize << 3) ;
+
+  if (!jb->bitmaps[bmap_nr]) {
+    jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ;
+  }
+  set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ;
+  return 0 ;
+}
+
+static void cleanup_bitmap_list(struct super_block *p_s_sb,
+                                struct reiserfs_list_bitmap *jb) {
+  int i;
+  if (jb->bitmaps == NULL)
+    return;
+
+  for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) {
+    if (jb->bitmaps[i]) {
+      free_bitmap_node(p_s_sb, jb->bitmaps[i]) ;
+      jb->bitmaps[i] = NULL ;
+    }
+  }
+}
+
+/*
+** only call this on FS unmount.
+*/
+static int free_list_bitmaps(struct super_block *p_s_sb,
+                             struct reiserfs_list_bitmap *jb_array) {
+  int i ;
+  struct reiserfs_list_bitmap *jb ;
+  for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
+    jb = jb_array + i ;
+    jb->journal_list = NULL ;
+    cleanup_bitmap_list(p_s_sb, jb) ;
+    vfree(jb->bitmaps) ;
+    jb->bitmaps = NULL ;
+  }
+  return 0;
+}
+
+static int free_bitmap_nodes(struct super_block *p_s_sb) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  struct list_head *next = journal->j_bitmap_nodes.next ;
+  struct reiserfs_bitmap_node *bn ;
+
+  while(next != &journal->j_bitmap_nodes) {
+    bn = list_entry(next, struct reiserfs_bitmap_node, list) ;
+    list_del(next) ;
+    reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ;
+    reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
+    next = journal->j_bitmap_nodes.next ;
+    journal->j_free_bitmap_nodes-- ;
+  }
+
+  return 0 ;
+}
+
+/*
+** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 
+** jb_array is the array to be filled in.
+*/
+int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
+                                   struct reiserfs_list_bitmap *jb_array,
+				   int bmap_nr) {
+  int i ;
+  int failed = 0 ;
+  struct reiserfs_list_bitmap *jb ;
+  int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ;
+
+  for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
+    jb = jb_array + i ;
+    jb->journal_list = NULL ;
+    jb->bitmaps = vmalloc( mem ) ;
+    if (!jb->bitmaps) {
+      reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ;
+      failed = 1;   
+      break ;
+    }
+    memset(jb->bitmaps, 0, mem) ;
+  }
+  if (failed) {
+    free_list_bitmaps(p_s_sb, jb_array) ;
+    return -1 ;
+  }
+  return 0 ;
+}
+
+/*
+** find an available list bitmap.  If you can't find one, flush a commit list 
+** and try again
+*/
+static struct reiserfs_list_bitmap *
+get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
+  int i,j ; 
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  struct reiserfs_list_bitmap *jb = NULL ;
+
+  for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) {
+    i = journal->j_list_bitmap_index ;
+    journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ;
+    jb = journal->j_list_bitmap + i ;
+    if (journal->j_list_bitmap[i].journal_list) {
+      flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ;
+      if (!journal->j_list_bitmap[i].journal_list) {
+	break ;
+      }
+    } else {
+      break ;
+    }
+  }
+  if (jb->journal_list) { /* double check to make sure if flushed correctly */
+    return NULL ;
+  }
+  jb->journal_list = jl ;
+  return jb ;
+}
+
+/* 
+** allocates a new chunk of X nodes, and links them all together as a list.
+** Uses the cnode->next and cnode->prev pointers
+** returns NULL on failure
+*/
+static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) {
+  struct reiserfs_journal_cnode *head ;
+  int i ;
+  if (num_cnodes <= 0) {
+    return NULL ;
+  }
+  head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ;
+  if (!head) {
+    return NULL ;
+  }
+  memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ;
+  head[0].prev = NULL ;
+  head[0].next = head + 1 ;
+  for (i = 1 ; i < num_cnodes; i++) {
+    head[i].prev = head + (i - 1) ;
+    head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */
+  }
+  head[num_cnodes -1].next = NULL ;
+  return head ;
+}
+
+/*
+** pulls a cnode off the free list, or returns NULL on failure 
+*/
+static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) {
+  struct reiserfs_journal_cnode *cn ;
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+
+  reiserfs_check_lock_depth(p_s_sb, "get_cnode") ;
+
+  if (journal->j_cnode_free <= 0) {
+    return NULL ;
+  }
+  journal->j_cnode_used++ ;
+  journal->j_cnode_free-- ;
+  cn = journal->j_cnode_free_list ;
+  if (!cn) {
+    return cn ;
+  }
+  if (cn->next) {
+    cn->next->prev = NULL ;
+  }
+  journal->j_cnode_free_list = cn->next ;
+  memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ;
+  return cn ;
+}
+
+/*
+** returns a cnode to the free list 
+*/
+static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+
+  reiserfs_check_lock_depth(p_s_sb, "free_cnode") ;
+
+  journal->j_cnode_used-- ;
+  journal->j_cnode_free++ ;
+  /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
+  cn->next = journal->j_cnode_free_list ;
+  if (journal->j_cnode_free_list) {
+    journal->j_cnode_free_list->prev = cn ;
+  }
+  cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */
+  journal->j_cnode_free_list = cn ;
+}
+
+static void clear_prepared_bits(struct buffer_head *bh) {
+  clear_buffer_journal_prepared (bh);
+  clear_buffer_journal_restore_dirty (bh);
+}
+
+/* utility function to force a BUG if it is called without the big
+** kernel lock held.  caller is the string printed just before calling BUG()
+*/
+void reiserfs_check_lock_depth(struct super_block *sb, char *caller) {
+#ifdef CONFIG_SMP
+  if (current->lock_depth < 0) {
+    reiserfs_panic (sb, "%s called without kernel lock held", caller) ;
+  }
+#else
+  ;
+#endif
+}
+
+/* return a cnode with same dev, block number and size in table, or null if not found */
+static inline struct reiserfs_journal_cnode *
+get_journal_hash_dev(struct super_block *sb,
+		     struct reiserfs_journal_cnode **table,
+		     long bl)
+{
+  struct reiserfs_journal_cnode *cn ;
+  cn = journal_hash(table, sb, bl) ;
+  while(cn) {
+    if (cn->blocknr == bl && cn->sb == sb)
+      return cn ;
+    cn = cn->hnext ;
+  }
+  return (struct reiserfs_journal_cnode *)0 ;
+}
+
+/*
+** this actually means 'can this block be reallocated yet?'.  If you set search_all, a block can only be allocated
+** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
+** being overwritten by a replay after crashing.
+**
+** If you don't set search_all, a block can only be allocated if it is not in the current transaction.  Since deleting
+** a block removes it from the current transaction, this case should never happen.  If you don't set search_all, make
+** sure you never write the block without logging it.
+**
+** next_zero_bit is a suggestion about the next block to try for find_forward.
+** when bl is rejected because it is set in a journal list bitmap, we search
+** for the next zero bit in the bitmap that rejected bl.  Then, we return that
+** through next_zero_bit for find_forward to try.
+**
+** Just because we return something in next_zero_bit does not mean we won't
+** reject it on the next call to reiserfs_in_journal
+**
+*/
+int reiserfs_in_journal(struct super_block *p_s_sb,
+                        int bmap_nr, int bit_nr, int search_all, 
+			b_blocknr_t *next_zero_bit) {
+  struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
+  struct reiserfs_journal_cnode *cn ;
+  struct reiserfs_list_bitmap *jb ;
+  int i ;
+  unsigned long bl;
+
+  *next_zero_bit = 0 ; /* always start this at zero. */
+
+  PROC_INFO_INC( p_s_sb, journal.in_journal );
+  /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
+  ** if we crash before the transaction that freed it commits,  this transaction won't
+  ** have committed either, and the block will never be written
+  */
+  if (search_all) {
+    for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
+      PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap );
+      jb = journal->j_list_bitmap + i ;
+      if (jb->journal_list && jb->bitmaps[bmap_nr] &&
+          test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) {
+	*next_zero_bit = find_next_zero_bit((unsigned long *)
+	                             (jb->bitmaps[bmap_nr]->data),
+	                             p_s_sb->s_blocksize << 3, bit_nr+1) ; 
+	return 1 ;
+      }
+    }
+  }
+
+  bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
+  /* is it in any old transactions? */
+  if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
+    return 1; 
+  }
+
+  /* is it in the current transaction.  This should never happen */
+  if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
+    BUG();
+    return 1; 
+  }
+
+  PROC_INFO_INC( p_s_sb, journal.in_journal_reusable );
+  /* safe for reuse */
+  return 0 ;
+}
+
+/* insert cn into table
+*/
+static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) {
+  struct reiserfs_journal_cnode *cn_orig ;
+
+  cn_orig = journal_hash(table, cn->sb, cn->blocknr) ;
+  cn->hnext = cn_orig ;
+  cn->hprev = NULL ;
+  if (cn_orig) {
+    cn_orig->hprev = cn ;
+  }
+  journal_hash(table, cn->sb, cn->blocknr) =  cn ;
+}
+
+/* lock the current transaction */
+inline static void lock_journal(struct super_block *p_s_sb) {
+    PROC_INFO_INC( p_s_sb, journal.lock_journal );
+    down(&SB_JOURNAL(p_s_sb)->j_lock);
+}
+
+/* unlock the current transaction */
+inline static void unlock_journal(struct super_block *p_s_sb) {
+    up(&SB_JOURNAL(p_s_sb)->j_lock);
+}
+
+static inline void get_journal_list(struct reiserfs_journal_list *jl)
+{
+    jl->j_refcount++;
+}
+
+static inline void put_journal_list(struct super_block *s,
+                                   struct reiserfs_journal_list *jl)
+{
+    if (jl->j_refcount < 1) {
+        reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id,
+	                                         jl->j_refcount);
+    }
+    if (--jl->j_refcount == 0)
+        reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+}
+
+/*
+** this used to be much more involved, and I'm keeping it just in case things get ugly again.
+** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
+** transaction.
+*/
+static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
+
+  struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ;
+  if (jb) {
+    cleanup_bitmap_list(p_s_sb, jb) ;
+  }
+  jl->j_list_bitmap->journal_list = NULL ;
+  jl->j_list_bitmap = NULL ;
+}
+
+static int journal_list_still_alive(struct super_block *s,
+                                    unsigned long trans_id)
+{
+    struct reiserfs_journal *journal = SB_JOURNAL (s);
+    struct list_head *entry = &journal->j_journal_list;
+    struct reiserfs_journal_list *jl;
+
+    if (!list_empty(entry)) {
+        jl = JOURNAL_LIST_ENTRY(entry->next);
+	if (jl->j_trans_id <= trans_id) {
+	    return 1;
+	}
+    }
+    return 0;
+}
+
+static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) {
+    char b[BDEVNAME_SIZE];
+
+    if (buffer_journaled(bh)) {
+        reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk",
+	                 bh->b_blocknr, bdevname(bh->b_bdev, b)) ;
+    }
+    if (uptodate)
+    	set_buffer_uptodate(bh) ;
+    else
+    	clear_buffer_uptodate(bh) ;
+    unlock_buffer(bh) ;
+    put_bh(bh) ;
+}
+
+static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) {
+    if (uptodate)
+    	set_buffer_uptodate(bh) ;
+    else
+    	clear_buffer_uptodate(bh) ;
+    unlock_buffer(bh) ;
+    put_bh(bh) ;
+}
+
+static void submit_logged_buffer(struct buffer_head *bh) {
+    get_bh(bh) ;
+    bh->b_end_io = reiserfs_end_buffer_io_sync ;
+    clear_buffer_journal_new (bh);
+    clear_buffer_dirty(bh) ;
+    if (!test_clear_buffer_journal_test (bh))
+        BUG();
+    if (!buffer_uptodate(bh))
+        BUG();
+    submit_bh(WRITE, bh) ;
+}
+
+static void submit_ordered_buffer(struct buffer_head *bh) {
+    get_bh(bh) ;
+    bh->b_end_io = reiserfs_end_ordered_io;
+    clear_buffer_dirty(bh) ;
+    if (!buffer_uptodate(bh))
+        BUG();
+    submit_bh(WRITE, bh) ;
+}
+
+static int submit_barrier_buffer(struct buffer_head *bh) {
+    get_bh(bh) ;
+    bh->b_end_io = reiserfs_end_ordered_io;
+    clear_buffer_dirty(bh) ;
+    if (!buffer_uptodate(bh))
+        BUG();
+    return submit_bh(WRITE_BARRIER, bh) ;
+}
+
+static void check_barrier_completion(struct super_block *s,
+                                     struct buffer_head *bh) {
+    if (buffer_eopnotsupp(bh)) {
+	clear_buffer_eopnotsupp(bh);
+	disable_barrier(s);
+	set_buffer_uptodate(bh);
+	set_buffer_dirty(bh);
+	sync_dirty_buffer(bh);
+    }
+}
+
+#define CHUNK_SIZE 32
+struct buffer_chunk {
+    struct buffer_head *bh[CHUNK_SIZE];
+    int nr;
+};
+
+static void write_chunk(struct buffer_chunk *chunk) {
+    int i;
+    for (i = 0; i < chunk->nr ; i++) {
+	submit_logged_buffer(chunk->bh[i]) ;
+    }
+    chunk->nr = 0;
+}
+
+static void write_ordered_chunk(struct buffer_chunk *chunk) {
+    int i;
+    for (i = 0; i < chunk->nr ; i++) {
+	submit_ordered_buffer(chunk->bh[i]) ;
+    }
+    chunk->nr = 0;
+}
+
+static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
+			 spinlock_t *lock,
+			 void (fn)(struct buffer_chunk *))
+{
+    int ret = 0;
+    if (chunk->nr >= CHUNK_SIZE)
+        BUG();
+    chunk->bh[chunk->nr++] = bh;
+    if (chunk->nr >= CHUNK_SIZE) {
+	ret = 1;
+        if (lock)
+	    spin_unlock(lock);
+        fn(chunk);
+        if (lock)
+	    spin_lock(lock);
+    }
+    return ret;
+}
+
+
+static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
+static struct reiserfs_jh *alloc_jh(void) {
+    struct reiserfs_jh *jh;
+    while(1) {
+	jh = kmalloc(sizeof(*jh), GFP_NOFS);
+	if (jh) {
+	    atomic_inc(&nr_reiserfs_jh);
+	    return jh;
+	}
+        yield();
+    }
+}
+
+/*
+ * we want to free the jh when the buffer has been written
+ * and waited on
+ */
+void reiserfs_free_jh(struct buffer_head *bh) {
+    struct reiserfs_jh *jh;
+
+    jh = bh->b_private;
+    if (jh) {
+	bh->b_private = NULL;
+	jh->bh = NULL;
+	list_del_init(&jh->list);
+	kfree(jh);
+	if (atomic_read(&nr_reiserfs_jh) <= 0)
+	    BUG();
+	atomic_dec(&nr_reiserfs_jh);
+	put_bh(bh);
+    }
+}
+
+static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
+                           int tail)
+{
+    struct reiserfs_jh *jh;
+
+    if (bh->b_private) {
+	spin_lock(&j->j_dirty_buffers_lock);
+	if (!bh->b_private) {
+	    spin_unlock(&j->j_dirty_buffers_lock);
+	    goto no_jh;
+	}
+        jh = bh->b_private;
+	list_del_init(&jh->list);
+    } else {
+no_jh:
+	get_bh(bh);
+	jh = alloc_jh();
+	spin_lock(&j->j_dirty_buffers_lock);
+	/* buffer must be locked for __add_jh, should be able to have
+	 * two adds at the same time
+	 */
+	if (bh->b_private)
+	    BUG();
+	jh->bh = bh;
+	bh->b_private = jh;
+    }
+    jh->jl = j->j_current_jl;
+    if (tail)
+	list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
+    else {
+	list_add_tail(&jh->list, &jh->jl->j_bh_list);
+    }
+    spin_unlock(&j->j_dirty_buffers_lock);
+    return 0;
+}
+
+int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) {
+    return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
+}
+int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) {
+    return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
+}
+
+#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
+static int write_ordered_buffers(spinlock_t *lock,
+				 struct reiserfs_journal *j,
+                                 struct reiserfs_journal_list *jl,
+				 struct list_head *list)
+{
+    struct buffer_head *bh;
+    struct reiserfs_jh *jh;
+    int ret = j->j_errno;
+    struct buffer_chunk chunk;
+    struct list_head tmp;
+    INIT_LIST_HEAD(&tmp);
+
+    chunk.nr = 0;
+    spin_lock(lock);
+    while(!list_empty(list)) {
+        jh = JH_ENTRY(list->next);
+	bh = jh->bh;
+	get_bh(bh);
+	if (test_set_buffer_locked(bh)) {
+	    if (!buffer_dirty(bh)) {
+		list_del_init(&jh->list);
+		list_add(&jh->list, &tmp);
+		goto loop_next;
+	    }
+	    spin_unlock(lock);
+	    if (chunk.nr)
+		write_ordered_chunk(&chunk);
+	    wait_on_buffer(bh);
+	    cond_resched();
+	    spin_lock(lock);
+	    goto loop_next;
+        }
+	if (buffer_dirty(bh)) {
+	    list_del_init(&jh->list);
+	    list_add(&jh->list, &tmp);
+            add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
+	} else {
+	    reiserfs_free_jh(bh);
+	    unlock_buffer(bh);
+	}
+loop_next:
+	put_bh(bh);
+	cond_resched_lock(lock);
+    }
+    if (chunk.nr) {
+	spin_unlock(lock);
+        write_ordered_chunk(&chunk);
+	spin_lock(lock);
+    }
+    while(!list_empty(&tmp)) {
+        jh = JH_ENTRY(tmp.prev);
+	bh = jh->bh;
+	get_bh(bh);
+	reiserfs_free_jh(bh);
+
+	if (buffer_locked(bh)) {
+	    spin_unlock(lock);
+	    wait_on_buffer(bh);
+	    spin_lock(lock);
+	}
+	if (!buffer_uptodate(bh)) {
+	    ret = -EIO;
+        }
+	put_bh(bh);
+	cond_resched_lock(lock);
+    }
+    spin_unlock(lock);
+    return ret;
+}
+
+static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
+    struct reiserfs_journal *journal = SB_JOURNAL (s);
+    struct reiserfs_journal_list *other_jl;
+    struct reiserfs_journal_list *first_jl;
+    struct list_head *entry;
+    unsigned long trans_id = jl->j_trans_id;
+    unsigned long other_trans_id;
+    unsigned long first_trans_id;
+
+find_first:
+    /*
+     * first we walk backwards to find the oldest uncommitted transation
+     */
+    first_jl = jl;
+    entry = jl->j_list.prev;
+    while(1) {
+	other_jl = JOURNAL_LIST_ENTRY(entry);
+	if (entry == &journal->j_journal_list ||
+	    atomic_read(&other_jl->j_older_commits_done))
+	    break;
+
+        first_jl = other_jl;
+	entry = other_jl->j_list.prev;
+    }
+
+    /* if we didn't find any older uncommitted transactions, return now */
+    if (first_jl == jl) {
+        return 0;
+    }
+
+    first_trans_id = first_jl->j_trans_id;
+
+    entry = &first_jl->j_list;
+    while(1) {
+	other_jl = JOURNAL_LIST_ENTRY(entry);
+	other_trans_id = other_jl->j_trans_id;
+
+	if (other_trans_id < trans_id) {
+	    if (atomic_read(&other_jl->j_commit_left) != 0) {
+		flush_commit_list(s, other_jl, 0);
+
+		/* list we were called with is gone, return */
+		if (!journal_list_still_alive(s, trans_id))
+		    return 1;
+
+		/* the one we just flushed is gone, this means all
+		 * older lists are also gone, so first_jl is no longer
+		 * valid either.  Go back to the beginning.
+		 */
+		if (!journal_list_still_alive(s, other_trans_id)) {
+		    goto find_first;
+		}
+	    }
+	    entry = entry->next;
+	    if (entry == &journal->j_journal_list)
+		return 0;
+	} else {
+	    return 0;
+	}
+    }
+    return 0;
+}
+int reiserfs_async_progress_wait(struct super_block *s) {
+    DEFINE_WAIT(wait);
+    struct reiserfs_journal *j = SB_JOURNAL(s);
+    if (atomic_read(&j->j_async_throttle))
+    	blk_congestion_wait(WRITE, HZ/10);
+    return 0;
+}
+
+/*
+** if this journal list still has commit blocks unflushed, send them to disk.
+**
+** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
+** Before the commit block can by written, every other log block must be safely on disk
+**
+*/
+static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) {
+  int i;
+  int bn ;
+  struct buffer_head *tbh = NULL ;
+  unsigned long trans_id = jl->j_trans_id;
+  struct reiserfs_journal *journal = SB_JOURNAL (s);
+  int barrier = 0;
+  int retval = 0;
+
+  reiserfs_check_lock_depth(s, "flush_commit_list") ;
+
+  if (atomic_read(&jl->j_older_commits_done)) {
+    return 0 ;
+  }
+
+  /* before we can put our commit blocks on disk, we have to make sure everyone older than
+  ** us is on disk too
+  */
+  BUG_ON (jl->j_len <= 0);
+  BUG_ON (trans_id == journal->j_trans_id);
+
+  get_journal_list(jl);
+  if (flushall) {
+    if (flush_older_commits(s, jl) == 1) {
+      /* list disappeared during flush_older_commits.  return */
+      goto put_jl;
+    }
+  }
+
+  /* make sure nobody is trying to flush this one at the same time */
+  down(&jl->j_commit_lock);
+  if (!journal_list_still_alive(s, trans_id)) {
+    up(&jl->j_commit_lock);
+    goto put_jl;
+  }
+  BUG_ON (jl->j_trans_id == 0);
+
+  /* this commit is done, exit */
+  if (atomic_read(&(jl->j_commit_left)) <= 0) {
+    if (flushall) {
+      atomic_set(&(jl->j_older_commits_done), 1) ;
+    }
+    up(&jl->j_commit_lock);
+    goto put_jl;
+  }
+
+  if (!list_empty(&jl->j_bh_list)) {
+      unlock_kernel();
+      write_ordered_buffers(&journal->j_dirty_buffers_lock,
+                            journal, jl, &jl->j_bh_list);
+      lock_kernel();
+  }
+  BUG_ON (!list_empty(&jl->j_bh_list));
+  /*
+   * for the description block and all the log blocks, submit any buffers
+   * that haven't already reached the disk
+   */
+  atomic_inc(&journal->j_async_throttle);
+  for (i = 0 ; i < (jl->j_len + 1) ; i++) {
+    bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) %
+         SB_ONDISK_JOURNAL_SIZE(s);
+    tbh = journal_find_get_block(s, bn) ;
+    if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
+	ll_rw_block(WRITE, 1, &tbh) ;
+    put_bh(tbh) ;
+  }
+  atomic_dec(&journal->j_async_throttle);
+
+  /* wait on everything written so far before writing the commit
+   * if we are in barrier mode, send the commit down now
+   */
+  barrier = reiserfs_barrier_flush(s);
+  if (barrier) {
+      int ret;
+      lock_buffer(jl->j_commit_bh);
+      ret = submit_barrier_buffer(jl->j_commit_bh);
+      if (ret == -EOPNOTSUPP) {
+	  set_buffer_uptodate(jl->j_commit_bh);
+          disable_barrier(s);
+	  barrier = 0;
+      }
+  }
+  for (i = 0 ;  i < (jl->j_len + 1) ; i++) {
+    bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
+	 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ;
+    tbh = journal_find_get_block(s, bn) ;
+    wait_on_buffer(tbh) ;
+    // since we're using ll_rw_blk above, it might have skipped over
+    // a locked buffer.  Double check here
+    //
+    if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
+      sync_dirty_buffer(tbh);
+    if (unlikely (!buffer_uptodate(tbh))) {
+#ifdef CONFIG_REISERFS_CHECK
+      reiserfs_warning(s, "journal-601, buffer write failed") ;
+#endif
+      retval = -EIO;
+    }
+    put_bh(tbh) ; /* once for journal_find_get_block */
+    put_bh(tbh) ;    /* once due to original getblk in do_journal_end */
+    atomic_dec(&(jl->j_commit_left)) ;
+  }
+
+  BUG_ON (atomic_read(&(jl->j_commit_left)) != 1);
+
+  if (!barrier) {
+      if (buffer_dirty(jl->j_commit_bh))
+	BUG();
+      mark_buffer_dirty(jl->j_commit_bh) ;
+      sync_dirty_buffer(jl->j_commit_bh) ;
+  } else
+      wait_on_buffer(jl->j_commit_bh);
+
+  check_barrier_completion(s, jl->j_commit_bh);
+
+  /* If there was a write error in the journal - we can't commit this
+   * transaction - it will be invalid and, if successful, will just end
+   * up propogating the write error out to the filesystem. */
+  if (unlikely (!buffer_uptodate(jl->j_commit_bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+    reiserfs_warning(s, "journal-615: buffer write failed") ;
+#endif
+    retval = -EIO;
+  }
+  bforget(jl->j_commit_bh) ;
+  if (journal->j_last_commit_id != 0 &&
+     (jl->j_trans_id - journal->j_last_commit_id) != 1) {
+      reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
+                       journal->j_last_commit_id,
+		       jl->j_trans_id);
+  }
+  journal->j_last_commit_id = jl->j_trans_id;
+
+  /* now, every commit block is on the disk.  It is safe to allow blocks freed during this transaction to be reallocated */
+  cleanup_freed_for_journal_list(s, jl) ;
+
+  retval = retval ? retval : journal->j_errno;
+
+  /* mark the metadata dirty */
+  if (!retval)
+    dirty_one_transaction(s, jl);
+  atomic_dec(&(jl->j_commit_left)) ;
+
+  if (flushall) {
+    atomic_set(&(jl->j_older_commits_done), 1) ;
+  }
+  up(&jl->j_commit_lock);
+put_jl:
+  put_journal_list(s, jl);
+
+  if (retval)
+    reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
+  return retval;
+}
+
+/*
+** flush_journal_list frequently needs to find a newer transaction for a given block.  This does that, or 
+** returns NULL if it can't find anything 
+*/
+static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) {
+  struct super_block *sb = cn->sb;
+  b_blocknr_t blocknr = cn->blocknr ;
+
+  cn = cn->hprev ;
+  while(cn) {
+    if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
+      return cn->jlist ;
+    }
+    cn = cn->hprev ;
+  }
+  return NULL ;
+}
+
+static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **,
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/reiserfs/journal.c