diff options
Diffstat (limited to 'fs/jbd/revoke.c')
| -rw-r--r-- | fs/jbd/revoke.c | 347 |
1 files changed, 190 insertions, 157 deletions
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index a5614418346..8898bbd2b61 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -1,6 +1,6 @@ /* - * linux/fs/revoke.c - * + * linux/fs/jbd/revoke.c + * * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 * * Copyright 2000 Red Hat corp --- All Rights Reserved @@ -15,10 +15,10 @@ * Revoke is the mechanism used to prevent old log records for deleted * metadata from being replayed on top of newer data using the same * blocks. The revoke mechanism is used in two separate places: - * + * * + Commit: during commit we write the entire list of the current * transaction's revoked blocks to the journal - * + * * + Recovery: during recovery we record the transaction ID of all * revoked blocks. If there are multiple revoke records in the log * for a single block, only the last one counts, and if there is a log @@ -29,7 +29,7 @@ * single transaction: * * Block is revoked and then journaled: - * The desired end result is the journaling of the new block, so we + * The desired end result is the journaling of the new block, so we * cancel the revoke before the transaction commits. * * Block is journaled and then revoked: @@ -41,12 +41,16 @@ * transaction must have happened after the block was journaled and so * the revoke must take precedence. * - * Block is revoked and then written as data: + * Block is revoked and then written as data: * The data write is allowed to succeed, but the revoke is _not_ * cancelled. We still need to prevent old log records from * overwriting the new data. We don't even need to clear the revoke * bit here. * + * We cache revoke status of a buffer in the current transaction in b_states + * bits. As the name says, revokevalid flag indicates that the cached revoke + * status of a buffer is valid and we can rely on the cached status. + * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up @@ -54,7 +58,26 @@ * buffer has not been revoked, and cancel_revoke * need do nothing. * RevokeValid set, Revoked set: - * buffer has been revoked. + * buffer has been revoked. + * + * Locking rules: + * We keep two hash tables of revoke records. One hashtable belongs to the + * running transaction (is pointed to by journal->j_revoke), the other one + * belongs to the committing transaction. Accesses to the second hash table + * happen only from the kjournald and no other thread touches this table. Also + * journal_switch_revoke_table() which switches which hashtable belongs to the + * running and which to the committing transaction is called only from + * kjournald. Therefore we need no locks when accessing the hashtable belonging + * to the committing transaction. + * + * All users operating on the hash table belonging to the running transaction + * have a handle to the transaction. Therefore they are safe from kjournald + * switching hash tables under them. For operations on the lists of entries in + * the hash table j_revoke_lock is used. + * + * Finally, also replay code uses the hash tables but at this moment no one else + * can touch them (filesystem isn't mounted yet) and hence no locking is + * needed. */ #ifndef __KERNEL__ @@ -66,22 +89,23 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> -#include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/bio.h> #endif +#include <linux/log2.h> -static kmem_cache_t *revoke_record_cache; -static kmem_cache_t *revoke_table_cache; +static struct kmem_cache *revoke_record_cache; +static struct kmem_cache *revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the last transaction to revoke this block. */ -struct jbd_revoke_record_s +struct jbd_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ - unsigned long blocknr; + unsigned int blocknr; }; @@ -90,8 +114,8 @@ struct jbd_revoke_table_s { /* It is conceivable that we might want a larger hash table * for recovery. Must be a power of two. */ - int hash_size; - int hash_shift; + int hash_size; + int hash_shift; struct list_head *hash_table; }; @@ -99,14 +123,14 @@ struct jbd_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct jbd_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct journal_head *, int, int); #endif /* Utility functions to maintain the revoke table */ /* Borrowed from buffer.c: this is a tried and tested block hash function */ -static inline int hash(journal_t *journal, unsigned long block) +static inline int hash(journal_t *journal, unsigned int block) { struct jbd_revoke_table_s *table = journal->j_revoke; int hash_shift = table->hash_shift; @@ -116,7 +140,7 @@ static inline int hash(journal_t *journal, unsigned long block) (block << (hash_shift - 12))) & (table->hash_size - 1); } -static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, +static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, tid_t seq) { struct list_head *hash_list; @@ -138,7 +162,7 @@ repeat: oom: if (!journal_oom_retry) return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); yield(); goto repeat; } @@ -146,7 +170,7 @@ oom: /* Find a revoke record in the journal's hash table. */ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, - unsigned long blocknr) + unsigned int blocknr) { struct list_head *hash_list; struct jbd_revoke_record_s *record; @@ -166,157 +190,140 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } -int __init journal_init_revoke_caches(void) +void journal_destroy_revoke_caches(void) { - revoke_record_cache = kmem_cache_create("revoke_record", - sizeof(struct jbd_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (revoke_record_cache == 0) - return -ENOMEM; - - revoke_table_cache = kmem_cache_create("revoke_table", - sizeof(struct jbd_revoke_table_s), - 0, 0, NULL, NULL); - if (revoke_table_cache == 0) { + if (revoke_record_cache) { kmem_cache_destroy(revoke_record_cache); revoke_record_cache = NULL; - return -ENOMEM; } - return 0; -} - -void journal_destroy_revoke_caches(void) -{ - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - kmem_cache_destroy(revoke_table_cache); - revoke_table_cache = NULL; + if (revoke_table_cache) { + kmem_cache_destroy(revoke_table_cache); + revoke_table_cache = NULL; + } } -/* Initialise the revoke table for a given journal to a given size. */ - -int journal_init_revoke(journal_t *journal, int hash_size) +int __init journal_init_revoke_caches(void) { - int shift, tmp; + J_ASSERT(!revoke_record_cache); + J_ASSERT(!revoke_table_cache); - J_ASSERT (journal->j_revoke_table[0] == NULL); + revoke_record_cache = kmem_cache_create("revoke_record", + sizeof(struct jbd_revoke_record_s), + 0, + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, + NULL); + if (!revoke_record_cache) + goto record_cache_failure; - shift = 0; - tmp = hash_size; - while((tmp >>= 1UL) != 0UL) - shift++; + revoke_table_cache = kmem_cache_create("revoke_table", + sizeof(struct jbd_revoke_table_s), + 0, SLAB_TEMPORARY, NULL); + if (!revoke_table_cache) + goto table_cache_failure; - journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; + return 0; - /* Check that the hash_size is a power of two */ - J_ASSERT ((hash_size & (hash_size-1)) == 0); +table_cache_failure: + journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; +} - journal->j_revoke->hash_size = hash_size; +static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size) +{ + int i; + struct jbd_revoke_table_s *table; - journal->j_revoke->hash_shift = shift; + table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = ilog2(hash_size); + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(revoke_table_cache, table); + table = NULL; + goto out; } - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + for (i = 0; i < hash_size; i++) + INIT_LIST_HEAD(&table->hash_table[i]); - journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; - } +out: + return table; +} - journal->j_revoke = journal->j_revoke_table[1]; +static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; - /* Check that the hash_size is a power of two */ - J_ASSERT ((hash_size & (hash_size-1)) == 0); + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); + } - journal->j_revoke->hash_size = hash_size; + kfree(table->hash_table); + kmem_cache_free(revoke_table_cache, table); +} - journal->j_revoke->hash_shift = shift; +/* Initialise the revoke table for a given journal to a given size. */ +int journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); + J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } + journal->j_revoke_table[0] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; + + journal->j_revoke_table[1] = journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void journal_destroy_revoke(journal_t *journal) { - struct jbd_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + journal_destroy_revoke_table(journal->j_revoke_table[1]); } #ifdef __KERNEL__ -/* +/* * journal_revoke: revoke a given buffer_head from the journal. This * prevents the block from being replayed during recovery if we take a * crash after this current transaction commits. Any subsequent * metadata writes of the buffer in this transaction cancel the - * revoke. + * revoke. * * Note that this call may block --- it is up to the caller to make * sure that there are no further calls to journal_write_metadata * before the revoke is complete. In ext3, this implies calling the * revoke before clearing the block bitmap when we are deleting - * metadata. + * metadata. * * Revoke performs a journal_forget on any buffer_head passed in as a * parameter, but does _not_ forget the buffer_head if the bh was only - * found implicitly. + * found implicitly. * * bh_in may not be a journalled buffer - it may have come off * the hash tables without an attached journal_head. @@ -325,7 +332,7 @@ void journal_destroy_revoke(journal_t *journal) * by one. */ -int journal_revoke(handle_t *handle, unsigned long blocknr, +int journal_revoke(handle_t *handle, unsigned int blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -394,7 +401,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, } } - jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); + jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); BUFFER_TRACE(bh_in, "exit"); @@ -415,8 +422,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. - * - * The caller must have the journal locked. */ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -474,6 +479,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) return did_revoke; } +/* + * journal_clear_revoked_flags clears revoked flag of buffers in + * revoke table to reflect there is no revoked buffer in the next + * transaction which is going to be started. + */ +void journal_clear_buffer_revoked_flags(journal_t *journal) +{ + struct jbd_revoke_table_s *revoke = journal->j_revoke; + int i = 0; + + for (i = 0; i < revoke->hash_size; i++) { + struct list_head *hash_list; + struct list_head *list_entry; + hash_list = &revoke->hash_table[i]; + + list_for_each(list_entry, hash_list) { + struct jbd_revoke_record_s *record; + struct buffer_head *bh; + record = (struct jbd_revoke_record_s *)list_entry; + bh = __find_get_block(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); + if (bh) { + clear_buffer_revoked(bh); + __brelse(bh); + } + } + } +} + /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz @@ -487,19 +522,16 @@ void journal_switch_revoke_table(journal_t *journal) else journal->j_revoke = journal->j_revoke_table[0]; - for (i = 0; i < journal->j_revoke->hash_size; i++) + for (i = 0; i < journal->j_revoke->hash_size; i++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); } /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. */ - -void journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) +void journal_write_revoke_records(journal_t *journal, + transaction_t *transaction, int write_op) { struct journal_head *descriptor; struct jbd_revoke_record_s *record; @@ -507,7 +539,7 @@ void journal_write_revoke_records(journal_t *journal, struct list_head *hash_list; int i, offset, count; - descriptor = NULL; + descriptor = NULL; offset = 0; count = 0; @@ -519,31 +551,32 @@ void journal_write_revoke_records(journal_t *journal, hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s *) + record = (struct jbd_revoke_record_s *) hash_list->next; write_one_revoke_record(journal, transaction, - &descriptor, &offset, - record); + &descriptor, &offset, + record, write_op); count++; list_del(&record->hash); kmem_cache_free(revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } -/* +/* * Write out one revoke record. We need to create a new descriptor - * block if the old one is full or if we have not already created one. + * block if the old one is full or if we have not already created one. */ -static void write_one_revoke_record(journal_t *journal, +static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct journal_head **descriptorp, int *offsetp, - struct jbd_revoke_record_s *record) + struct jbd_revoke_record_s *record, + int write_op) { struct journal_head *descriptor; int offset; @@ -562,7 +595,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -584,22 +617,22 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; *offsetp = offset; } -/* +/* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to * be waited for during commit, so it has to go onto the appropriate * journal buffer list. */ -static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, - int offset) +static void flush_descriptor(journal_t *journal, + struct journal_head *descriptor, + int offset, int write_op) { journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); @@ -614,11 +647,11 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif -/* +/* * Revoke support for recovery. * * Recovery needs to be able to: @@ -629,7 +662,7 @@ static void flush_descriptor(journal_t *journal, * check whether a given block in a given transaction should be replayed * (ie. has not been revoked by a revoke record in that or a subsequent * transaction) - * + * * empty the revoke table after recovery. */ @@ -637,11 +670,11 @@ static void flush_descriptor(journal_t *journal, * First, setting revoke records. We create a new revoke record for * every block ever revoked in the log as we scan it for recovery, and * we update the existing records if we find multiple revokes for a - * single block. + * single block. */ -int journal_set_revoke(journal_t *journal, - unsigned long blocknr, +int journal_set_revoke(journal_t *journal, + unsigned int blocknr, tid_t sequence) { struct jbd_revoke_record_s *record; @@ -653,19 +686,19 @@ int journal_set_revoke(journal_t *journal, if (tid_gt(sequence, record->sequence)) record->sequence = sequence; return 0; - } + } return insert_revoke_hash(journal, blocknr, sequence); } -/* +/* * Test revoke records. For a given block referenced in the log, has * that block been revoked? A revoke record with a given transaction * sequence number revokes all blocks in that transaction and earlier * ones, but later transactions still need replayed. */ -int journal_test_revoke(journal_t *journal, - unsigned long blocknr, +int journal_test_revoke(journal_t *journal, + unsigned int blocknr, tid_t sequence) { struct jbd_revoke_record_s *record; |
