diff options
Diffstat (limited to 'fs/jbd2/revoke.c')
| -rw-r--r-- | fs/jbd2/revoke.c | 312 |
1 files changed, 183 insertions, 129 deletions
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 2e1453a5e99..198c9c10276 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -47,6 +47,10 @@ * overwriting the new data. We don't even need to clear the revoke * bit here. * + * We cache revoke status of a buffer in the current transaction in b_states + * bits. As the name says, revokevalid flag indicates that the cached revoke + * status of a buffer is valid and we can rely on the cached status. + * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up @@ -55,6 +59,25 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. + * + * Locking rules: + * We keep two hash tables of revoke records. One hashtable belongs to the + * running transaction (is pointed to by journal->j_revoke), the other one + * belongs to the committing transaction. Accesses to the second hash table + * happen only from the kjournald and no other thread touches this table. Also + * journal_switch_revoke_table() which switches which hashtable belongs to the + * running and which to the committing transaction is called only from + * kjournald. Therefore we need no locks when accessing the hashtable belonging + * to the committing transaction. + * + * All users operating on the hash table belonging to the running transaction + * have a handle to the transaction. Therefore they are safe from kjournald + * switching hash tables under them. For operations on the lists of entries in + * the hash table j_revoke_lock is used. + * + * Finally, also replay code uses the hash tables but at this moment no one else + * can touch them (filesystem isn't mounted yet) and hence no locking is + * needed. */ #ifndef __KERNEL__ @@ -67,6 +90,7 @@ #include <linux/slab.h> #include <linux/list.h> #include <linux/init.h> +#include <linux/bio.h> #endif #include <linux/log2.h> @@ -98,9 +122,10 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, - struct jbd2_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct list_head *, + struct buffer_head **, int *, + struct jbd2_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct buffer_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -139,7 +164,7 @@ repeat: oom: if (!journal_oom_retry) return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); yield(); goto repeat; } @@ -167,138 +192,117 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, return NULL; } -int __init jbd2_journal_init_revoke_caches(void) +void jbd2_journal_destroy_revoke_caches(void) { - jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", - sizeof(struct jbd2_revoke_record_s), - 0, - SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, - NULL); - if (!jbd2_revoke_record_cache) - return -ENOMEM; - - jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", - sizeof(struct jbd2_revoke_table_s), - 0, SLAB_TEMPORARY, NULL); - if (!jbd2_revoke_table_cache) { + if (jbd2_revoke_record_cache) { kmem_cache_destroy(jbd2_revoke_record_cache); jbd2_revoke_record_cache = NULL; - return -ENOMEM; } - return 0; + if (jbd2_revoke_table_cache) { + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; + } } -void jbd2_journal_destroy_revoke_caches(void) +int __init jbd2_journal_init_revoke_caches(void) { - kmem_cache_destroy(jbd2_revoke_record_cache); - jbd2_revoke_record_cache = NULL; - kmem_cache_destroy(jbd2_revoke_table_cache); - jbd2_revoke_table_cache = NULL; -} + J_ASSERT(!jbd2_revoke_record_cache); + J_ASSERT(!jbd2_revoke_table_cache); -/* Initialise the revoke table for a given journal to a given size. */ + jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); + if (!jbd2_revoke_record_cache) + goto record_cache_failure; -int jbd2_journal_init_revoke(journal_t *journal, int hash_size) + jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, + SLAB_TEMPORARY); + if (!jbd2_revoke_table_cache) + goto table_cache_failure; + return 0; +table_cache_failure: + jbd2_journal_destroy_revoke_caches(); +record_cache_failure: + return -ENOMEM; +} + +static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) { - int shift, tmp; + int shift = 0; + int tmp = hash_size; + struct jbd2_revoke_table_s *table; - J_ASSERT (journal->j_revoke_table[0] == NULL); + table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); + if (!table) + goto out; - shift = 0; - tmp = hash_size; while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[0]) - return -ENOMEM; - journal->j_revoke = journal->j_revoke_table[0]; - - /* Check that the hash_size is a power of two */ - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; - - journal->j_revoke->hash_table = + table->hash_size = hash_size; + table->hash_shift = shift; + table->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - journal->j_revoke = NULL; - return -ENOMEM; + if (!table->hash_table) { + kmem_cache_free(jbd2_revoke_table_cache, table); + table = NULL; + goto out; } for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + INIT_LIST_HEAD(&table->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); - if (!journal->j_revoke_table[1]) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - return -ENOMEM; +out: + return table; +} + +static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +{ + int i; + struct list_head *hash_list; + + for (i = 0; i < table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT(list_empty(hash_list)); } - journal->j_revoke = journal->j_revoke_table[1]; + kfree(table->hash_table); + kmem_cache_free(jbd2_revoke_table_cache, table); +} - /* Check that the hash_size is a power of two */ +/* Initialise the revoke table for a given journal to a given size. */ +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) +{ + J_ASSERT(journal->j_revoke_table[0] == NULL); J_ASSERT(is_power_of_2(hash_size)); - journal->j_revoke->hash_size = hash_size; - - journal->j_revoke->hash_shift = shift; + journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[0]) + goto fail0; - journal->j_revoke->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!journal->j_revoke->hash_table) { - kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); - journal->j_revoke = NULL; - return -ENOMEM; - } + journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); + if (!journal->j_revoke_table[1]) + goto fail1; - for (tmp = 0; tmp < hash_size; tmp++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); + journal->j_revoke = journal->j_revoke_table[1]; spin_lock_init(&journal->j_revoke_lock); return 0; -} -/* Destoy a journal's revoke table. The table must already be empty! */ +fail1: + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +fail0: + return -ENOMEM; +} +/* Destroy a journal's revoke table. The table must already be empty! */ void jbd2_journal_destroy_revoke(journal_t *journal) { - struct jbd2_revoke_table_s *table; - struct list_head *hash_list; - int i; - - table = journal->j_revoke_table[0]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); - journal->j_revoke = NULL; - - table = journal->j_revoke_table[1]; - if (!table) - return; - - for (i=0; i<table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT (list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; + if (journal->j_revoke_table[0]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); + if (journal->j_revoke_table[1]) + jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); } @@ -418,8 +422,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. - * - * The caller must have the journal locked. */ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -477,6 +479,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) return did_revoke; } +/* + * journal_clear_revoked_flag clears revoked flag of buffers in + * revoke table to reflect there is no revoked buffers in the next + * transaction which is going to be started. + */ +void jbd2_clear_buffer_revoked_flags(journal_t *journal) +{ + struct jbd2_revoke_table_s *revoke = journal->j_revoke; + int i = 0; + + for (i = 0; i < revoke->hash_size; i++) { + struct list_head *hash_list; + struct list_head *list_entry; + hash_list = &revoke->hash_table[i]; + + list_for_each(list_entry, hash_list) { + struct jbd2_revoke_record_s *record; + struct buffer_head *bh; + record = (struct jbd2_revoke_record_s *)list_entry; + bh = __find_get_block(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); + if (bh) { + clear_buffer_revoked(bh); + __brelse(bh); + } + } + } +} + /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz @@ -497,14 +529,13 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. */ - void jbd2_journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) + transaction_t *transaction, + struct list_head *log_bufs, + int write_op) { - struct journal_head *descriptor; + struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; @@ -524,16 +555,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal, while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; - write_one_revoke_record(journal, transaction, + write_one_revoke_record(journal, transaction, log_bufs, &descriptor, &offset, - record); + record, write_op); count++; list_del(&record->hash); kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -544,11 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct list_head *log_bufs, + struct buffer_head **descriptorp, int *offsetp, - struct jbd2_revoke_record_s *record) + struct jbd2_revoke_record_s *record, + int write_op) { - struct journal_head *descriptor; + int csum_size = 0; + struct buffer_head *descriptor; int offset; journal_header_t *header; @@ -562,10 +596,14 @@ static void write_one_revoke_record(journal_t *journal, descriptor = *descriptorp; offset = *offsetp; + /* Do we need to leave space at the end for a checksum? */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + csum_size = sizeof(struct jbd2_journal_revoke_tail); + /* Make sure we have a descriptor with space left for the record */ if (descriptor) { - if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + if (offset >= journal->j_blocksize - csum_size) { + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -574,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); + BUFFER_TRACE(descriptor, "file in log_bufs"); + jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { - * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); offset += 8; } else { - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; } @@ -601,6 +639,21 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } +static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) +{ + struct jbd2_journal_revoke_tail *tail; + __u32 csum; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - + sizeof(struct jbd2_journal_revoke_tail)); + tail->r_checksum = 0; + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); + tail->r_checksum = cpu_to_be32(csum); +} + /* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to @@ -609,23 +662,24 @@ static void write_one_revoke_record(journal_t *journal, */ static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, - int offset) + struct buffer_head *descriptor, + int offset, int write_op) { jbd2_journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); if (is_journal_aborted(journal)) { - put_bh(bh); + put_bh(descriptor); return; } - header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + jbd2_revoke_csum_set(journal, descriptor); + + set_buffer_jwrite(descriptor); + BUFFER_TRACE(descriptor, "write"); + set_buffer_dirty(descriptor); + write_dirty_buffer(descriptor, write_op); } #endif |
