diff options
Diffstat (limited to 'drivers/md/bitmap.c')
| -rw-r--r-- | drivers/md/bitmap.c | 1555 |
1 files changed, 855 insertions, 700 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 5a1ffe3527a..67f8b31e205 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -26,74 +26,16 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/buffer_head.h> +#include <linux/seq_file.h> #include "md.h" #include "bitmap.h" -#include <linux/dm-dirty-log.h> -/* debug macros */ - -#define DEBUG 0 - -#if DEBUG -/* these are for debugging purposes only! */ - -/* define one and only one of these */ -#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */ -#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/ -#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */ -#define INJECT_FAULTS_4 0 /* undef */ -#define INJECT_FAULTS_5 0 /* undef */ -#define INJECT_FAULTS_6 0 - -/* if these are defined, the driver will fail! debug only */ -#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */ -#define INJECT_FATAL_FAULT_2 0 /* undef */ -#define INJECT_FATAL_FAULT_3 0 /* undef */ -#endif - -#ifndef PRINTK -# if DEBUG > 0 -# define PRINTK(x...) printk(KERN_DEBUG x) -# else -# define PRINTK(x...) -# endif -#endif - static inline char *bmname(struct bitmap *bitmap) { return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; } /* - * just a placeholder - calls kmalloc for bitmap pages - */ -static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) -{ - unsigned char *page; - -#ifdef INJECT_FAULTS_1 - page = NULL; -#else - page = kzalloc(PAGE_SIZE, GFP_NOIO); -#endif - if (!page) - printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); - else - PRINTK("%s: bitmap_alloc_page: allocated page at %p\n", - bmname(bitmap), page); - return page; -} - -/* - * for now just a placeholder -- just calls kfree for bitmap pages - */ -static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) -{ - PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page); - kfree(page); -} - -/* * check a page and, if necessary, allocate it (or hijack it if the alloc fails) * * 1) check to see if this page is allocated, if it's not then try to alloc @@ -103,7 +45,7 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) * if we find our page, we increment the page's refcount so that it stays * allocated while we're using it */ -static int bitmap_checkpage(struct bitmap *bitmap, +static int bitmap_checkpage(struct bitmap_counts *bitmap, unsigned long page, int create) __releases(bitmap->lock) __acquires(bitmap->lock) @@ -130,12 +72,11 @@ __acquires(bitmap->lock) /* this page has not been allocated yet */ spin_unlock_irq(&bitmap->lock); - mappage = bitmap_alloc_page(bitmap); + mappage = kzalloc(PAGE_SIZE, GFP_NOIO); spin_lock_irq(&bitmap->lock); if (mappage == NULL) { - PRINTK("%s: bitmap map page allocation failed, hijacking\n", - bmname(bitmap)); + pr_debug("md/bitmap: map page allocation failed, hijacking\n"); /* failed - set the hijacked flag so that we can use the * pointer as a counter */ if (!bitmap->bp[page].map) @@ -143,7 +84,7 @@ __acquires(bitmap->lock) } else if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { /* somebody beat us to getting the page */ - bitmap_free_page(bitmap, mappage); + kfree(mappage); return 0; } else { @@ -158,7 +99,7 @@ __acquires(bitmap->lock) /* if page is completely empty, put it back on the free list, or dealloc it */ /* if page was hijacked, unmark the flag so it might get alloced next time */ /* Note: lock should be held when calling this */ -static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) +static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) { char *ptr; @@ -175,7 +116,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) ptr = bitmap->bp[page].map; bitmap->bp[page].map = NULL; bitmap->missing_pages++; - bitmap_free_page(bitmap, ptr); + kfree(ptr); } } @@ -188,46 +129,33 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) */ /* IO operations when bitmap is stored near all superblocks */ -static struct page *read_sb_page(mddev_t *mddev, loff_t offset, - struct page *page, - unsigned long index, int size) +static int read_sb_page(struct mddev *mddev, loff_t offset, + struct page *page, + unsigned long index, int size) { /* choose a good rdev and read the page from there */ - mdk_rdev_t *rdev; + struct md_rdev *rdev; sector_t target; - int did_alloc = 0; - if (!page) { - page = alloc_page(GFP_KERNEL); - if (!page) - return ERR_PTR(-ENOMEM); - did_alloc = 1; - } - - list_for_each_entry(rdev, &mddev->disks, same_set) { + rdev_for_each(rdev, mddev) { if (! test_bit(In_sync, &rdev->flags) || test_bit(Faulty, &rdev->flags)) continue; - target = rdev->sb_start + offset + index * (PAGE_SIZE/512); + target = offset + index * (PAGE_SIZE/512); if (sync_page_io(rdev, target, roundup(size, bdev_logical_block_size(rdev->bdev)), - page, READ)) { + page, READ, true)) { page->index = index; - attach_page_buffers(page, NULL); /* so that free_buffer will - * quietly no-op */ - return page; + return 0; } } - if (did_alloc) - put_page(page); - return ERR_PTR(-EIO); - + return -EIO; } -static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev) +static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) { /* Iterate the disks of an mddev, using rcu to protect access to the * linked list, and raising the refcount of devices we return to ensure @@ -235,20 +163,17 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev) * As devices are only added or removed when raid_disk is < 0 and * nr_pending is 0 and In_sync is clear, the entries we return will * still be in the same position on the list when we re-enter - * list_for_each_continue_rcu. + * list_for_each_entry_continue_rcu. */ - struct list_head *pos; rcu_read_lock(); if (rdev == NULL) /* start at the beginning */ - pos = &mddev->disks; + rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set); else { /* release the previous rdev and start from there. */ rdev_dec_pending(rdev, mddev); - pos = &rdev->same_set; } - list_for_each_continue_rcu(pos, &mddev->disks) { - rdev = list_entry(pos, mdk_rdev_t, same_set); + list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags)) { /* this is a usable devices */ @@ -263,15 +188,24 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev) static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) { - mdk_rdev_t *rdev = NULL; - mddev_t *mddev = bitmap->mddev; + struct md_rdev *rdev = NULL; + struct block_device *bdev; + struct mddev *mddev = bitmap->mddev; + struct bitmap_storage *store = &bitmap->storage; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; loff_t offset = mddev->bitmap_info.offset; - if (page->index == bitmap->file_pages-1) - size = roundup(bitmap->last_page_size, - bdev_logical_block_size(rdev->bdev)); + + bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; + + if (page->index == store->file_pages-1) { + int last_page_size = store->bytes & (PAGE_SIZE-1); + if (last_page_size == 0) + last_page_size = PAGE_SIZE; + size = roundup(last_page_size, + bdev_logical_block_size(bdev)); + } /* Just make sure we aren't corrupting data or * metadata */ @@ -330,10 +264,10 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) { struct buffer_head *bh; - if (bitmap->file == NULL) { + if (bitmap->storage.file == NULL) { switch (write_sb_page(bitmap, page, wait)) { case -EINVAL: - bitmap->flags |= BITMAP_WRITE_ERROR; + set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); } } else { @@ -343,7 +277,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(WRITE | REQ_UNPLUG | REQ_SYNC, bh); + submit_bh(WRITE | REQ_SYNC, bh); bh = bh->b_this_page; } @@ -351,20 +285,16 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); } - if (bitmap->flags & BITMAP_WRITE_ERROR) + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) bitmap_file_kick(bitmap); } static void end_bitmap_write(struct buffer_head *bh, int uptodate) { struct bitmap *bitmap = bh->b_private; - unsigned long flags; - if (!uptodate) { - spin_lock_irqsave(&bitmap->lock, flags); - bitmap->flags |= BITMAP_WRITE_ERROR; - spin_unlock_irqrestore(&bitmap->lock, flags); - } + if (!uptodate) + set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); if (atomic_dec_and_test(&bitmap->pending_writes)) wake_up(&bitmap->write_wait); } @@ -379,8 +309,12 @@ __clear_page_buffers(struct page *page) } static void free_buffers(struct page *page) { - struct buffer_head *bh = page_buffers(page); + struct buffer_head *bh; + + if (!PagePrivate(page)) + return; + bh = page_buffers(page); while (bh) { struct buffer_head *next = bh->b_this_page; free_buffer_head(bh); @@ -397,28 +331,22 @@ static void free_buffers(struct page *page) * This usage is similar to how swap files are handled, and allows us * to write to a file with no concerns of memory allocation failing. */ -static struct page *read_page(struct file *file, unsigned long index, - struct bitmap *bitmap, - unsigned long count) +static int read_page(struct file *file, unsigned long index, + struct bitmap *bitmap, + unsigned long count, + struct page *page) { - struct page *page = NULL; - struct inode *inode = file->f_path.dentry->d_inode; + int ret = 0; + struct inode *inode = file_inode(file); struct buffer_head *bh; sector_t block; - PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, - (unsigned long long)index << PAGE_SHIFT); - - page = alloc_page(GFP_KERNEL); - if (!page) - page = ERR_PTR(-ENOMEM); - if (IS_ERR(page)) - goto out; + pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, + (unsigned long long)index << PAGE_SHIFT); bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); if (!bh) { - put_page(page); - page = ERR_PTR(-ENOMEM); + ret = -ENOMEM; goto out; } attach_page_buffers(page, bh); @@ -430,8 +358,7 @@ static struct page *read_page(struct file *file, unsigned long index, bh->b_blocknr = bmap(inode, block); if (bh->b_blocknr == 0) { /* Cannot use this file! */ - free_buffers(page); - page = ERR_PTR(-EINVAL); + ret = -EINVAL; goto out; } bh->b_bdev = inode->i_sb->s_bdev; @@ -454,17 +381,15 @@ static struct page *read_page(struct file *file, unsigned long index, wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); - if (bitmap->flags & BITMAP_WRITE_ERROR) { - free_buffers(page); - page = ERR_PTR(-EIO); - } + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) + ret = -EIO; out: - if (IS_ERR(page)) - printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", + if (ret) + printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT, - PTR_ERR(page)); - return page; + ret); + return ret; } /* @@ -475,30 +400,30 @@ out: void bitmap_update_sb(struct bitmap *bitmap) { bitmap_super_t *sb; - unsigned long flags; if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ return; if (bitmap->mddev->bitmap_info.external) return; - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->sb_page) { /* no superblock */ - spin_unlock_irqrestore(&bitmap->lock, flags); + if (!bitmap->storage.sb_page) /* no superblock */ return; - } - spin_unlock_irqrestore(&bitmap->lock, flags); - sb = kmap_atomic(bitmap->sb_page, KM_USER0); + sb = kmap_atomic(bitmap->storage.sb_page); sb->events = cpu_to_le64(bitmap->mddev->events); - if (bitmap->mddev->events < bitmap->events_cleared) { + if (bitmap->mddev->events < bitmap->events_cleared) /* rocking back to read-only */ bitmap->events_cleared = bitmap->mddev->events; - sb->events_cleared = cpu_to_le64(bitmap->events_cleared); - } + sb->events_cleared = cpu_to_le64(bitmap->events_cleared); + sb->state = cpu_to_le32(bitmap->flags); /* Just in case these have been changed via sysfs: */ sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); - kunmap_atomic(sb, KM_USER0); - write_page(bitmap, bitmap->sb_page, 1); + /* This might have been changed by a reshape */ + sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); + sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); + sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> + bitmap_info.space); + kunmap_atomic(sb); + write_page(bitmap, bitmap->storage.sb_page, 1); } /* print out the bitmap file superblock */ @@ -506,9 +431,9 @@ void bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; - if (!bitmap || !bitmap->sb_page) + if (!bitmap || !bitmap->storage.sb_page) return; - sb = kmap_atomic(bitmap->sb_page, KM_USER0); + sb = kmap_atomic(bitmap->storage.sb_page); printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); @@ -527,7 +452,76 @@ void bitmap_print_sb(struct bitmap *bitmap) printk(KERN_DEBUG " sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); - kunmap_atomic(sb, KM_USER0); + kunmap_atomic(sb); +} + +/* + * bitmap_new_disk_sb + * @bitmap + * + * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb + * reads and verifies the on-disk bitmap superblock and populates bitmap_info. + * This function verifies 'bitmap_info' and populates the on-disk bitmap + * structure, which is to be written to disk. + * + * Returns: 0 on success, -Exxx on error + */ +static int bitmap_new_disk_sb(struct bitmap *bitmap) +{ + bitmap_super_t *sb; + unsigned long chunksize, daemon_sleep, write_behind; + + bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + if (bitmap->storage.sb_page == NULL) + return -ENOMEM; + bitmap->storage.sb_page->index = 0; + + sb = kmap_atomic(bitmap->storage.sb_page); + + sb->magic = cpu_to_le32(BITMAP_MAGIC); + sb->version = cpu_to_le32(BITMAP_MAJOR_HI); + + chunksize = bitmap->mddev->bitmap_info.chunksize; + BUG_ON(!chunksize); + if (!is_power_of_2(chunksize)) { + kunmap_atomic(sb); + printk(KERN_ERR "bitmap chunksize not a power of 2\n"); + return -EINVAL; + } + sb->chunksize = cpu_to_le32(chunksize); + + daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; + if (!daemon_sleep || + (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { + printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n"); + daemon_sleep = 5 * HZ; + } + sb->daemon_sleep = cpu_to_le32(daemon_sleep); + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + + /* + * FIXME: write_behind for RAID1. If not specified, what + * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. + */ + write_behind = bitmap->mddev->bitmap_info.max_write_behind; + if (write_behind > COUNTER_MAX) + write_behind = COUNTER_MAX / 2; + sb->write_behind = cpu_to_le32(write_behind); + bitmap->mddev->bitmap_info.max_write_behind = write_behind; + + /* keep the array size field of the bitmap superblock up to date */ + sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); + + memcpy(sb->uuid, bitmap->mddev->uuid, 16); + + set_bit(BITMAP_STALE, &bitmap->flags); + sb->state = cpu_to_le32(bitmap->flags); + bitmap->events_cleared = bitmap->mddev->events; + sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + + kunmap_atomic(sb); + + return 0; } /* read the superblock from the bitmap file and initialize some bitmap fields */ @@ -537,31 +531,45 @@ static int bitmap_read_sb(struct bitmap *bitmap) bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; unsigned long long events; + unsigned long sectors_reserved = 0; int err = -EINVAL; + struct page *sb_page; + if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { + chunksize = 128 * 1024 * 1024; + daemon_sleep = 5 * HZ; + write_behind = 0; + set_bit(BITMAP_STALE, &bitmap->flags); + err = 0; + goto out_no_sb; + } /* page 0 is the superblock, read it... */ - if (bitmap->file) { - loff_t isize = i_size_read(bitmap->file->f_mapping->host); + sb_page = alloc_page(GFP_KERNEL); + if (!sb_page) + return -ENOMEM; + bitmap->storage.sb_page = sb_page; + + if (bitmap->storage.file) { + loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; - bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); + err = read_page(bitmap->storage.file, 0, + bitmap, bytes, sb_page); } else { - bitmap->sb_page = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - NULL, - 0, sizeof(bitmap_super_t)); + err = read_sb_page(bitmap->mddev, + bitmap->mddev->bitmap_info.offset, + sb_page, + 0, sizeof(bitmap_super_t)); } - if (IS_ERR(bitmap->sb_page)) { - err = PTR_ERR(bitmap->sb_page); - bitmap->sb_page = NULL; + if (err) return err; - } - sb = kmap_atomic(bitmap->sb_page, KM_USER0); + sb = kmap_atomic(sb_page); chunksize = le32_to_cpu(sb->chunksize); daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); + sectors_reserved = le32_to_cpu(sb->sectors_reserved); /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) @@ -571,7 +579,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) reason = "unrecognized superblock version"; else if (chunksize < 512) reason = "bitmap chunksize too small"; - else if ((1 << ffz(~chunksize)) != chunksize) + else if (!is_power_of_2(chunksize)) reason = "bitmap chunksize not a power of 2"; else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) reason = "daemon sleep period out of range"; @@ -586,79 +594,50 @@ static int bitmap_read_sb(struct bitmap *bitmap) /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); - if (!bitmap->mddev->persistent) - goto success; - - /* - * if we have a persistent array superblock, compare the - * bitmap's UUID and event counter to the mddev's - */ - if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { - printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n", - bmname(bitmap)); - goto out; - } - events = le64_to_cpu(sb->events); - if (events < bitmap->mddev->events) { - printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " - "-- forcing full recovery\n", bmname(bitmap), events, - (unsigned long long) bitmap->mddev->events); - sb->state |= cpu_to_le32(BITMAP_STALE); + if (bitmap->mddev->persistent) { + /* + * We have a persistent array superblock, so compare the + * bitmap's UUID and event counter to the mddev's + */ + if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { + printk(KERN_INFO + "%s: bitmap superblock UUID mismatch\n", + bmname(bitmap)); + goto out; + } + events = le64_to_cpu(sb->events); + if (events < bitmap->mddev->events) { + printk(KERN_INFO + "%s: bitmap file is out of date (%llu < %llu) " + "-- forcing full recovery\n", + bmname(bitmap), events, + (unsigned long long) bitmap->mddev->events); + set_bit(BITMAP_STALE, &bitmap->flags); + } } -success: + /* assign fields using values from superblock */ - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) - bitmap->flags |= BITMAP_HOSTENDIAN; + set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (sb->state & cpu_to_le32(BITMAP_STALE)) - bitmap->events_cleared = bitmap->mddev->events; err = 0; out: - kunmap_atomic(sb, KM_USER0); + kunmap_atomic(sb); +out_no_sb: + if (test_bit(BITMAP_STALE, &bitmap->flags)) + bitmap->events_cleared = bitmap->mddev->events; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; + if (bitmap->mddev->bitmap_info.space == 0 || + bitmap->mddev->bitmap_info.space > sectors_reserved) + bitmap->mddev->bitmap_info.space = sectors_reserved; if (err) bitmap_print_sb(bitmap); return err; } -enum bitmap_mask_op { - MASK_SET, - MASK_UNSET -}; - -/* record the state of the bitmap in the superblock. Return the old value */ -static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, - enum bitmap_mask_op op) -{ - bitmap_super_t *sb; - unsigned long flags; - int old; - - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->sb_page) { /* can't set the state */ - spin_unlock_irqrestore(&bitmap->lock, flags); - return 0; - } - spin_unlock_irqrestore(&bitmap->lock, flags); - sb = kmap_atomic(bitmap->sb_page, KM_USER0); - old = le32_to_cpu(sb->state) & bits; - switch (op) { - case MASK_SET: - sb->state |= cpu_to_le32(bits); - break; - case MASK_UNSET: - sb->state &= cpu_to_le32(~bits); - break; - default: - BUG(); - } - kunmap_atomic(sb, KM_USER0); - return old; -} - /* * general bitmap file operations */ @@ -670,17 +649,19 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, * file a page at a time. There's a superblock at the start of the file. */ /* calculate the index of the page that contains this bit */ -static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk) +static inline unsigned long file_page_index(struct bitmap_storage *store, + unsigned long chunk) { - if (!bitmap->mddev->bitmap_info.external) + if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk >> PAGE_BIT_SHIFT; } /* calculate the (bit) offset of this bit within a page */ -static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk) +static inline unsigned long file_page_offset(struct bitmap_storage *store, + unsigned long chunk) { - if (!bitmap->mddev->bitmap_info.external) + if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk & (PAGE_BITS - 1); } @@ -688,66 +669,89 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon /* * return a pointer to the page in the filemap that contains the given bit * - * this lookup is complicated by the fact that the bitmap sb might be exactly - * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page - * 0 or page 1 */ -static inline struct page *filemap_get_page(struct bitmap *bitmap, - unsigned long chunk) +static inline struct page *filemap_get_page(struct bitmap_storage *store, + unsigned long chunk) { - if (bitmap->filemap == NULL) + if (file_page_index(store, chunk) >= store->file_pages) return NULL; - if (file_page_index(bitmap, chunk) >= bitmap->file_pages) - return NULL; - return bitmap->filemap[file_page_index(bitmap, chunk) - - file_page_index(bitmap, 0)]; + return store->filemap[file_page_index(store, chunk)]; +} + +static int bitmap_storage_alloc(struct bitmap_storage *store, + unsigned long chunks, int with_super) +{ + int pnum; + unsigned long num_pages; + unsigned long bytes; + + bytes = DIV_ROUND_UP(chunks, 8); + if (with_super) + bytes += sizeof(bitmap_super_t); + + num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); + + store->filemap = kmalloc(sizeof(struct page *) + * num_pages, GFP_KERNEL); + if (!store->filemap) + return -ENOMEM; + + if (with_super && !store->sb_page) { + store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (store->sb_page == NULL) + return -ENOMEM; + store->sb_page->index = 0; + } + pnum = 0; + if (store->sb_page) { + store->filemap[0] = store->sb_page; + pnum = 1; + } + for ( ; pnum < num_pages; pnum++) { + store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!store->filemap[pnum]) { + store->file_pages = pnum; + return -ENOMEM; + } + store->filemap[pnum]->index = pnum; + } + store->file_pages = pnum; + + /* We need 4 bits per page, rounded up to a multiple + * of sizeof(unsigned long) */ + store->filemap_attr = kzalloc( + roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), + GFP_KERNEL); + if (!store->filemap_attr) + return -ENOMEM; + + store->bytes = bytes; + + return 0; } -static void bitmap_file_unmap(struct bitmap *bitmap) +static void bitmap_file_unmap(struct bitmap_storage *store) { struct page **map, *sb_page; - unsigned long *attr; int pages; - unsigned long flags; + struct file *file; - spin_lock_irqsave(&bitmap->lock, flags); - map = bitmap->filemap; - bitmap->filemap = NULL; - attr = bitmap->filemap_attr; - bitmap->filemap_attr = NULL; - pages = bitmap->file_pages; - bitmap->file_pages = 0; - sb_page = bitmap->sb_page; - bitmap->sb_page = NULL; - spin_unlock_irqrestore(&bitmap->lock, flags); + file = store->file; + map = store->filemap; + pages = store->file_pages; + sb_page = store->sb_page; while (pages--) if (map[pages] != sb_page) /* 0 is sb_page, release it below */ free_buffers(map[pages]); kfree(map); - kfree(attr); + kfree(store->filemap_attr); if (sb_page) free_buffers(sb_page); -} - -static void bitmap_file_put(struct bitmap *bitmap) -{ - struct file *file; - unsigned long flags; - - spin_lock_irqsave(&bitmap->lock, flags); - file = bitmap->file; - bitmap->file = NULL; - spin_unlock_irqrestore(&bitmap->lock, flags); - - if (file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - bitmap_file_unmap(bitmap); if (file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); invalidate_mapping_pages(inode->i_mapping, 0, -1); fput(file); } @@ -762,14 +766,14 @@ static void bitmap_file_kick(struct bitmap *bitmap) { char *path, *ptr = NULL; - if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { + if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { bitmap_update_sb(bitmap); - if (bitmap->file) { + if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); if (path) - ptr = d_path(&bitmap->file->f_path, path, - PAGE_SIZE); + ptr = d_path(&bitmap->storage.file->f_path, + path, PAGE_SIZE); printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", @@ -781,45 +785,39 @@ static void bitmap_file_kick(struct bitmap *bitmap) "%s: disabling internal bitmap due to errors\n", bmname(bitmap)); } - - bitmap_file_put(bitmap); - - return; } enum bitmap_page_attr { BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ - BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ + BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. + * i.e. counter is 1 or 2. */ BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ }; -static inline void set_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) +static inline void set_page_attr(struct bitmap *bitmap, int pnum, + enum bitmap_page_attr attr) { - if (page) - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __set_bit(attr, &bitmap->logattrs); + set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } -static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) +static inline void clear_page_attr(struct bitmap *bitmap, int pnum, + enum bitmap_page_attr attr) { - if (page) - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __clear_bit(attr, &bitmap->logattrs); + clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } -static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) +static inline int test_page_attr(struct bitmap *bitmap, int pnum, + enum bitmap_page_attr attr) { - if (page) - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - return test_bit(attr, &bitmap->logattrs); + return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } +static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, + enum bitmap_page_attr attr) +{ + return test_and_clear_bit((pnum<<2) + attr, + bitmap->storage.filemap_attr); +} /* * bitmap_file_set_bit -- called before performing a write to the md device * to set (and eventually sync) a particular bit in the bitmap file @@ -830,32 +828,48 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; - struct page *page = NULL; + struct page *page; void *kaddr; - unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); + unsigned long chunk = block >> bitmap->counts.chunkshift; - if (!bitmap->filemap) { - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - if (log) - log->type->mark_region(log, chunk); - } else { + page = filemap_get_page(&bitmap->storage, chunk); + if (!page) + return; + bit = file_page_offset(&bitmap->storage, chunk); - page = filemap_get_page(bitmap, chunk); - if (!page) - return; - bit = file_page_offset(bitmap, chunk); + /* set the bit */ + kaddr = kmap_atomic(page); + if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) + set_bit(bit, kaddr); + else + set_bit_le(bit, kaddr); + kunmap_atomic(kaddr); + pr_debug("set file bit %lu page %lu\n", bit, page->index); + /* record page number so it gets flushed to disk when unplug occurs */ + set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY); +} - /* set the bit */ - kaddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - set_bit(bit, kaddr); - else - ext2_set_bit(bit, kaddr); - kunmap_atomic(kaddr, KM_USER0); - PRINTK("set file bit %lu page %lu\n", bit, page->index); +static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) +{ + unsigned long bit; + struct page *page; + void *paddr; + unsigned long chunk = block >> bitmap->counts.chunkshift; + + page = filemap_get_page(&bitmap->storage, chunk); + if (!page) + return; + bit = file_page_offset(&bitmap->storage, chunk); + paddr = kmap_atomic(page); + if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) + clear_bit(bit, paddr); + else + clear_bit_le(bit, paddr); + kunmap_atomic(paddr); + if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) { + set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING); + bitmap->allclean = 0; } - /* record page number so it gets flushed to disk when unplug occurs */ - set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); } /* this gets called when the md device is ready to unplug its underlying @@ -863,53 +877,37 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) * sync the dirty pages of the bitmap file to disk */ void bitmap_unplug(struct bitmap *bitmap) { - unsigned long i, flags; + unsigned long i; int dirty, need_write; - struct page *page; int wait = 0; - if (!bitmap) + if (!bitmap || !bitmap->storage.filemap || + test_bit(BITMAP_STALE, &bitmap->flags)) return; - if (!bitmap->filemap) { - /* Must be using a dirty_log */ - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); - need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); - if (dirty || need_write) - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; - goto out; - } /* look at each page to see if there are any set bits that need to be * flushed out to disk */ - for (i = 0; i < bitmap->file_pages; i++) { - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { - spin_unlock_irqrestore(&bitmap->lock, flags); + for (i = 0; i < bitmap->storage.file_pages; i++) { + if (!bitmap->storage.filemap) return; + dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); + need_write = test_and_clear_page_attr(bitmap, i, + BITMAP_PAGE_NEEDWRITE); + if (dirty || need_write) { + clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); + write_page(bitmap, bitmap->storage.filemap[i], 0); } - page = bitmap->filemap[i]; - dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); - need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); - clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); - clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); if (dirty) wait = 1; - spin_unlock_irqrestore(&bitmap->lock, flags); - - if (dirty || need_write) - write_page(bitmap, page, 0); } if (wait) { /* if any writes were performed, we need to wait on them */ - if (bitmap->file) + if (bitmap->storage.file) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); else md_super_wait(bitmap->mddev); } -out: - if (bitmap->flags & BITMAP_WRITE_ERROR) + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) bitmap_file_kick(bitmap); } EXPORT_SYMBOL(bitmap_unplug); @@ -929,149 +927,117 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) { unsigned long i, chunks, index, oldindex, bit; - struct page *page = NULL, *oldpage = NULL; - unsigned long num_pages, bit_cnt = 0; + struct page *page = NULL; + unsigned long bit_cnt = 0; struct file *file; - unsigned long bytes, offset; + unsigned long offset; int outofdate; int ret = -ENOSPC; void *paddr; + struct bitmap_storage *store = &bitmap->storage; - chunks = bitmap->chunks; - file = bitmap->file; + chunks = bitmap->counts.chunks; + file = store->file; - BUG_ON(!file && !bitmap->mddev->bitmap_info.offset); + if (!file && !bitmap->mddev->bitmap_info.offset) { + /* No permanent bitmap - fill with '1s'. */ + store->filemap = NULL; + store->file_pages = 0; + for (i = 0; i < chunks ; i++) { + /* if the disk bit is set, set the memory bit */ + int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) + >= start); + bitmap_set_memory_bits(bitmap, + (sector_t)i << bitmap->counts.chunkshift, + needed); + } + return 0; + } -#ifdef INJECT_FAULTS_3 - outofdate = 1; -#else - outofdate = bitmap->flags & BITMAP_STALE; -#endif + outofdate = test_bit(BITMAP_STALE, &bitmap->flags); if (outofdate) printk(KERN_INFO "%s: bitmap file is out of date, doing full " "recovery\n", bmname(bitmap)); - bytes = DIV_ROUND_UP(bitmap->chunks, 8); - if (!bitmap->mddev->bitmap_info.external) - bytes += sizeof(bitmap_super_t); - - num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); - - if (file && i_size_read(file->f_mapping->host) < bytes) { + if (file && i_size_read(file->f_mapping->host) < store->bytes) { printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", - bmname(bitmap), - (unsigned long) i_size_read(file->f_mapping->host), - bytes); + bmname(bitmap), + (unsigned long) i_size_read(file->f_mapping->host), + store->bytes); goto err; } - ret = -ENOMEM; - - bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); - if (!bitmap->filemap) - goto err; - - /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ - bitmap->filemap_attr = kzalloc( - roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), - GFP_KERNEL); - if (!bitmap->filemap_attr) - goto err; - oldindex = ~0L; + offset = 0; + if (!bitmap->mddev->bitmap_info.external) + offset = sizeof(bitmap_super_t); for (i = 0; i < chunks; i++) { int b; - index = file_page_index(bitmap, i); - bit = file_page_offset(bitmap, i); + index = file_page_index(&bitmap->storage, i); + bit = file_page_offset(&bitmap->storage, i); if (index != oldindex) { /* this is a new page, read it in */ int count; /* unmap the old page, we're done with it */ - if (index == num_pages-1) - count = bytes - index * PAGE_SIZE; + if (index == store->file_pages-1) + count = store->bytes - index * PAGE_SIZE; else count = PAGE_SIZE; - if (index == 0 && bitmap->sb_page) { - /* - * if we're here then the superblock page - * contains some bits (PAGE_SIZE != sizeof sb) - * we've already read it in, so just use it - */ - page = bitmap->sb_page; - offset = sizeof(bitmap_super_t); - if (!file) - page = read_sb_page( - bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - page, - index, count); - } else if (file) { - page = read_page(file, index, bitmap, count); - offset = 0; - } else { - page = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - NULL, - index, count); - offset = 0; - } - if (IS_ERR(page)) { /* read error */ - ret = PTR_ERR(page); + page = store->filemap[index]; + if (file) + ret = read_page(file, index, bitmap, + count, page); + else + ret = read_sb_page( + bitmap->mddev, + bitmap->mddev->bitmap_info.offset, + page, + index, count); + + if (ret) goto err; - } oldindex = index; - oldpage = page; - - bitmap->filemap[bitmap->file_pages++] = page; - bitmap->last_page_size = count; if (outofdate) { /* * if bitmap is out of date, dirty the * whole page and write it out */ - paddr = kmap_atomic(page, KM_USER0); + paddr = kmap_atomic(page); memset(paddr + offset, 0xff, PAGE_SIZE - offset); - kunmap_atomic(paddr, KM_USER0); + kunmap_atomic(paddr); write_page(bitmap, page, 1); ret = -EIO; - if (bitmap->flags & BITMAP_WRITE_ERROR) + if (test_bit(BITMAP_WRITE_ERROR, + &bitmap->flags)) goto err; } } - paddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) + paddr = kmap_atomic(page); + if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) b = test_bit(bit, paddr); else - b = ext2_test_bit(bit, paddr); - kunmap_atomic(paddr, KM_USER0); + b = test_bit_le(bit, paddr); + kunmap_atomic(paddr); if (b) { /* if the disk bit is set, set the memory bit */ - int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) + int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift >= start); bitmap_set_memory_bits(bitmap, - (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), + (sector_t)i << bitmap->counts.chunkshift, needed); bit_cnt++; - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } - } - - /* everything went OK */ - ret = 0; - bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); - - if (bit_cnt) { /* Kick recovery if any bits were set */ - set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); - md_wakeup_thread(bitmap->mddev->thread); + offset = 0; } printk(KERN_INFO "%s: bitmap initialized from disk: " - "read %lu/%lu pages, set %lu bits\n", - bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); + "read %lu pages, set %lu of %lu bits\n", + bmname(bitmap), store->file_pages, + bit_cnt, chunks); return 0; @@ -1088,19 +1054,38 @@ void bitmap_write_all(struct bitmap *bitmap) */ int i; - for (i = 0; i < bitmap->file_pages; i++) - set_page_attr(bitmap, bitmap->filemap[i], + if (!bitmap || !bitmap->storage.filemap) + return; + if (bitmap->storage.file) + /* Only one copy, so nothing needed */ + return; + + for (i = 0; i < bitmap->storage.file_pages; i++) + set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); + bitmap->allclean = 0; } -static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) +static void bitmap_count_page(struct bitmap_counts *bitmap, + sector_t offset, int inc) { - sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); + sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; bitmap->bp[page].count += inc; bitmap_checkfree(bitmap, page); } -static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, + +static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) +{ + sector_t chunk = offset >> bitmap->chunkshift; + unsigned long page = chunk >> PAGE_COUNTER_SHIFT; + struct bitmap_page *bp = &bitmap->bp[page]; + + if (!bp->pending) + bp->pending = 1; +} + +static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, sector_t offset, sector_t *blocks, int create); @@ -1109,15 +1094,13 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, * out to disk */ -void bitmap_daemon_work(mddev_t *mddev) +void bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; unsigned long j; - unsigned long flags; - struct page *page = NULL, *lastpage = NULL; + unsigned long nextpage; sector_t blocks; - void *paddr; - struct dm_dirty_log *log = mddev->bitmap_info.log; + struct bitmap_counts *counts; /* Use a mutex to guard daemon_work against * bitmap_destroy. @@ -1129,135 +1112,111 @@ void bitmap_daemon_work(mddev_t *mddev) return; } if (time_before(jiffies, bitmap->daemon_lastrun - + bitmap->mddev->bitmap_info.daemon_sleep)) + + mddev->bitmap_info.daemon_sleep)) goto done; bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { - bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; goto done; } bitmap->allclean = 1; - spin_lock_irqsave(&bitmap->lock, flags); - for (j = 0; j < bitmap->chunks; j++) { + /* Any file-page which is PENDING now needs to be written. + * So set NEEDWRITE now, then after we make any last-minute changes + * we will write it. + */ + for (j = 0; j < bitmap->storage.file_pages; j++) + if (test_and_clear_page_attr(bitmap, j, + BITMAP_PAGE_PENDING)) + set_page_attr(bitmap, j, + BITMAP_PAGE_NEEDWRITE); + + if (bitmap->need_sync && + mddev->bitmap_info.external == 0) { + /* Arrange for superblock update as well as + * other changes */ + bitmap_super_t *sb; + bitmap->need_sync = 0; + if (bitmap->storage.filemap) { + sb = kmap_atomic(bitmap->storage.sb_page); + sb->events_cleared = + cpu_to_le64(bitmap->events_cleared); + kunmap_atomic(sb); + set_page_attr(bitmap, 0, + BITMAP_PAGE_NEEDWRITE); + } + } + /* Now look at the bitmap counters and if any are '2' or '1', + * decrement and handle accordingly. + */ + counts = &bitmap->counts; + spin_lock_irq(&counts->lock); + nextpage = 0; + for (j = 0; j < counts->chunks; j++) { bitmap_counter_t *bmc; - if (!bitmap->filemap) { - if (!log) - /* error or shutdown */ - break; - } else - page = filemap_get_page(bitmap, j); - - if (page != lastpage) { - /* skip this page unless it's marked as needing cleaning */ - if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) { - int need_write = test_page_attr(bitmap, page, - BITMAP_PAGE_NEEDWRITE); - if (need_write) - clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); - - spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) { - write_page(bitmap, page, 0); - bitmap->allclean = 0; - } - spin_lock_irqsave(&bitmap->lock, flags); - j |= (PAGE_BITS - 1); - continue; - } + sector_t block = (sector_t)j << counts->chunkshift; - /* grab the new page, sync and release the old */ - if (lastpage != NULL) { - if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { - clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); - write_page(bitmap, lastpage, 0); - } else { - set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); - } - } else - spin_unlock_irqrestore(&bitmap->lock, flags); - lastpage = page; - - /* We are possibly going to clear some bits, so make - * sure that events_cleared is up-to-date. - */ - if (bitmap->need_sync && - bitmap->mddev->bitmap_info.external == 0) { - bitmap_super_t *sb; - bitmap->need_sync = 0; - sb = kmap_atomic(bitmap->sb_page, KM_USER0); - sb->events_cleared = - cpu_to_le64(bitmap->events_cleared); - kunmap_atomic(sb, KM_USER0); - write_page(bitmap, bitmap->sb_page, 1); + if (j == nextpage) { + nextpage += PAGE_COUNTER_RATIO; + if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { + j |= PAGE_COUNTER_MASK; + continue; } - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->need_sync) - clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); + counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; } - bmc = bitmap_get_counter(bitmap, - (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), + bmc = bitmap_get_counter(counts, + block, &blocks, 0); - if (bmc) { - if (*bmc) - bitmap->allclean = 0; - if (*bmc == 2) { - *bmc = 1; /* maybe clear the bit next time */ - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); - } else if (*bmc == 1 && !bitmap->need_sync) { - /* we can clear the bit */ - *bmc = 0; - bitmap_count_page(bitmap, - (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), - -1); - - /* clear the bit */ - if (page) { - paddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - ext2_clear_bit(file_page_offset(bitmap, j), - paddr); - kunmap_atomic(paddr, KM_USER0); - } else - log->type->clear_region(log, j); - } - } else + if (!bmc) { j |= PAGE_COUNTER_MASK; + continue; + } + if (*bmc == 1 && !bitmap->need_sync) { + /* We can clear the bit */ + *bmc = 0; + bitmap_count_page(counts, block, -1); + bitmap_file_clear_bit(bitmap, block); + } else if (*bmc && *bmc <= 2) { + *bmc = 1; + bitmap_set_pending(counts, block); + bitmap->allclean = 0; + } } - spin_unlock_irqrestore(&bitmap->lock, flags); - - /* now sync the final page */ - if (lastpage != NULL || log != NULL) { - spin_lock_irqsave(&bitmap->lock, flags); - if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { - clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); - if (lastpage) - write_page(bitmap, lastpage, 0); - else - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; - } else { - set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); + spin_unlock_irq(&counts->lock); + + /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. + * DIRTY pages need to be written by bitmap_unplug so it can wait + * for them. + * If we find any DIRTY page we stop there and let bitmap_unplug + * handle all the rest. This is important in the case where + * the first blocking holds the superblock and it has been updated. + * We mustn't write any other blocks before the superblock. + */ + for (j = 0; + j < bitmap->storage.file_pages + && !test_bit(BITMAP_STALE, &bitmap->flags); + j++) { + + if (test_page_attr(bitmap, j, + BITMAP_PAGE_DIRTY)) + /* bitmap_unplug will handle the rest */ + break; + if (test_and_clear_page_attr(bitmap, j, + BITMAP_PAGE_NEEDWRITE)) { + write_page(bitmap, bitmap->storage.filemap[j], 0); } } done: if (bitmap->allclean == 0) - bitmap->mddev->thread->timeout = - bitmap->mddev->bitmap_info.daemon_sleep; + mddev->thread->timeout = + mddev->bitmap_info.daemon_sleep; mutex_unlock(&mddev->bitmap_info.mutex); } -static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, +static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, sector_t offset, sector_t *blocks, int create) __releases(bitmap->lock) @@ -1267,7 +1226,7 @@ __acquires(bitmap->lock) * The lock must have been taken with interrupts enabled. * If !create, we don't release the lock. */ - sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); + sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; sector_t csize; @@ -1277,10 +1236,10 @@ __acquires(bitmap->lock) if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + + csize = ((sector_t)1) << (bitmap->chunkshift + PAGE_COUNTER_SHIFT - 1); else - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); + csize = ((sector_t)1) << bitmap->chunkshift; *blocks = csize - (offset & (csize - 1)); if (err < 0) @@ -1311,22 +1270,22 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect if (bw > bitmap->behind_writes_used) bitmap->behind_writes_used = bw; - PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n", - bw, bitmap->max_write_behind); + pr_debug("inc write-behind count %d/%lu\n", + bw, bitmap->mddev->bitmap_info.max_write_behind); } while (sectors) { sector_t blocks; bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &blocks, 1); + spin_lock_irq(&bitmap->counts.lock); + bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); if (!bmc) { - spin_unlock_irq(&bitmap->lock); + spin_unlock_irq(&bitmap->counts.lock); return 0; } - if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) { + if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { DEFINE_WAIT(__wait); /* note that it is safe to do the prepare_to_wait * after the test as long as we do it before dropping @@ -1334,8 +1293,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect */ prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&bitmap->lock); - md_unplug(bitmap->mddev); + spin_unlock_irq(&bitmap->counts.lock); schedule(); finish_wait(&bitmap->overflow_wait, &__wait); continue; @@ -1344,7 +1302,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect switch (*bmc) { case 0: bitmap_file_set_bit(bitmap, offset); - bitmap_count_page(bitmap, offset, 1); + bitmap_count_page(&bitmap->counts, offset, 1); /* fall through */ case 1: *bmc = 2; @@ -1352,7 +1310,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect (*bmc)++; - spin_unlock_irq(&bitmap->lock); + spin_unlock_irq(&bitmap->counts.lock); offset += blocks; if (sectors > blocks) @@ -1360,7 +1318,6 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect else sectors = 0; } - bitmap->allclean = 0; return 0; } EXPORT_SYMBOL(bitmap_startwrite); @@ -1373,47 +1330,42 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto if (behind) { if (atomic_dec_and_test(&bitmap->behind_writes)) wake_up(&bitmap->behind_wait); - PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", - atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); + pr_debug("dec write-behind count %d/%lu\n", + atomic_read(&bitmap->behind_writes), + bitmap->mddev->bitmap_info.max_write_behind); } - if (bitmap->mddev->degraded) - /* Never clear bits or update events_cleared when degraded */ - success = 0; while (sectors) { sector_t blocks; unsigned long flags; bitmap_counter_t *bmc; - spin_lock_irqsave(&bitmap->lock, flags); - bmc = bitmap_get_counter(bitmap, offset, &blocks, 0); + spin_lock_irqsave(&bitmap->counts.lock, flags); + bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); if (!bmc) { - spin_unlock_irqrestore(&bitmap->lock, flags); + spin_unlock_irqrestore(&bitmap->counts.lock, flags); return; } - if (success && + if (success && !bitmap->mddev->degraded && bitmap->events_cleared < bitmap->mddev->events) { bitmap->events_cleared = bitmap->mddev->events; bitmap->need_sync = 1; sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); } - if (!success && ! (*bmc & NEEDED_MASK)) + if (!success && !NEEDED(*bmc)) *bmc |= NEEDED_MASK; - if ((*bmc & COUNTER_MAX) == COUNTER_MAX) + if (COUNTER(*bmc) == COUNTER_MAX) wake_up(&bitmap->overflow_wait); (*bmc)--; - if (*bmc <= 2) - set_page_attr(bitmap, - filemap_get_page( - bitmap, - offset >> CHUNK_BLOCK_SHIFT(bitmap)), - BITMAP_PAGE_CLEAN); - - spin_unlock_irqrestore(&bitmap->lock, flags); + if (*bmc <= 2) { + bitmap_set_pending(&bitmap->counts, offset); + bitmap->allclean = 0; + } + spin_unlock_irqrestore(&bitmap->counts.lock, flags); offset += blocks; if (sectors > blocks) sectors -= blocks; @@ -1432,8 +1384,8 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks = 1024; return 1; /* always resync if no bitmap */ } - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, blocks, 0); + spin_lock_irq(&bitmap->counts.lock); + bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); rv = 0; if (bmc) { /* locked */ @@ -1447,8 +1399,7 @@ static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t } } } - spin_unlock_irq(&bitmap->lock); - bitmap->allclean = 0; + spin_unlock_irq(&bitmap->counts.lock); return rv; } @@ -1485,8 +1436,8 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i *blocks = 1024; return; } - spin_lock_irqsave(&bitmap->lock, flags); - bmc = bitmap_get_counter(bitmap, offset, blocks, 0); + spin_lock_irqsave(&bitmap->counts.lock, flags); + bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); if (bmc == NULL) goto unlock; /* locked */ @@ -1496,15 +1447,14 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, i if (!NEEDED(*bmc) && aborted) *bmc |= NEEDED_MASK; else { - if (*bmc <= 2) - set_page_attr(bitmap, - filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), - BITMAP_PAGE_CLEAN); + if (*bmc <= 2) { + bitmap_set_pending(&bitmap->counts, offset); + bitmap->allclean = 0; + } } } unlock: - spin_unlock_irqrestore(&bitmap->lock, flags); - bitmap->allclean = 0; + spin_unlock_irqrestore(&bitmap->counts.lock, flags); } EXPORT_SYMBOL(bitmap_end_sync); @@ -1542,9 +1492,9 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) wait_event(bitmap->mddev->recovery_wait, atomic_read(&bitmap->mddev->recovery_active) == 0); - bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; + bitmap->mddev->curr_resync_completed = sector; set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); - sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); + sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { bitmap_end_sync(bitmap, s, &blocks, 0); @@ -1558,27 +1508,25 @@ EXPORT_SYMBOL(bitmap_cond_end_sync); static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed. They should all + * counter to 2 and possibly set resync_needed. They should all * be 0 at this point */ sector_t secs; bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &secs, 1); + spin_lock_irq(&bitmap->counts.lock); + bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1); if (!bmc) { - spin_unlock_irq(&bitmap->lock); + spin_unlock_irq(&bitmap->counts.lock); return; } if (!*bmc) { - struct page *page; - *bmc = 1 | (needed ? NEEDED_MASK : 0); - bitmap_count_page(bitmap, offset, 1); - page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); + *bmc = 2 | (needed ? NEEDED_MASK : 0); + bitmap_count_page(&bitmap->counts, offset, 1); + bitmap_set_pending(&bitmap->counts, offset); + bitmap->allclean = 0; } - spin_unlock_irq(&bitmap->lock); - bitmap->allclean = 0; + spin_unlock_irq(&bitmap->counts.lock); } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ @@ -1587,7 +1535,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) unsigned long chunk; for (chunk = s; chunk <= e; chunk++) { - sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); + sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; bitmap_set_memory_bits(bitmap, sec, 1); bitmap_file_set_bit(bitmap, sec); if (sec < bitmap->mddev->recovery_cp) @@ -1602,7 +1550,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) /* * flush out any pending updates */ -void bitmap_flush(mddev_t *mddev) +void bitmap_flush(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; long sleep; @@ -1634,11 +1582,15 @@ static void bitmap_free(struct bitmap *bitmap) if (!bitmap) /* there was no bitmap */ return; - /* release the bitmap file and kill the daemon */ - bitmap_file_put(bitmap); + /* Shouldn't be needed - but just in case.... */ + wait_event(bitmap->write_wait, + atomic_read(&bitmap->pending_writes) == 0); - bp = bitmap->bp; - pages = bitmap->pages; + /* release the bitmap file */ + bitmap_file_unmap(&bitmap->storage); + + bp = bitmap->counts.bp; + pages = bitmap->counts.pages; /* free all allocated memory */ @@ -1650,7 +1602,7 @@ static void bitmap_free(struct bitmap *bitmap) kfree(bitmap); } -void bitmap_destroy(mddev_t *mddev) +void bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1673,31 +1625,23 @@ void bitmap_destroy(mddev_t *mddev) * initialize the bitmap structure * if this returns an error, bitmap_destroy must be called to do clean up */ -int bitmap_create(mddev_t *mddev) +int bitmap_create(struct mddev *mddev) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; - unsigned long chunks; - unsigned long pages; struct file *file = mddev->bitmap_info.file; int err; - struct sysfs_dirent *bm = NULL; + struct kernfs_node *bm = NULL; BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); - if (!file - && !mddev->bitmap_info.offset - && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ - return 0; - BUG_ON(file && mddev->bitmap_info.offset); - BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) return -ENOMEM; - spin_lock_init(&bitmap->lock); + spin_lock_init(&bitmap->counts.lock); atomic_set(&bitmap->pending_writes, 0); init_waitqueue_head(&bitmap->write_wait); init_waitqueue_head(&bitmap->overflow_wait); @@ -1706,14 +1650,14 @@ int bitmap_create(mddev_t *mddev) bitmap->mddev = mddev; if (mddev->kobj.sd) - bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); + bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); if (bm) { - bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); + bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); sysfs_put(bm); } else bitmap->sysfs_can_clear = NULL; - bitmap->file = file; + bitmap->storage.file = file; if (file) { get_file(file); /* As future accesses to this file will use bmap, @@ -1723,9 +1667,16 @@ int bitmap_create(mddev_t *mddev) vfs_fsync(file, 1); } /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ - if (!mddev->bitmap_info.external) - err = bitmap_read_sb(bitmap); - else { + if (!mddev->bitmap_info.external) { + /* + * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is + * instructing us to create a new on-disk bitmap instance. + */ + if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) + err = bitmap_new_disk_sb(bitmap); + else + err = bitmap_read_sb(bitmap); + } else { err = 0; if (mddev->bitmap_info.chunksize == 0 || mddev->bitmap_info.daemon_sleep == 0) @@ -1737,47 +1688,25 @@ int bitmap_create(mddev_t *mddev) goto error; bitmap->daemon_lastrun = jiffies; - bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); - - /* now that chunksize and chunkshift are set, we can use these macros */ - chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> - CHUNK_BLOCK_SHIFT(bitmap); - pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; - - BUG_ON(!pages); - - bitmap->chunks = chunks; - bitmap->pages = pages; - bitmap->missing_pages = pages; - bitmap->counter_bits = COUNTER_BITS; - - bitmap->syncchunk = ~0UL; - -#ifdef INJECT_FATAL_FAULT_1 - bitmap->bp = NULL; -#else - bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); -#endif - err = -ENOMEM; - if (!bitmap->bp) + err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); + if (err) goto error; printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", - pages, bmname(bitmap)); + bitmap->counts.pages, bmname(bitmap)); mddev->bitmap = bitmap; - - - return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; + return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; error: bitmap_free(bitmap); return err; } -int bitmap_load(mddev_t *mddev) +int bitmap_load(struct mddev *mddev) { int err = 0; + sector_t start = 0; sector_t sector = 0; struct bitmap *bitmap = mddev->bitmap; @@ -1796,41 +1725,228 @@ int bitmap_load(mddev_t *mddev) } bitmap_close_sync(bitmap); - if (mddev->bitmap_info.log) { - unsigned long i; - struct dm_dirty_log *log = mddev->bitmap_info.log; - for (i = 0; i < bitmap->chunks; i++) - if (!log->type->in_sync(log, i, 1)) - bitmap_set_memory_bits(bitmap, - (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), - 1); - } else { - sector_t start = 0; - if (mddev->degraded == 0 - || bitmap->events_cleared == mddev->events) - /* no need to keep dirty bits to optimise a - * re-add of a missing device */ - start = mddev->recovery_cp; - - err = bitmap_init_from_disk(bitmap, start); - } + if (mddev->degraded == 0 + || bitmap->events_cleared == mddev->events) + /* no need to keep dirty bits to optimise a + * re-add of a missing device */ + start = mddev->recovery_cp; + + mutex_lock(&mddev->bitmap_info.mutex); + err = bitmap_init_from_disk(bitmap, start); + mutex_unlock(&mddev->bitmap_info.mutex); + if (err) goto out; + clear_bit(BITMAP_STALE, &bitmap->flags); + + /* Kick recovery in case any bits were set */ + set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; md_wakeup_thread(mddev->thread); bitmap_update_sb(bitmap); - if (bitmap->flags & BITMAP_WRITE_ERROR) + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) err = -EIO; out: return err; } EXPORT_SYMBOL_GPL(bitmap_load); +void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) +{ + unsigned long chunk_kb; + struct bitmap_counts *counts; + + if (!bitmap) + return; + + counts = &bitmap->counts; + + chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; + seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " + "%lu%s chunk", + counts->pages - counts->missing_pages, + counts->pages, + (counts->pages - counts->missing_pages) + << (PAGE_SHIFT - 10), + chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, + chunk_kb ? "KB" : "B"); + if (bitmap->storage.file) { + seq_printf(seq, ", file: "); + seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); + } + + seq_printf(seq, "\n"); +} + +int bitmap_resize(struct bitmap *bitmap, sector_t blocks, + int chunksize, int init) +{ + /* If chunk_size is 0, choose an appropriate chunk size. + * Then possibly allocate new storage space. + * Then quiesce, copy bits, replace bitmap, and re-start + * + * This function is called both to set up the initial bitmap + * and to resize the bitmap while the array is active. + * If this happens as a result of the array being resized, + * chunksize will be zero, and we need to choose a suitable + * chunksize, otherwise we use what we are given. + */ + struct bitmap_storage store; + struct bitmap_counts old_counts; + unsigned long chunks; + sector_t block; + sector_t old_blocks, new_blocks; + int chunkshift; + int ret = 0; + long pages; + struct bitmap_page *new_bp; + + if (chunksize == 0) { + /* If there is enough space, leave the chunk size unchanged, + * else increase by factor of two until there is enough space. + */ + long bytes; + long space = bitmap->mddev->bitmap_info.space; + + if (space == 0) { + /* We don't know how much space there is, so limit + * to current size - in sectors. + */ + bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); + if (!bitmap->mddev->bitmap_info.external) + bytes += sizeof(bitmap_super_t); + space = DIV_ROUND_UP(bytes, 512); + bitmap->mddev->bitmap_info.space = space; + } + chunkshift = bitmap->counts.chunkshift; + chunkshift--; + do { + /* 'chunkshift' is shift from block size to chunk size */ + chunkshift++; + chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); + bytes = DIV_ROUND_UP(chunks, 8); + if (!bitmap->mddev->bitmap_info.external) + bytes += sizeof(bitmap_super_t); + } while (bytes > (space << 9)); + } else + chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; + + chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); + memset(&store, 0, sizeof(store)); + if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) + ret = bitmap_storage_alloc(&store, chunks, + !bitmap->mddev->bitmap_info.external); + if (ret) + goto err; + + pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); + + new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL); + ret = -ENOMEM; + if (!new_bp) { + bitmap_file_unmap(&store); + goto err; + } + + if (!init) + bitmap->mddev->pers->quiesce(bitmap->mddev, 1); + + store.file = bitmap->storage.file; + bitmap->storage.file = NULL; + + if (store.sb_page && bitmap->storage.sb_page) + memcpy(page_address(store.sb_page), + page_address(bitmap->storage.sb_page), + sizeof(bitmap_super_t)); + bitmap_file_unmap(&bitmap->storage); + bitmap->storage = store; + + old_counts = bitmap->counts; + bitmap->counts.bp = new_bp; + bitmap->counts.pages = pages; + bitmap->counts.missing_pages = pages; + bitmap->counts.chunkshift = chunkshift; + bitmap->counts.chunks = chunks; + bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + + BITMAP_BLOCK_SHIFT); + + blocks = min(old_counts.chunks << old_counts.chunkshift, + chunks << chunkshift); + + spin_lock_irq(&bitmap->counts.lock); + for (block = 0; block < blocks; ) { + bitmap_counter_t *bmc_old, *bmc_new; + int set; + + bmc_old = bitmap_get_counter(&old_counts, block, + &old_blocks, 0); + set = bmc_old && NEEDED(*bmc_old); + + if (set) { + bmc_new = bitmap_get_counter(&bitmap->counts, block, + &new_blocks, 1); + if (*bmc_new == 0) { + /* need to set on-disk bits too. */ + sector_t end = block + new_blocks; + sector_t start = block >> chunkshift; + start <<= chunkshift; + while (start < end) { + bitmap_file_set_bit(bitmap, block); + start += 1 << chunkshift; + } + *bmc_new = 2; + bitmap_count_page(&bitmap->counts, + block, 1); + bitmap_set_pending(&bitmap->counts, + block); + } + *bmc_new |= NEEDED_MASK; + if (new_blocks < old_blocks) + old_blocks = new_blocks; + } + block += old_blocks; + } + + if (!init) { + int i; + while (block < (chunks << chunkshift)) { + bitmap_counter_t *bmc; + bmc = bitmap_get_counter(&bitmap->counts, block, + &new_blocks, 1); + if (bmc) { + /* new space. It needs to be resynced, so + * we set NEEDED_MASK. + */ + if (*bmc == 0) { + *bmc = NEEDED_MASK | 2; + bitmap_count_page(&bitmap->counts, + block, 1); + bitmap_set_pending(&bitmap->counts, + block); + } + } + block += new_blocks; + } + for (i = 0; i < bitmap->storage.file_pages; i++) + set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); + } + spin_unlock_irq(&bitmap->counts.lock); + + if (!init) { + bitmap_unplug(bitmap); + bitmap->mddev->pers->quiesce(bitmap->mddev, 0); + } + ret = 0; +err: + return ret; +} +EXPORT_SYMBOL_GPL(bitmap_resize); + static ssize_t -location_show(mddev_t *mddev, char *page) +location_show(struct mddev *mddev, char *page) { ssize_t len; if (mddev->bitmap_info.file) @@ -1844,7 +1960,7 @@ location_show(mddev_t *mddev, char *page) } static ssize_t -location_store(mddev_t *mddev, const char *buf, size_t len) +location_store(struct mddev *mddev, const char *buf, size_t len) { if (mddev->pers) { @@ -1868,7 +1984,6 @@ location_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; mddev->bitmap_info.file = NULL; - restore_bitmap_write_access(f); fput(f); } } else { @@ -1882,9 +1997,9 @@ location_store(mddev_t *mddev, const char *buf, size_t len) } else { int rv; if (buf[0] == '+') - rv = strict_strtoll(buf+1, 10, &offset); + rv = kstrtoll(buf+1, 10, &offset); else - rv = strict_strtoll(buf, 10, &offset); + rv = kstrtoll(buf, 10, &offset); if (rv) return rv; if (offset == 0) @@ -1897,6 +2012,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len) if (mddev->pers) { mddev->pers->quiesce(mddev, 1); rv = bitmap_create(mddev); + if (!rv) + rv = bitmap_load(mddev); if (rv) { bitmap_destroy(mddev); mddev->bitmap_info.offset = 0; @@ -1920,8 +2037,45 @@ location_store(mddev_t *mddev, const char *buf, size_t len) static struct md_sysfs_entry bitmap_location = __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); +/* 'bitmap/space' is the space available at 'location' for the + * bitmap. This allows the kernel to know when it is safe to + * resize the bitmap to match a resized array. + */ +static ssize_t +space_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%lu\n", mddev->bitmap_info.space); +} + +static ssize_t +space_store(struct mddev *mddev, const char *buf, size_t len) +{ + unsigned long sectors; + int rv; + + rv = kstrtoul(buf, 10, §ors); + if (rv) + return rv; + + if (sectors == 0) + return -EINVAL; + + if (mddev->bitmap && + sectors < (mddev->bitmap->storage.bytes + 511) >> 9) + return -EFBIG; /* Bitmap is too big for this small space */ + + /* could make sure it isn't too big, but that isn't really + * needed - user-space should be careful. + */ + mddev->bitmap_info.space = sectors; + return len; +} + +static struct md_sysfs_entry bitmap_space = +__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); + static ssize_t -timeout_show(mddev_t *mddev, char *page) +timeout_show(struct mddev *mddev, char *page) { ssize_t len; unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; @@ -1935,7 +2089,7 @@ timeout_show(mddev_t *mddev, char *page) } static ssize_t -timeout_store(mddev_t *mddev, const char *buf, size_t len) +timeout_store(struct mddev *mddev, const char *buf, size_t len) { /* timeout can be set at any time */ unsigned long timeout; @@ -1971,16 +2125,16 @@ static struct md_sysfs_entry bitmap_timeout = __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); static ssize_t -backlog_show(mddev_t *mddev, char *page) +backlog_show(struct mddev *mddev, char *page) { return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); } static ssize_t -backlog_store(mddev_t *mddev, const char *buf, size_t len) +backlog_store(struct mddev *mddev, const char *buf, size_t len) { unsigned long backlog; - int rv = strict_strtoul(buf, 10, &backlog); + int rv = kstrtoul(buf, 10, &backlog); if (rv) return rv; if (backlog > COUNTER_MAX) @@ -1993,20 +2147,20 @@ static struct md_sysfs_entry bitmap_backlog = __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); static ssize_t -chunksize_show(mddev_t *mddev, char *page) +chunksize_show(struct mddev *mddev, char *page) { return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); } static ssize_t -chunksize_store(mddev_t *mddev, const char *buf, size_t len) +chunksize_store(struct mddev *mddev, const char *buf, size_t len) { /* Can only be changed when no bitmap is active */ int rv; unsigned long csize; if (mddev->bitmap) return -EBUSY; - rv = strict_strtoul(buf, 10, &csize); + rv = kstrtoul(buf, 10, &csize); if (rv) return rv; if (csize < 512 || @@ -2019,13 +2173,13 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len) static struct md_sysfs_entry bitmap_chunksize = __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); -static ssize_t metadata_show(mddev_t *mddev, char *page) +static ssize_t metadata_show(struct mddev *mddev, char *page) { return sprintf(page, "%s\n", (mddev->bitmap_info.external ? "external" : "internal")); } -static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len) +static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) { if (mddev->bitmap || mddev->bitmap_info.file || @@ -2043,7 +2197,7 @@ static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len) static struct md_sysfs_entry bitmap_metadata = __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); -static ssize_t can_clear_show(mddev_t *mddev, char *page) +static ssize_t can_clear_show(struct mddev *mddev, char *page) { int len; if (mddev->bitmap) @@ -2054,7 +2208,7 @@ static ssize_t can_clear_show(mddev_t *mddev, char *page) return len; } -static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len) +static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) { if (mddev->bitmap == NULL) return -ENOENT; @@ -2073,7 +2227,7 @@ static struct md_sysfs_entry bitmap_can_clear = __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); static ssize_t -behind_writes_used_show(mddev_t *mddev, char *page) +behind_writes_used_show(struct mddev *mddev, char *page) { if (mddev->bitmap == NULL) return sprintf(page, "0\n"); @@ -2082,7 +2236,7 @@ behind_writes_used_show(mddev_t *mddev, char *page) } static ssize_t -behind_writes_used_reset(mddev_t *mddev, const char *buf, size_t len) +behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) { if (mddev->bitmap) mddev->bitmap->behind_writes_used = 0; @@ -2095,6 +2249,7 @@ __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, static struct attribute *md_bitmap_attrs[] = { &bitmap_location.attr, + &bitmap_space.attr, &bitmap_timeout.attr, &bitmap_backlog.attr, &bitmap_chunksize.attr, |
