diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 15:38:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 15:38:19 -0700 |
commit | 3d30701b58970425e1d45994d6cb82f828924fdd (patch) | |
tree | 8b14cf462628bebf8548c1b8c205a674564052d1 /drivers | |
parent | 8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff) | |
parent | fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
md: clean up do_md_stop
md: fix another deadlock with removing sysfs attributes.
md: move revalidate_disk() back outside open_mutex
md/raid10: fix deadlock with unaligned read during resync
md/bitmap: separate out loading a bitmap from initialising the structures.
md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
md/bitmap: optimise scanning of empty bitmaps.
md/bitmap: clean up plugging calls.
md/bitmap: reduce dependence on sysfs.
md/bitmap: white space clean up and similar.
md/raid5: export raid5 unplugging interface.
md/plug: optionally use plugger to unplug an array during resync/recovery.
md/raid5: add simple plugging infrastructure.
md/raid5: export is_congested test
raid5: Don't set read-ahead when there is no queue
md: add support for raising dm events.
md: export various start/stop interfaces
md: split out md_rdev_init
md: be more careful setting MD_CHANGE_CLEAN
md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/Kconfig | 18 | ||||
-rw-r--r-- | drivers/md/Makefile | 77 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 508 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 6 | ||||
-rw-r--r-- | drivers/md/md.c | 286 | ||||
-rw-r--r-- | drivers/md/md.h | 55 | ||||
-rw-r--r-- | drivers/md/mktables.c | 132 | ||||
-rw-r--r-- | drivers/md/raid10.c | 18 | ||||
-rw-r--r-- | drivers/md/raid5.c | 168 | ||||
-rw-r--r-- | drivers/md/raid5.h | 9 | ||||
-rw-r--r-- | drivers/md/raid6algos.c | 154 | ||||
-rw-r--r-- | drivers/md/raid6altivec.uc | 130 | ||||
-rw-r--r-- | drivers/md/raid6int.uc | 117 | ||||
-rw-r--r-- | drivers/md/raid6mmx.c | 142 | ||||
-rw-r--r-- | drivers/md/raid6recov.c | 132 | ||||
-rw-r--r-- | drivers/md/raid6sse1.c | 162 | ||||
-rw-r--r-- | drivers/md/raid6sse2.c | 262 | ||||
-rw-r--r-- | drivers/md/raid6test/Makefile | 75 | ||||
-rw-r--r-- | drivers/md/raid6test/test.c | 124 | ||||
-rw-r--r-- | drivers/md/raid6x86.h | 61 | ||||
-rw-r--r-- | drivers/md/unroll.awk | 20 |
21 files changed, 644 insertions, 2012 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4a6feac8c94..bf1a95e3155 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -121,7 +121,7 @@ config MD_RAID10 config MD_RAID456 tristate "RAID-4/RAID-5/RAID-6 mode" depends on BLK_DEV_MD - select MD_RAID6_PQ + select RAID6_PQ select ASYNC_MEMCPY select ASYNC_XOR select ASYNC_PQ @@ -165,22 +165,6 @@ config MULTICORE_RAID456 If unsure, say N. -config MD_RAID6_PQ - tristate - -config ASYNC_RAID6_TEST - tristate "Self test for hardware accelerated raid6 recovery" - depends on MD_RAID6_PQ - select ASYNC_RAID6_RECOV - ---help--- - This is a one-shot self test that permutes through the - recovery of all the possible two disk failure scenarios for a - N-disk array. Recovery is performed with the asynchronous - raid6 recovery routines, and will optionally use an offload - engine if one is available. - - If unsure, say N. - config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index e355e7f6a53..5e3aac41919 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -12,13 +12,6 @@ dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o md-mod-y += md.o bitmap.o raid456-y += raid5.o -raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \ - raid6int1.o raid6int2.o raid6int4.o \ - raid6int8.o raid6int16.o raid6int32.o \ - raid6altivec1.o raid6altivec2.o raid6altivec4.o \ - raid6altivec8.o \ - raid6mmx.o raid6sse1.o raid6sse2.o -hostprogs-y += mktables # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise @@ -29,7 +22,6 @@ obj-$(CONFIG_MD_LINEAR) += linear.o obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID10) += raid10.o -obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o obj-$(CONFIG_MD_RAID456) += raid456.o obj-$(CONFIG_MD_MULTIPATH) += multipath.o obj-$(CONFIG_MD_FAULTY) += faulty.o @@ -45,75 +37,6 @@ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o obj-$(CONFIG_DM_ZERO) += dm-zero.o -quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ - < $< > $@ || ( rm -f $@ && exit 1 ) - -ifeq ($(CONFIG_ALTIVEC),y) -altivec_flags := -maltivec -mabi=altivec -endif - ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o endif - -targets += raid6int1.c -$(obj)/raid6int1.c: UNROLL := 1 -$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -targets += raid6int2.c -$(obj)/raid6int2.c: UNROLL := 2 -$(obj)/raid6int2.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -targets += raid6int4.c -$(obj)/raid6int4.c: UNROLL := 4 -$(obj)/raid6int4.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -targets += raid6int8.c -$(obj)/raid6int8.c: UNROLL := 8 -$(obj)/raid6int8.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -targets += raid6int16.c -$(obj)/raid6int16.c: UNROLL := 16 -$(obj)/raid6int16.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -targets += raid6int32.c -$(obj)/raid6int32.c: UNROLL := 32 -$(obj)/raid6int32.c: $(src)/raid6int.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -CFLAGS_raid6altivec1.o += $(altivec_flags) -targets += raid6altivec1.c -$(obj)/raid6altivec1.c: UNROLL := 1 -$(obj)/raid6altivec1.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -CFLAGS_raid6altivec2.o += $(altivec_flags) -targets += raid6altivec2.c -$(obj)/raid6altivec2.c: UNROLL := 2 -$(obj)/raid6altivec2.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -CFLAGS_raid6altivec4.o += $(altivec_flags) -targets += raid6altivec4.c -$(obj)/raid6altivec4.c: UNROLL := 4 -$(obj)/raid6altivec4.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -CFLAGS_raid6altivec8.o += $(altivec_flags) -targets += raid6altivec8.c -$(obj)/raid6altivec8.c: UNROLL := 8 -$(obj)/raid6altivec8.c: $(src)/raid6altivec.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - -quiet_cmd_mktable = TABLE $@ - cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) - -targets += raid6tables.c -$(obj)/raid6tables.c: $(obj)/mktables FORCE - $(call if_changed,mktable) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1742435ce3a..1ba1e122e94 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -13,7 +13,6 @@ * Still to do: * * flush after percent set rather than just time based. (maybe both). - * wait if count gets too high, wake when it drops to half. */ #include <linux/blkdev.h> @@ -30,6 +29,7 @@ #include "md.h" #include "bitmap.h" +#include <linux/dm-dirty-log.h> /* debug macros */ #define DEBUG 0 @@ -51,9 +51,6 @@ #define INJECT_FATAL_FAULT_3 0 /* undef */ #endif -//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ -#define DPRINTK(x...) do { } while(0) - #ifndef PRINTK # if DEBUG > 0 # define PRINTK(x...) printk(KERN_DEBUG x) @@ -62,12 +59,11 @@ # endif #endif -static inline char * bmname(struct bitmap *bitmap) +static inline char *bmname(struct bitmap *bitmap) { return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; } - /* * just a placeholder - calls kmalloc for bitmap pages */ @@ -78,7 +74,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) #ifdef INJECT_FAULTS_1 page = NULL; #else - page = kmalloc(PAGE_SIZE, GFP_NOIO); + page = kzalloc(PAGE_SIZE, GFP_NOIO); #endif if (!page) printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); @@ -107,7 +103,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) * if we find our page, we increment the page's refcount so that it stays * allocated while we're using it */ -static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) +static int bitmap_checkpage(struct bitmap *bitmap, + unsigned long page, int create) __releases(bitmap->lock) __acquires(bitmap->lock) { @@ -121,7 +118,6 @@ __acquires(bitmap->lock) return -EINVAL; } - if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ return 0; @@ -131,43 +127,34 @@ __acquires(bitmap->lock) if (!create) return -ENOENT; - spin_unlock_irq(&bitmap->lock); - /* this page has not been allocated yet */ - if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { + spin_unlock_irq(&bitmap->lock); + mappage = bitmap_alloc_page(bitmap); + spin_lock_irq(&bitmap->lock); + + if (mappage == NULL) { PRINTK("%s: bitmap map page allocation failed, hijacking\n", bmname(bitmap)); /* failed - set the hijacked flag so that we can use the * pointer as a counter */ - spin_lock_irq(&bitmap->lock); if (!bitmap->bp[page].map) bitmap->bp[page].hijacked = 1; - goto out; - } - - /* got a page */ - - spin_lock_irq(&bitmap->lock); - - /* recheck the page */ - - if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { + } else if (bitmap->bp[page].map || + bitmap->bp[page].hijacked) { /* somebody beat us to getting the page */ bitmap_free_page(bitmap, mappage); return 0; - } + } else { - /* no page was in place and we have one, so install it */ + /* no page was in place and we have one, so install it */ - memset(mappage, 0, PAGE_SIZE); - bitmap->bp[page].map = mappage; - bitmap->missing_pages--; -out: + bitmap->bp[page].map = mappage; + bitmap->missing_pages--; + } return 0; } - /* if page is completely empty, put it back on the free list, or dealloc it */ /* if page was hijacked, unmark the flag so it might get alloced next time */ /* Note: lock should be held when calling this */ @@ -183,26 +170,15 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ bitmap->bp[page].hijacked = 0; bitmap->bp[page].map = NULL; - return; + } else { + /* normal case, free the page */ + ptr = bitmap->bp[page].map; + bitmap->bp[page].map = NULL; + bitmap->missing_pages++; + bitmap_free_page(bitmap, ptr); } - - /* normal case, free the page */ - -#if 0 -/* actually ... let's not. We will probably need the page again exactly when - * memory is tight and we are flusing to disk - */ - return; -#else - ptr = bitmap->bp[page].map; - bitmap->bp[page].map = NULL; - bitmap->missing_pages++; - bitmap_free_page(bitmap, ptr); - return; -#endif } - /* * bitmap file handling - read and write the bitmap file and its superblock */ @@ -220,11 +196,14 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, mdk_rdev_t *rdev; sector_t target; + int did_alloc = 0; - if (!page) + if (!page) { page = alloc_page(GFP_KERNEL); - if (!page) - return ERR_PTR(-ENOMEM); + if (!page) + return ERR_PTR(-ENOMEM); + did_alloc = 1; + } list_for_each_entry(rdev, &mddev->disks, same_set) { if (! test_bit(In_sync, &rdev->flags) @@ -242,6 +221,8 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, return page; } } + if (did_alloc) + put_page(page); return ERR_PTR(-EIO); } @@ -286,49 +267,51 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) mddev_t *mddev = bitmap->mddev; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { - int size = PAGE_SIZE; - loff_t offset = mddev->bitmap_info.offset; - if (page->index == bitmap->file_pages-1) - size = roundup(bitmap->last_page_size, - bdev_logical_block_size(rdev->bdev)); - /* Just make sure we aren't corrupting data or - * metadata - */ - if (mddev->external) { - /* Bitmap could be anywhere. */ - if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > - rdev->data_offset && - rdev->sb_start + offset < - rdev->data_offset + mddev->dev_sectors + - (PAGE_SIZE/512)) - goto bad_alignment; - } else if (offset < 0) { - /* DATA BITMAP METADATA */ - if (offset - + (long)(page->index * (PAGE_SIZE/512)) - + size/512 > 0) - /* bitmap runs in to metadata */ - goto bad_alignment; - if (rdev->data_offset + mddev->dev_sectors - > rdev->sb_start + offset) - /* data runs in to bitmap */ - goto bad_alignment; - } else if (rdev->sb_start < rdev->data_offset) { - /* METADATA BITMAP DATA */ - if (rdev->sb_start - + offset - + page->index*(PAGE_SIZE/512) + size/512 - > rdev->data_offset) - /* bitmap runs in to data */ - goto bad_alignment; - } else { - /* DATA METADATA BITMAP - no problems */ - } - md_super_write(mddev, rdev, - rdev->sb_start + offset - + page->index * (PAGE_SIZE/512), - size, - page); + int size = PAGE_SIZE; + loff_t offset = mddev->bitmap_info.offset; + if (page->index == bitmap->file_pages-1) + size = roundup(bitmap->last_page_size, + bdev_logical_block_size(rdev->bdev)); + /* Just make sure we aren't corrupting data or + * metadata + */ + if (mddev->external) { + /* Bitmap could be anywhere. */ + if (rdev->sb_start + offset + (page->index + * (PAGE_SIZE/512)) + > rdev->data_offset + && + rdev->sb_start + offset + < (rdev->data_offset + mddev->dev_sectors + + (PAGE_SIZE/512))) + goto bad_alignment; + } else if (offset < 0) { + /* DATA BITMAP METADATA */ + if (offset + + (long)(page->index * (PAGE_SIZE/512)) + + size/512 > 0) + /* bitmap runs in to metadata */ + goto bad_alignment; + if (rdev->data_offset + mddev->dev_sectors + > rdev->sb_start + offset) + /* data runs in to bitmap */ + goto bad_alignment; + } else if (rdev->sb_start < rdev->data_offset) { + /* METADATA BITMAP DATA */ + if (rdev->sb_start + + offset + + page->index*(PAGE_SIZE/512) + size/512 + > rdev->data_offset) + /* bitmap runs in to data */ + goto bad_alignment; + } else { + /* DATA METADATA BITMAP - no problems */ + } + md_super_write(mddev, rdev, + rdev->sb_start + offset + + page->index * (PAGE_SIZE/512), + size, + page); } if (wait) @@ -364,10 +347,9 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) bh = bh->b_this_page; } - if (wait) { + if (wait) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); - } } if (bitmap->flags & BITMAP_WRITE_ERROR) bitmap_file_kick(bitmap); @@ -424,7 +406,7 @@ static struct page *read_page(struct file *file, unsigned long index, struct buffer_head *bh; sector_t block; - PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, + PRINTK("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT); page = alloc_page(GFP_KERNEL); @@ -478,7 +460,7 @@ static struct page *read_page(struct file *file, unsigned long index, } out: if (IS_ERR(page)) - printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", + printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT, PTR_ERR(page)); @@ -664,11 +646,14 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, sb = kmap_atomic(bitmap->sb_page, KM_USER0); old = le32_to_cpu(sb->state) & bits; switch (op) { - case MASK_SET: sb->state |= cpu_to_le32(bits); - break; - case MASK_UNSET: sb->state &= cpu_to_le32(~bits); - break; - default: BUG(); + case MASK_SET: + sb->state |= cpu_to_le32(bits); + break; + case MASK_UNSET: + sb->state &= cpu_to_le32(~bits); + break; + default: + BUG(); } kunmap_atomic(sb, KM_USER0); return old; @@ -710,12 +695,14 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon static inline struct page *filemap_get_page(struct bitmap *bitmap, unsigned long chunk) { - if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; + if (bitmap->filemap == NULL) + return NULL; + if (file_page_index(bitmap, chunk) >= bitmap->file_pages) + return NULL; return bitmap->filemap[file_page_index(bitmap, chunk) - file_page_index(bitmap, 0)]; } - static void bitmap_file_unmap(struct bitmap *bitmap) { struct page **map, *sb_page; @@ -766,7 +753,6 @@ static void bitmap_file_put(struct bitmap *bitmap) } } - /* * bitmap_file_kick - if an error occurs while manipulating the bitmap file * then it is no longer reliable, so we stop using it and we mark the file @@ -785,7 +771,6 @@ static void bitmap_file_kick(struct bitmap *bitmap) ptr = d_path(&bitmap->file->f_path, path, PAGE_SIZE); - printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", bmname(bitmap), IS_ERR(ptr) ? "" : ptr); @@ -803,27 +788,36 @@ static void bitmap_file_kick(struct bitmap *bitmap) } enum bitmap_page_attr { - BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced - BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared - BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced + BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ + BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */ + BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ }; static inline void set_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); + if (page) + __set_bit((page->index<<2) + attr, bitmap->filemap_attr); + else + __set_bit(attr, &bitmap->logattrs); } static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); + if (page) + __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); + else + __clear_bit(attr, &bitmap->logattrs); } static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); + if (page) + return test_bit((page->index<<2) + attr, bitmap->filemap_attr); + else + return test_bit(attr, &bitmap->logattrs); } /* @@ -836,30 +830,32 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; - struct page *page; + struct page *page = NULL; void *kaddr; unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); if (!bitmap->filemap) { - return; - } - - page = filemap_get_page(bitmap, chunk); - if (!page) return; - bit = file_page_offset(bitmap, chunk); + struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; + if (log) + log->type->mark_region(log, chunk); + } else { - /* set the bit */ - kaddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - set_bit(bit, kaddr); - else - ext2_set_bit(bit, kaddr); - kunmap_atomic(kaddr, KM_USER0); - PRINTK("set file bit %lu page %lu\n", bit, page->index); + page = filemap_get_page(bitmap, chunk); + if (!page) + return; + bit = file_page_offset(bitmap, chunk); + /* set the bit */ + kaddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & BITMAP_HOSTENDIAN) + set_bit(bit, kaddr); + else + ext2_set_bit(bit, kaddr); + kunmap_atomic(kaddr, KM_USER0); + PRINTK("set file bit %lu page %lu\n", bit, page->index); + } /* record page number so it gets flushed to disk when unplug occurs */ set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); - } /* this gets called when the md device is ready to unplug its underlying @@ -874,6 +870,16 @@ void bitmap_unplug(struct bitmap *bitmap) if (!bitmap) return; + if (!bitmap->filemap) { + /* Must be using a dirty_log */ + struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; + dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); + need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); + if (dirty || need_write) + if (log->type->flush(log)) + bitmap->flags |= BITMAP_WRITE_ERROR; + goto out; + } /* look at each page to see if there are any set bits that need to be * flushed out to disk */ @@ -892,7 +898,7 @@ void bitmap_unplug(struct bitmap *bitmap) wait = 1; spin_unlock_irqrestore(&bitmap->lock, flags); - if (dirty | need_write) + if (dirty || need_write) write_page(bitmap, page, 0); } if (wait) { /* if any writes were performed, we need to wait on them */ @@ -902,9 +908,11 @@ void bitmap_unplug(struct bitmap *bitmap) else md_super_wait(bitmap->mddev); } +out: if (bitmap->flags & BITMAP_WRITE_ERROR) bitmap_file_kick(bitmap); } +EXPORT_SYMBOL(bitmap_unplug); static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); /* * bitmap_init_from_disk -- called at bitmap_create time to initialize @@ -943,12 +951,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) printk(KERN_INFO "%s: bitmap file is out of date, doing full " "recovery\n", bmname(bitmap)); - bytes = (chunks + 7) / 8; + bytes = DIV_ROUND_UP(bitmap->chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); - - num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; + num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); if (file && i_size_read(file->f_mapping->host) < bytes) { printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", @@ -966,7 +973,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ bitmap->filemap_attr = kzalloc( - roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), + roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), GFP_KERNEL); if (!bitmap->filemap_attr) goto err; @@ -1021,7 +1028,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) if (outofdate) { /* * if bitmap is out of date, dirty the - * whole page and write it out + * whole page and write it out */ paddr = kmap_atomic(page, KM_USER0); memset(paddr + offset, 0xff, @@ -1052,7 +1059,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) } } - /* everything went OK */ + /* everything went OK */ ret = 0; bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); @@ -1080,21 +1087,16 @@ void bitmap_write_all(struct bitmap *bitmap) */ int i; - for (i=0; i < bitmap->file_pages; i++) + for (i = 0; i < bitmap->file_pages; i++) set_page_attr(bitmap, bitmap->filemap[i], BITMAP_PAGE_NEEDWRITE); } - static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) { sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); unsigned long page = chunk >> PAGE_COUNTER_SHIFT; bitmap->bp[page].count += inc; -/* - if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", - (unsigned long long)offset, inc, bitmap->bp[page].count); -*/ bitmap_checkfree(bitmap, page); } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1114,6 +1116,7 @@ void bitmap_daemon_work(mddev_t *mddev) struct page *page = NULL, *lastpage = NULL; int blocks; void *paddr; + struct dm_dirty_log *log = mddev->bitmap_info.log; /* Use a mutex to guard daemon_work against * bitmap_destroy. @@ -1138,11 +1141,12 @@ void bitmap_daemon_work(mddev_t *mddev) spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - if (!bitmap->filemap) - /* error or shutdown */ - break; - - page = filemap_get_page(bitmap, j); + if (!bitmap->filemap) { + if (!log) + /* error or shutdown */ + break; + } else + page = filemap_get_page(bitmap, j); if (page != lastpage) { /* skip this page unless it's marked as needing cleaning */ @@ -1197,14 +1201,11 @@ void bitmap_daemon_work(mddev_t *mddev) (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), &blocks, 0); if (bmc) { -/* - if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); -*/ if (*bmc) bitmap->allclean = 0; if (*bmc == 2) { - *bmc=1; /* maybe clear the bit next time */ + *bmc = 1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } else if (*bmc == 1 && !bitmap->need_sync) { /* we can clear the bit */ @@ -1214,14 +1215,17 @@ void bitmap_daemon_work(mddev_t *mddev) -1); /* clear the bit */ - paddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - ext2_clear_bit(file_page_offset(bitmap, j), - paddr); - kunmap_atomic(paddr, KM_USER0); + if (page) { + paddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & BITMAP_HOSTENDIAN) + clear_bit(file_page_offset(bitmap, j), + paddr); + else + ext2_clear_bit(file_page_offset(bitmap, j), + paddr); + kunmap_atomic(paddr, KM_USER0); + } else + log->type->clear_region(log, j); } } else j |= PAGE_COUNTER_MASK; @@ -1229,12 +1233,16 @@ void bitmap_daemon_work(mddev_t *mddev) spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ - if (lastpage != NULL) { + if (lastpage != NULL || log != NULL) { spin_lock_irqsave(&bitmap->lock, flags); if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - write_page(bitmap, lastpage, 0); + if (lastpage) + write_page(bitmap, lastpage, 0); + else + if (log->type->flush(log)) + bitmap->flags |= BITMAP_WRITE_ERROR; } else { set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); @@ -1243,7 +1251,7 @@ void bitmap_daemon_work(mddev_t *mddev) done: if (bitmap->allclean == 0) - bitmap->mddev->thread->timeout = + bitmap->mddev->thread->timeout = bitmap->mddev->bitmap_info.daemon_sleep; mutex_unlock(&mddev->bitmap_info.mutex); } @@ -1262,34 +1270,38 @@ __acquires(bitmap->lock) unsigned long page = chunk >> PAGE_COUNTER_SHIFT; unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; sector_t csize; + int err; - if (bitmap_checkpage(bitmap, page, create) < 0) { + err = bitmap_checkpage(bitmap, page, create); + + if (bitmap->bp[page].hijacked || + bitmap->bp[page].map == NULL) + csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + + PAGE_COUNTER_SHIFT - 1); + else csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); - *blocks = csize - (offset & (csize- 1)); + *blocks = csize - (offset & (csize - 1)); + + if (err < 0) return NULL; - } + /* now locked ... */ if (bitmap->bp[page].hijacked) { /* hijacked pointer */ /* should we use the first or second counter field * of the hijacked pointer? */ int hi = (pageoff > PAGE_COUNTER_MASK); - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + - PAGE_COUNTER_SHIFT - 1); - *blocks = csize - (offset & (csize- 1)); return &((bitmap_counter_t *) &bitmap->bp[page].map)[hi]; - } else { /* page is allocated */ - csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); - *blocks = csize - (offset & (csize- 1)); + } else /* page is allocated */ return (bitmap_counter_t *) &(bitmap->bp[page].map[pageoff]); - } } int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) { - if (!bitmap) return 0; + if (!bitmap) + return 0; if (behind) { int bw; @@ -1322,17 +1334,16 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&bitmap->lock); - blk_unplug(bitmap->mddev->queue); + md_unplug(bitmap->mddev); schedule(); finish_wait(&bitmap->overflow_wait, &__wait); continue; } - switch(*bmc) { + switch (*bmc) { case 0: bitmap_file_set_bit(bitmap, offset); - bitmap_count_page(bitmap,offset, 1); - blk_plug_device_unlocked(bitmap->mddev->queue); + bitmap_count_page(bitmap, offset, 1); /* fall through */ case 1: *bmc = 2; @@ -1345,16 +1356,19 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect offset += blocks; if (sectors > blocks) sectors -= blocks; - else sectors = 0; + else + sectors = 0; } bitmap->allclean = 0; return 0; } +EXPORT_SYMBOL(bitmap_startwrite); void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int success, int behind) { - if (!bitmap) return; + if (!bitmap) + return; if (behind) { if (atomic_dec_and_test(&bitmap->behind_writes)) wake_up(&bitmap->behind_wait); @@ -1381,7 +1395,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto bitmap->events_cleared < bitmap->mddev->events) { bitmap->events_cleared = bitmap->mddev->events; bitmap->need_sync = 1; - sysfs_notify_dirent(bitmap->sysfs_can_clear); + sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); } if (!success && ! (*bmc & NEEDED_MASK)) @@ -1391,18 +1405,22 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto wake_up(&bitmap->overflow_wait); (*bmc)--; - if (*bmc <= 2) { + if (*bmc <= 2) set_page_attr(bitmap, - filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), + filemap_get_page( + bitmap, + offset >> CHUNK_BLOCK_SHIFT(bitmap)) |