diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-14 10:03:36 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-14 10:03:36 -0800 |
commit | 37222e1c9ee3ce587f5b41fed868bd8a592a992f (patch) | |
tree | b65f22a1e20286185463ca1a2889e593d963a393 | |
parent | 76b8f82cde2d9c13ef0c9a9aa2581b9b30b68e8c (diff) | |
parent | 06e3c817b750c131a20e82eed57a17841ea88ed2 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (27 commits)
md: add 'recovery_start' per-device sysfs attribute
md: rcu_read_lock() walk of mddev->disks in md_do_sync()
md: integrate spares into array at earliest opportunity.
md: move compat_ioctl handling into md.c
md: revise Kconfig help for MD_MULTIPATH
md: add MODULE_DESCRIPTION for all md related modules.
raid: improve MD/raid10 handling of correctable read errors.
md/raid10: print more useful messages on device failure.
md/bitmap: update dirty flag when bitmap bits are explicitly set.
md: Support write-intent bitmaps with externally managed metadata.
md/bitmap: move setting of daemon_lastrun out of bitmap_read_sb
md: support updating bitmap parameters via sysfs.
md: factor out parsing of fixed-point numbers
md: support bitmap offset appropriate for external-metadata arrays.
md: remove needless setting of thread->timeout in raid10_quiesce
md: change daemon_sleep to be in 'jiffies' rather than 'seconds'.
md: move offset, daemon_sleep and chunksize out of bitmap structure
md: collect bitmap-specific fields into one structure.
md/raid1: add takeover support for raid5->raid1
md: add honouring of suspend_{lo,hi} to raid1.
...
-rw-r--r-- | Documentation/md.txt | 72 | ||||
-rw-r--r-- | drivers/md/Kconfig | 9 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 449 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 19 | ||||
-rw-r--r-- | drivers/md/faulty.c | 1 | ||||
-rw-r--r-- | drivers/md/linear.c | 3 | ||||
-rw-r--r-- | drivers/md/md.c | 393 | ||||
-rw-r--r-- | drivers/md/md.h | 51 | ||||
-rw-r--r-- | drivers/md/multipath.c | 3 | ||||
-rw-r--r-- | drivers/md/raid0.c | 3 | ||||
-rw-r--r-- | drivers/md/raid1.c | 217 | ||||
-rw-r--r-- | drivers/md/raid1.h | 5 | ||||
-rw-r--r-- | drivers/md/raid10.c | 116 | ||||
-rw-r--r-- | drivers/md/raid5.c | 63 | ||||
-rw-r--r-- | drivers/md/raid6algos.c | 20 | ||||
-rw-r--r-- | fs/compat_ioctl.c | 18 | ||||
-rw-r--r-- | include/linux/raid/pq.h | 19 |
17 files changed, 1146 insertions, 315 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index 4edd39ec7db..188f4768f1d 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -233,9 +233,9 @@ All md devices contain: resync_start The point at which resync should start. If no resync is needed, - this will be a very large number. At array creation it will - default to 0, though starting the array as 'clean' will - set it much larger. + this will be a very large number (or 'none' since 2.6.30-rc1). At + array creation it will default to 0, though starting the array as + 'clean' will set it much larger. new_dev This file can be written but not read. The value written should @@ -296,6 +296,51 @@ All md devices contain: active-idle like active, but no writes have been seen for a while (safe_mode_delay). + bitmap/location + This indicates where the write-intent bitmap for the array is + stored. + It can be one of "none", "file" or "[+-]N". + "file" may later be extended to "file:/file/name" + "[+-]N" means that many sectors from the start of the metadata. + This is replicated on all devices. For arrays with externally + managed metadata, the offset is from the beginning of the + device. + bitmap/chunksize + The size, in bytes, of the chunk which will be represented by a + single bit. For RAID456, it is a portion of an individual + device. For RAID10, it is a portion of the array. For RAID1, it + is both (they come to the same thing). + bitmap/time_base + The time, in seconds, between looking for bits in the bitmap to + be cleared. In the current implementation, a bit will be cleared + between 2 and 3 times "time_base" after all the covered blocks + are known to be in-sync. + bitmap/backlog + When write-mostly devices are active in a RAID1, write requests + to those devices proceed in the background - the filesystem (or + other user of the device) does not have to wait for them. + 'backlog' sets a limit on the number of concurrent background + writes. If there are more than this, new writes will by + synchronous. + bitmap/metadata + This can be either 'internal' or 'external'. + 'internal' is the default and means the metadata for the bitmap + is stored in the first 256 bytes of the allocated space and is + managed by the md module. + 'external' means that bitmap metadata is managed externally to + the kernel (i.e. by some userspace program) + bitmap/can_clear + This is either 'true' or 'false'. If 'true', then bits in the + bitmap will be cleared when the corresponding blocks are thought + to be in-sync. If 'false', bits will never be cleared. + This is automatically set to 'false' if a write happens on a + degraded array, or if the array becomes degraded during a write. + When metadata is managed externally, it should be set to true + once the array becomes non-degraded, and this fact has been + recorded in the metadata. + + + As component devices are added to an md array, they appear in the 'md' directory as new directories named @@ -334,8 +379,9 @@ Each directory contains: Writing "writemostly" sets the writemostly flag. Writing "-writemostly" clears the writemostly flag. Writing "blocked" sets the "blocked" flag. - Writing "-blocked" clear the "blocked" flag and allows writes + Writing "-blocked" clears the "blocked" flag and allows writes to complete. + Writing "in_sync" sets the in_sync flag. This file responds to select/poll. Any change to 'faulty' or 'blocked' causes an event. @@ -372,6 +418,24 @@ Each directory contains: array. If a value less than the current component_size is written, it will be rejected. + recovery_start + + When the device is not 'in_sync', this records the number of + sectors from the start of the device which are known to be + correct. This is normally zero, but during a recovery + operation is will steadily increase, and if the recovery is + interrupted, restoring this value can cause recovery to + avoid repeating the earlier blocks. With v1.x metadata, this + value is saved and restored automatically. + + This can be set whenever the device is not an active member of + the array, either before the array is activated, or before + the 'slot' is set. + + Setting this to 'none' is equivalent to setting 'in_sync'. + Setting to any other value also clears the 'in_sync' flag. + + An active md device will also contain and entry for each active device in the array. These are named diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2158377a135..acb3a4e404f 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -185,11 +185,10 @@ config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD help - Multipath-IO is the ability of certain devices to address the same - physical disk over multiple 'IO paths'. The code ensures that such - paths can be defined and handled at runtime, and ensures that a - transparent failover to the backup path(s) happens if a IO errors - arrives on the primary path. + MD_MULTIPATH provides a simple multi-path personality for use + the MD framework. It is not under active development. New + projects should consider using DM_MULTIPATH which has more + features and more testing. If unsure, say N. diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 60e2b322db1..26ac8aad0b1 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -212,7 +212,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) */ /* IO operations when bitmap is stored near all superblocks */ -static struct page *read_sb_page(mddev_t *mddev, long offset, +static struct page *read_sb_page(mddev_t *mddev, loff_t offset, struct page *page, unsigned long index, int size) { @@ -287,27 +287,36 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; + loff_t offset = mddev->bitmap_info.offset; if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, bdev_logical_block_size(rdev->bdev)); /* Just make sure we aren't corrupting data or * metadata */ - if (bitmap->offset < 0) { + if (mddev->external) { + /* Bitmap could be anywhere. */ + if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) > + rdev->data_offset && + rdev->sb_start + offset < + rdev->data_offset + mddev->dev_sectors + + (PAGE_SIZE/512)) + goto bad_alignment; + } else if (offset < 0) { /* DATA BITMAP METADATA */ - if (bitmap->offset + if (offset + (long)(page->index * (PAGE_SIZE/512)) + size/512 > 0) /* bitmap runs in to metadata */ goto bad_alignment; if (rdev->data_offset + mddev->dev_sectors - > rdev->sb_start + bitmap->offset) + > rdev->sb_start + offset) /* data runs in to bitmap */ goto bad_alignment; } else if (rdev->sb_start < rdev->data_offset) { /* METADATA BITMAP DATA */ if (rdev->sb_start - + bitmap->offset + + offset + page->index*(PAGE_SIZE/512) + size/512 > rdev->data_offset) /* bitmap runs in to data */ @@ -316,7 +325,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) /* DATA METADATA BITMAP - no problems */ } md_super_write(mddev, rdev, - rdev->sb_start + bitmap->offset + rdev->sb_start + offset + page->index * (PAGE_SIZE/512), size, page); @@ -488,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap) if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ return; + if (bitmap->mddev->bitmap_info.external) + return; spin_lock_irqsave(&bitmap->lock, flags); if (!bitmap->sb_page) { /* no superblock */ spin_unlock_irqrestore(&bitmap->lock, flags); @@ -501,6 +512,9 @@ void bitmap_update_sb(struct bitmap *bitmap) bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->events_cleared); } + /* Just in case these have been changed via sysfs: */ + sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); + sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); kunmap_atomic(sb, KM_USER0); write_page(bitmap, bitmap->sb_page, 1); } @@ -550,7 +564,8 @@ static int bitmap_read_sb(struct bitmap *bitmap) bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); } else { - bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, + bitmap->sb_page = read_sb_page(bitmap->mddev, + bitmap->mddev->bitmap_info.offset, NULL, 0, sizeof(bitmap_super_t)); } @@ -563,7 +578,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); chunksize = le32_to_cpu(sb->chunksize); - daemon_sleep = le32_to_cpu(sb->daemon_sleep); + daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); /* verify that the bitmap-specific fields are valid */ @@ -576,7 +591,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) reason = "bitmap chunksize too small"; else if ((1 << ffz(~chunksize)) != chunksize) reason = "bitmap chunksize not a power of 2"; - else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) + else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) reason = "daemon sleep period out of range"; else if (write_behind > COUNTER_MAX) reason = "write-behind limit out of range (0 - 16383)"; @@ -610,10 +625,9 @@ static int bitmap_read_sb(struct bitmap *bitmap) } success: /* assign fields using values from superblock */ - bitmap->chunksize = chunksize; - bitmap->daemon_sleep = daemon_sleep; - bitmap->daemon_lastrun = jiffies; - bitmap->max_write_behind = write_behind; + bitmap->mddev->bitmap_info.chunksize = chunksize; + bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; + bitmap->mddev->bitmap_info.max_write_behind = write_behind; bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) bitmap->flags |= BITMAP_HOSTENDIAN; @@ -664,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, * general bitmap file operations */ +/* + * on-disk bitmap: + * + * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap + * file a page at a time. There's a superblock at the start of the file. + */ /* calculate the index of the page that contains this bit */ -static inline unsigned long file_page_index(unsigned long chunk) +static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk) { - return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; + if (!bitmap->mddev->bitmap_info.external) + chunk += sizeof(bitmap_super_t) << 3; + return chunk >> PAGE_BIT_SHIFT; } /* calculate the (bit) offset of this bit within a page */ -static inline unsigned long file_page_offset(unsigned long chunk) +static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk) { - return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); + if (!bitmap->mddev->bitmap_info.external) + chunk += sizeof(bitmap_super_t) << 3; + return chunk & (PAGE_BITS - 1); } /* @@ -686,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk) static inline struct page *filemap_get_page(struct bitmap *bitmap, unsigned long chunk) { - if (file_page_index(chunk) >= bitmap->file_pages) return NULL; - return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; + if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; + return bitmap->filemap[file_page_index(bitmap, chunk) + - file_page_index(bitmap, 0)]; } @@ -710,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); while (pages--) - if (map[pages]->index != 0) /* 0 is sb_page, release it below */ + if (map[pages] != sb_page) /* 0 is sb_page, release it below */ free_buffers(map[pages]); kfree(map); kfree(attr); @@ -821,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) page = filemap_get_page(bitmap, chunk); if (!page) return; - bit = file_page_offset(chunk); + bit = file_page_offset(bitmap, chunk); /* set the bit */ kaddr = kmap_atomic(page, KM_USER0); @@ -907,7 +932,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) chunks = bitmap->chunks; file = bitmap->file; - BUG_ON(!file && !bitmap->offset); + BUG_ON(!file && !bitmap->mddev->bitmap_info.offset); #ifdef INJECT_FAULTS_3 outofdate = 1; @@ -919,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) "recovery\n", bmname(bitmap)); bytes = (chunks + 7) / 8; + if (!bitmap->mddev->bitmap_info.external) + bytes += sizeof(bitmap_super_t); - num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; + + num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE; - if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { + if (file && i_size_read(file->f_mapping->host) < bytes) { printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", bmname(bitmap), (unsigned long) i_size_read(file->f_mapping->host), - bytes + sizeof(bitmap_super_t)); + bytes); goto err; } @@ -947,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) for (i = 0; i < chunks; i++) { int b; - index = file_page_index(i); - bit = file_page_offset(i); + index = file_page_index(bitmap, i); + bit = file_page_offset(bitmap, i); if (index != oldindex) { /* this is a new page, read it in */ int count; /* unmap the old page, we're done with it */ if (index == num_pages-1) - count = bytes + sizeof(bitmap_super_t) - - index * PAGE_SIZE; + count = bytes - index * PAGE_SIZE; else count = PAGE_SIZE; - if (index == 0) { + if (index == 0 && bitmap->sb_page) { /* * if we're here then the superblock page * contains some bits (PAGE_SIZE != sizeof sb) @@ -967,14 +994,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) offset = sizeof(bitmap_super_t); if (!file) read_sb_page(bitmap->mddev, - bitmap->offset, + bitmap->mddev->bitmap_info.offset, page, index, count); } else if (file) { page = read_page(file, index, bitmap, count); offset = 0; } else { - page = read_sb_page(bitmap->mddev, bitmap->offset, + page = read_sb_page(bitmap->mddev, + bitmap->mddev->bitmap_info.offset, NULL, index, count); offset = 0; @@ -1078,23 +1106,32 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, * out to disk */ -void bitmap_daemon_work(struct bitmap *bitmap) +void bitmap_daemon_work(mddev_t *mddev) { + struct bitmap *bitmap; unsigned long j; unsigned long flags; struct page *page = NULL, *lastpage = NULL; int blocks; void *paddr; - if (bitmap == NULL) + /* Use a mutex to guard daemon_work against + * bitmap_destroy. + */ + mutex_lock(&mddev->bitmap_info.mutex); + bitmap = mddev->bitmap; + if (bitmap == NULL) { + mutex_unlock(&mddev->bitmap_info.mutex); return; - if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) + } + if (time_before(jiffies, bitmap->daemon_lastrun + + bitmap->mddev->bitmap_info.daemon_sleep)) goto done; bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - return; + goto done; } bitmap->allclean = 1; @@ -1142,7 +1179,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* We are possibly going to clear some bits, so make * sure that events_cleared is up-to-date. */ - if (bitmap->need_sync) { + if (bitmap->need_sync && + bitmap->mddev->bitmap_info.external == 0) { bitmap_super_t *sb; bitmap->need_sync = 0; sb = kmap_atomic(bitmap->sb_page, KM_USER0); @@ -1152,7 +1190,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) write_page(bitmap, bitmap->sb_page, 1); } spin_lock_irqsave(&bitmap->lock, flags); - clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); + if (!bitmap->need_sync) + clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } bmc = bitmap_get_counter(bitmap, (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), @@ -1167,7 +1206,7 @@ void bitmap_daemon_work(struct bitmap *bitmap) if (*bmc == 2) { *bmc=1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); - } else if (*bmc == 1) { + } else if (*bmc == 1 && !bitmap->need_sync) { /* we can clear the bit */ *bmc = 0; bitmap_count_page(bitmap, @@ -1177,9 +1216,11 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* clear the bit */ paddr = kmap_atomic(page, KM_USER0); if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(j), paddr); + clear_bit(file_page_offset(bitmap, j), + paddr); else - ext2_clear_bit(file_page_offset(j), paddr); + ext2_clear_bit(file_page_offset(bitmap, j), + paddr); kunmap_atomic(paddr, KM_USER0); } } else @@ -1202,7 +1243,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) done: if (bitmap->allclean == 0) - bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; + bitmap->mddev->thread->timeout = + bitmap->mddev->bitmap_info.daemon_sleep; + mutex_unlock(&mddev->bitmap_info.mutex); } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1332,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto bitmap->events_cleared < bitmap->mddev->events) { bitmap->events_cleared = bitmap->mddev->events; bitmap->need_sync = 1; + sysfs_notify_dirent(bitmap->sysfs_can_clear); } if (!success && ! (*bmc & NEEDED_MASK)) @@ -1470,7 +1514,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) return; } if (time_before(jiffies, (bitmap->last_end_sync - + bitmap->daemon_sleep * HZ))) + + bitmap->mddev->bitmap_info.daemon_sleep))) return; wait_event(bitmap->mddev->recovery_wait, atomic_read(&bitmap->mddev->recovery_active) == 0); @@ -1522,6 +1566,12 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); bitmap_set_memory_bits(bitmap, sec, 1); bitmap_file_set_bit(bitmap, sec); + if (sec < bitmap->mddev->recovery_cp) + /* We are asserting that the array is dirty, + * so move the recovery_cp address back so + * that it is obvious that it is dirty + */ + bitmap->mddev->recovery_cp = sec; } } @@ -1531,7 +1581,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) void bitmap_flush(mddev_t *mddev) { struct bitmap *bitmap = mddev->bitmap; - int sleep; + long sleep; if (!bitmap) /* there was no bitmap */ return; @@ -1539,12 +1589,13 @@ void bitmap_flush(mddev_t *mddev) /* run the daemon_work three time to ensure everything is flushed * that can be */ - sleep = bitmap->daemon_sleep; - bitmap->daemon_sleep = 0; - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); - bitmap_daemon_work(bitmap); - bitmap->daemon_sleep = sleep; + sleep = mddev->bitmap_info.daemon_sleep * 2; + bitmap->daemon_lastrun -= sleep; + bitmap_daemon_work(mddev); + bitmap->daemon_lastrun -= sleep; + bitmap_daemon_work(mddev); + bitmap->daemon_lastrun -= sleep; + bitmap_daemon_work(mddev); bitmap_update_sb(bitmap); } @@ -1574,6 +1625,7 @@ static void bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } + void bitmap_destroy(mddev_t *mddev) { struct bitmap *bitmap = mddev->bitmap; @@ -1581,10 +1633,15 @@ void bitmap_destroy(mddev_t *mddev) if (!bitmap) /* there was no bitmap */ return; + mutex_lock(&mddev->bitmap_info.mutex); mddev->bitmap = NULL; /* disconnect from the md device */ + mutex_unlock(&mddev->bitmap_info.mutex); if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + if (bitmap->sysfs_can_clear) + sysfs_put(bitmap->sysfs_can_clear); + bitmap_free(bitmap); } @@ -1598,16 +1655,17 @@ int bitmap_create(mddev_t *mddev) sector_t blocks = mddev->resync_max_sectors; unsigned long chunks; unsigned long pages; - struct file *file = mddev->bitmap_file; + struct file *file = mddev->bitmap_info.file; int err; sector_t start; + struct sysfs_dirent *bm; BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); - if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ + if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ return 0; - BUG_ON(file && mddev->bitmap_offset); + BUG_ON(file && mddev->bitmap_info.offset); bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) @@ -1620,8 +1678,14 @@ int bitmap_create(mddev_t *mddev) bitmap->mddev = mddev; + bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); + if (bm) { + bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); + sysfs_put(bm); + } else + bitmap->sysfs_can_clear = NULL; + bitmap->file = file; - bitmap->offset = mddev->bitmap_offset; if (file) { get_file(file); /* As future accesses to this file will use bmap, @@ -1630,12 +1694,22 @@ int bitmap_create(mddev_t *mddev) */ vfs_fsync(file, file->f_dentry, 1); } - /* read superblock from bitmap file (this sets bitmap->chunksize) */ - err = bitmap_read_sb(bitmap); + /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ + if (!mddev->bitmap_info.external) + err = bitmap_read_sb(bitmap); + else { + err = 0; + if (mddev->bitmap_info.chunksize == 0 || + mddev->bitmap_info.daemon_sleep == 0) + /* chunksize and time_base need to be + * set first. */ + err = -EINVAL; + } if (err) goto error; - bitmap->chunkshift = ffz(~bitmap->chunksize); + bitmap->daemon_lastrun = jiffies; + bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize); /* now that chunksize and chunkshift are set, we can use these macros */ chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> @@ -1677,7 +1751,8 @@ int bitmap_create(mddev_t *mddev) mddev->bitmap = bitmap; - mddev->thread->timeout = bitmap->daemon_sleep * HZ; + mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; + md_wakeup_thread(mddev->thread); bitmap_update_sb(bitmap); @@ -1688,6 +1763,264 @@ int bitmap_create(mddev_t *mddev) return err; } +static ssize_t +location_show(mddev_t *mddev, char *page) +{ + ssize_t len; + if (mddev->bitmap_info.file) { + len = sprintf(page, "file"); + } else if (mddev->bitmap_info.offset) { + len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); + } else + len = sprintf(page, "none"); + len += sprintf(page+len, "\n"); + return len; +} + +static ssize_t +location_store(mddev_t *mddev, const char *buf, size_t len) +{ + + if (mddev->pers) { + if (!mddev->pers->quiesce) + return -EBUSY; + if (mddev->recovery || mddev->sync_thread) + return -EBUSY; + } + + if (mddev->bitmap || mddev->bitmap_info.file || + mddev->bitmap_info.offset) { + /* bitmap already configured. Only option is to clear it */ + if (strncmp(buf, "none", 4) != 0) + return -EBUSY; + if (mddev->pers) { + mddev->pers->quiesce(mddev, 1); + bitmap_destroy(mddev); + mddev->pers->quiesce(mddev, 0); + } + mddev->bitmap_info.offset = 0; + if (mddev->bitmap_info.file) { + struct file *f = mddev->bitmap_info.file; + mddev->bitmap_info.file = NULL; + restore_bitmap_write_access(f); + fput(f); + } + } else { + /* No bitmap, OK to set a location */ + long long offset; + if (strncmp(buf, "none", 4) == 0) + /* nothing to be done */; + else if (strncmp(buf, "file:", 5) == 0) { + /* Not supported yet */ + return -EINVAL; + } else { + int rv; + if (buf[0] == '+') + rv = strict_strtoll(buf+1, 10, &offset); + else + rv = strict_strtoll(buf, 10, &offset); + if (rv) + return rv; + if (offset == 0) + return -EINVAL; + if (mddev->bitmap_info.external == 0 && + mddev->major_version == 0 && + offset != mddev->bitmap_info.default_offset) + return -EINVAL; + mddev->bitmap_info.offset = offset; + if (mddev->pers) { + mddev->pers->quiesce(mddev, 1); + rv = bitmap_create(mddev); + if (rv) { + bitmap_destroy(mddev); + mddev->bitmap_info.offset = 0; + } + mddev->pers->quiesce(mddev, 0); + if (rv) + return rv; + } + } + } + if (!mddev->external) { + /* Ensure new bitmap info is stored in + * metadata promptly. + */ + set_bit(MD_CHANGE_DEVS, &mddev->flags); + md_wakeup_thread(mddev->thread); + } + return len; +} + +static struct md_sysfs_entry bitmap_location = +__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); + +static ssize_t +timeout_show(mddev_t *mddev, char *page) +{ + ssize_t len; + unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; + unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; + + len = sprintf(page, "%lu", secs); + if (jifs) + len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); + len += sprintf(page+len, "\n"); + return len; +} + +static ssize_t +timeout_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* timeout can be set at any time */ + unsigned long timeout; + int rv = strict_strtoul_scaled(buf, &timeout, 4); + if (rv) + return rv; + + /* just to make sure we don't overflow... */ + if (timeout >= LONG_MAX / HZ) + return -EINVAL; + + timeout = timeout * HZ / 10000; + + if (timeout >= MAX_SCHEDULE_TIMEOUT) + timeout = MAX_SCHEDULE_TIMEOUT-1; + if (timeout < 1) + timeout = 1; + mddev->bitmap_info.daemon_sleep = timeout; + if (mddev->thread) { + /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then + * the bitmap is all clean and we don't need to + * adjust the timeout right now + */ + if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) { + mddev->thread->timeout = timeout; + md_wakeup_thread(mddev->thread); + } + } + return len; +} + +static struct md_sysfs_entry bitmap_timeout = +__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); + +static ssize_t +backlog_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); +} + +static ssize_t +backlog_store(mddev_t *mddev, const char *buf, size_t len) +{ + unsigned long backlog; + int rv = strict_strtoul(buf, 10, &backlog); + if (rv) + return rv; + if (backlog > COUNTER_MAX) + return -EINVAL; + mddev->bitmap_info.max_write_behind = backlog; + return len; +} + +static struct md_sysfs_entry bitmap_backlog = +__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); + +static ssize_t +chunksize_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); +} + +static ssize_t +chunksize_store(mddev_t *mddev, const char *buf, size_t len) +{ + /* Can only be changed when no bitmap is active */ + int rv; + unsigned long csize; + if (mddev->bitmap) + return -EBUSY; + rv = strict_strtoul(buf, 10, &csize); + if (rv) + return rv; + if (csize < 512 || + !is_power_of_2(csize)) + return -EINVAL; + mddev->bitmap_info.chunksize = csize; + return len; +} + +static struct md_sysfs_entry bitmap_chunksize = +__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); + +static ssize_t metadata_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%s\n", (mddev->bitmap_info.external + ? "external" : "internal")); +} + +static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len) +{ + if (mddev->bitmap || + mddev->bitmap_info.file || + mddev->bitmap_info.offset) + return -EBUSY; + if (strncmp(buf, "external", 8) == 0) + mddev->bitmap_info.external = 1; + else if (strncmp(buf, "internal", 8) == 0) + mddev->bitmap_info.external = 0; + else + return -EINVAL; + return len; +} + +static struct md_sysfs_entry bitmap_metadata = +__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); + +static ssize_t can_clear_show(mddev_t *mddev, char *page) +{ + int len; + if (mddev->bitmap) + len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? + "false" : "true")); + else + len = sprintf(page, "\n"); + return len; +} + +static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len) +{ + if (mddev->bitmap == NULL) + return -ENOENT; + if (strncmp(buf, "false", 5) == 0) + mddev->bitmap->need_sync = 1; + else if (strncmp(buf, "true", 4) == 0) { + if (mddev->degraded) + return -EBUSY; + mddev->bitmap->need_sync = 0; + } else + return -EINVAL; + return len; +} + +static struct md_sysfs_entry bitmap_can_clear = +__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); + +static struct attribute *md_bitmap_attrs[] = { + &bitmap_location.attr, + &bitmap_timeout.attr, + &bitmap_backlog.attr, + &bitmap_chunksize.attr, + &bitmap_metadata.attr, + &bitmap_can_clear.attr, + NULL +}; +struct attribute_group md_bitmap_group = { + .name = "bitmap", + .attrs = md_bitmap_attrs, +}; + + /* the bitmap API -- for raid personalities */ EXPORT_SYMBOL(bitmap_startwrite); EXPORT_SYMBOL(bitmap_endwrite); diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index e98900671ca..cb821d76d1b 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -106,7 +106,7 @@ typedef __u16 bitmap_counter_t; #define BITMAP_BLOCK_SHIFT 9 /* how many blocks per chunk? (this is variable) */ -#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT) +#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->m |