aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-14 10:03:36 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-14 10:03:36 -0800
commit37222e1c9ee3ce587f5b41fed868bd8a592a992f (patch)
treeb65f22a1e20286185463ca1a2889e593d963a393
parent76b8f82cde2d9c13ef0c9a9aa2581b9b30b68e8c (diff)
parent06e3c817b750c131a20e82eed57a17841ea88ed2 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (27 commits) md: add 'recovery_start' per-device sysfs attribute md: rcu_read_lock() walk of mddev->disks in md_do_sync() md: integrate spares into array at earliest opportunity. md: move compat_ioctl handling into md.c md: revise Kconfig help for MD_MULTIPATH md: add MODULE_DESCRIPTION for all md related modules. raid: improve MD/raid10 handling of correctable read errors. md/raid10: print more useful messages on device failure. md/bitmap: update dirty flag when bitmap bits are explicitly set. md: Support write-intent bitmaps with externally managed metadata. md/bitmap: move setting of daemon_lastrun out of bitmap_read_sb md: support updating bitmap parameters via sysfs. md: factor out parsing of fixed-point numbers md: support bitmap offset appropriate for external-metadata arrays. md: remove needless setting of thread->timeout in raid10_quiesce md: change daemon_sleep to be in 'jiffies' rather than 'seconds'. md: move offset, daemon_sleep and chunksize out of bitmap structure md: collect bitmap-specific fields into one structure. md/raid1: add takeover support for raid5->raid1 md: add honouring of suspend_{lo,hi} to raid1. ...
-rw-r--r--Documentation/md.txt72
-rw-r--r--drivers/md/Kconfig9
-rw-r--r--drivers/md/bitmap.c449
-rw-r--r--drivers/md/bitmap.h19
-rw-r--r--drivers/md/faulty.c1
-rw-r--r--drivers/md/linear.c3
-rw-r--r--drivers/md/md.c393
-rw-r--r--drivers/md/md.h51
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/raid0.c3
-rw-r--r--drivers/md/raid1.c217
-rw-r--r--drivers/md/raid1.h5
-rw-r--r--drivers/md/raid10.c116
-rw-r--r--drivers/md/raid5.c63
-rw-r--r--drivers/md/raid6algos.c20
-rw-r--r--fs/compat_ioctl.c18
-rw-r--r--include/linux/raid/pq.h19
17 files changed, 1146 insertions, 315 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 4edd39ec7db..188f4768f1d 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -233,9 +233,9 @@ All md devices contain:
resync_start
The point at which resync should start. If no resync is needed,
- this will be a very large number. At array creation it will
- default to 0, though starting the array as 'clean' will
- set it much larger.
+ this will be a very large number (or 'none' since 2.6.30-rc1). At
+ array creation it will default to 0, though starting the array as
+ 'clean' will set it much larger.
new_dev
This file can be written but not read. The value written should
@@ -296,6 +296,51 @@ All md devices contain:
active-idle
like active, but no writes have been seen for a while (safe_mode_delay).
+ bitmap/location
+ This indicates where the write-intent bitmap for the array is
+ stored.
+ It can be one of "none", "file" or "[+-]N".
+ "file" may later be extended to "file:/file/name"
+ "[+-]N" means that many sectors from the start of the metadata.
+ This is replicated on all devices. For arrays with externally
+ managed metadata, the offset is from the beginning of the
+ device.
+ bitmap/chunksize
+ The size, in bytes, of the chunk which will be represented by a
+ single bit. For RAID456, it is a portion of an individual
+ device. For RAID10, it is a portion of the array. For RAID1, it
+ is both (they come to the same thing).
+ bitmap/time_base
+ The time, in seconds, between looking for bits in the bitmap to
+ be cleared. In the current implementation, a bit will be cleared
+ between 2 and 3 times "time_base" after all the covered blocks
+ are known to be in-sync.
+ bitmap/backlog
+ When write-mostly devices are active in a RAID1, write requests
+ to those devices proceed in the background - the filesystem (or
+ other user of the device) does not have to wait for them.
+ 'backlog' sets a limit on the number of concurrent background
+ writes. If there are more than this, new writes will by
+ synchronous.
+ bitmap/metadata
+ This can be either 'internal' or 'external'.
+ 'internal' is the default and means the metadata for the bitmap
+ is stored in the first 256 bytes of the allocated space and is
+ managed by the md module.
+ 'external' means that bitmap metadata is managed externally to
+ the kernel (i.e. by some userspace program)
+ bitmap/can_clear
+ This is either 'true' or 'false'. If 'true', then bits in the
+ bitmap will be cleared when the corresponding blocks are thought
+ to be in-sync. If 'false', bits will never be cleared.
+ This is automatically set to 'false' if a write happens on a
+ degraded array, or if the array becomes degraded during a write.
+ When metadata is managed externally, it should be set to true
+ once the array becomes non-degraded, and this fact has been
+ recorded in the metadata.
+
+
+
As component devices are added to an md array, they appear in the 'md'
directory as new directories named
@@ -334,8 +379,9 @@ Each directory contains:
Writing "writemostly" sets the writemostly flag.
Writing "-writemostly" clears the writemostly flag.
Writing "blocked" sets the "blocked" flag.
- Writing "-blocked" clear the "blocked" flag and allows writes
+ Writing "-blocked" clears the "blocked" flag and allows writes
to complete.
+ Writing "in_sync" sets the in_sync flag.
This file responds to select/poll. Any change to 'faulty'
or 'blocked' causes an event.
@@ -372,6 +418,24 @@ Each directory contains:
array. If a value less than the current component_size is
written, it will be rejected.
+ recovery_start
+
+ When the device is not 'in_sync', this records the number of
+ sectors from the start of the device which are known to be
+ correct. This is normally zero, but during a recovery
+ operation is will steadily increase, and if the recovery is
+ interrupted, restoring this value can cause recovery to
+ avoid repeating the earlier blocks. With v1.x metadata, this
+ value is saved and restored automatically.
+
+ This can be set whenever the device is not an active member of
+ the array, either before the array is activated, or before
+ the 'slot' is set.
+
+ Setting this to 'none' is equivalent to setting 'in_sync'.
+ Setting to any other value also clears the 'in_sync' flag.
+
+
An active md device will also contain and entry for each active device
in the array. These are named
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2158377a135..acb3a4e404f 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -185,11 +185,10 @@ config MD_MULTIPATH
tristate "Multipath I/O support"
depends on BLK_DEV_MD
help
- Multipath-IO is the ability of certain devices to address the same
- physical disk over multiple 'IO paths'. The code ensures that such
- paths can be defined and handled at runtime, and ensures that a
- transparent failover to the backup path(s) happens if a IO errors
- arrives on the primary path.
+ MD_MULTIPATH provides a simple multi-path personality for use
+ the MD framework. It is not under active development. New
+ projects should consider using DM_MULTIPATH which has more
+ features and more testing.
If unsure, say N.
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 60e2b322db1..26ac8aad0b1 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -212,7 +212,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
*/
/* IO operations when bitmap is stored near all superblocks */
-static struct page *read_sb_page(mddev_t *mddev, long offset,
+static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
struct page *page,
unsigned long index, int size)
{
@@ -287,27 +287,36 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE;
+ loff_t offset = mddev->bitmap_info.offset;
if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size,
bdev_logical_block_size(rdev->bdev));
/* Just make sure we aren't corrupting data or
* metadata
*/
- if (bitmap->offset < 0) {
+ if (mddev->external) {
+ /* Bitmap could be anywhere. */
+ if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) >
+ rdev->data_offset &&
+ rdev->sb_start + offset <
+ rdev->data_offset + mddev->dev_sectors +
+ (PAGE_SIZE/512))
+ goto bad_alignment;
+ } else if (offset < 0) {
/* DATA BITMAP METADATA */
- if (bitmap->offset
+ if (offset
+ (long)(page->index * (PAGE_SIZE/512))
+ size/512 > 0)
/* bitmap runs in to metadata */
goto bad_alignment;
if (rdev->data_offset + mddev->dev_sectors
- > rdev->sb_start + bitmap->offset)
+ > rdev->sb_start + offset)
/* data runs in to bitmap */
goto bad_alignment;
} else if (rdev->sb_start < rdev->data_offset) {
/* METADATA BITMAP DATA */
if (rdev->sb_start
- + bitmap->offset
+ + offset
+ page->index*(PAGE_SIZE/512) + size/512
> rdev->data_offset)
/* bitmap runs in to data */
@@ -316,7 +325,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
/* DATA METADATA BITMAP - no problems */
}
md_super_write(mddev, rdev,
- rdev->sb_start + bitmap->offset
+ rdev->sb_start + offset
+ page->index * (PAGE_SIZE/512),
size,
page);
@@ -488,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
return;
+ if (bitmap->mddev->bitmap_info.external)
+ return;
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->sb_page) { /* no superblock */
spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -501,6 +512,9 @@ void bitmap_update_sb(struct bitmap *bitmap)
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
}
+ /* Just in case these have been changed via sysfs: */
+ sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
+ sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
kunmap_atomic(sb, KM_USER0);
write_page(bitmap, bitmap->sb_page, 1);
}
@@ -550,7 +564,8 @@ static int bitmap_read_sb(struct bitmap *bitmap)
bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
} else {
- bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset,
+ bitmap->sb_page = read_sb_page(bitmap->mddev,
+ bitmap->mddev->bitmap_info.offset,
NULL,
0, sizeof(bitmap_super_t));
}
@@ -563,7 +578,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
chunksize = le32_to_cpu(sb->chunksize);
- daemon_sleep = le32_to_cpu(sb->daemon_sleep);
+ daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
/* verify that the bitmap-specific fields are valid */
@@ -576,7 +591,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
reason = "bitmap chunksize too small";
else if ((1 << ffz(~chunksize)) != chunksize)
reason = "bitmap chunksize not a power of 2";
- else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ)
+ else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
reason = "daemon sleep period out of range";
else if (write_behind > COUNTER_MAX)
reason = "write-behind limit out of range (0 - 16383)";
@@ -610,10 +625,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
}
success:
/* assign fields using values from superblock */
- bitmap->chunksize = chunksize;
- bitmap->daemon_sleep = daemon_sleep;
- bitmap->daemon_lastrun = jiffies;
- bitmap->max_write_behind = write_behind;
+ bitmap->mddev->bitmap_info.chunksize = chunksize;
+ bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+ bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->flags |= le32_to_cpu(sb->state);
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
bitmap->flags |= BITMAP_HOSTENDIAN;
@@ -664,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
* general bitmap file operations
*/
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
/* calculate the index of the page that contains this bit */
-static inline unsigned long file_page_index(unsigned long chunk)
+static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
{
- return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
+ if (!bitmap->mddev->bitmap_info.external)
+ chunk += sizeof(bitmap_super_t) << 3;
+ return chunk >> PAGE_BIT_SHIFT;
}
/* calculate the (bit) offset of this bit within a page */
-static inline unsigned long file_page_offset(unsigned long chunk)
+static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
{
- return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
+ if (!bitmap->mddev->bitmap_info.external)
+ chunk += sizeof(bitmap_super_t) << 3;
+ return chunk & (PAGE_BITS - 1);
}
/*
@@ -686,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
static inline struct page *filemap_get_page(struct bitmap *bitmap,
unsigned long chunk)
{
- if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
- return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
+ if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
+ return bitmap->filemap[file_page_index(bitmap, chunk)
+ - file_page_index(bitmap, 0)];
}
@@ -710,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
while (pages--)
- if (map[pages]->index != 0) /* 0 is sb_page, release it below */
+ if (map[pages] != sb_page) /* 0 is sb_page, release it below */
free_buffers(map[pages]);
kfree(map);
kfree(attr);
@@ -821,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
page = filemap_get_page(bitmap, chunk);
if (!page) return;
- bit = file_page_offset(chunk);
+ bit = file_page_offset(bitmap, chunk);
/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
@@ -907,7 +932,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
chunks = bitmap->chunks;
file = bitmap->file;
- BUG_ON(!file && !bitmap->offset);
+ BUG_ON(!file && !bitmap->mddev->bitmap_info.offset);
#ifdef INJECT_FAULTS_3
outofdate = 1;
@@ -919,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
"recovery\n", bmname(bitmap));
bytes = (chunks + 7) / 8;
+ if (!bitmap->mddev->bitmap_info.external)
+ bytes += sizeof(bitmap_super_t);
- num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
- if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
+ if (file && i_size_read(file->f_mapping->host) < bytes) {
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
bmname(bitmap),
(unsigned long) i_size_read(file->f_mapping->host),
- bytes + sizeof(bitmap_super_t));
+ bytes);
goto err;
}
@@ -947,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
for (i = 0; i < chunks; i++) {
int b;
- index = file_page_index(i);
- bit = file_page_offset(i);
+ index = file_page_index(bitmap, i);
+ bit = file_page_offset(bitmap, i);
if (index != oldindex) { /* this is a new page, read it in */
int count;
/* unmap the old page, we're done with it */
if (index == num_pages-1)
- count = bytes + sizeof(bitmap_super_t)
- - index * PAGE_SIZE;
+ count = bytes - index * PAGE_SIZE;
else
count = PAGE_SIZE;
- if (index == 0) {
+ if (index == 0 && bitmap->sb_page) {
/*
* if we're here then the superblock page
* contains some bits (PAGE_SIZE != sizeof sb)
@@ -967,14 +994,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
offset = sizeof(bitmap_super_t);
if (!file)
read_sb_page(bitmap->mddev,
- bitmap->offset,
+ bitmap->mddev->bitmap_info.offset,
page,
index, count);
} else if (file) {
page = read_page(file, index, bitmap, count);
offset = 0;
} else {
- page = read_sb_page(bitmap->mddev, bitmap->offset,
+ page = read_sb_page(bitmap->mddev,
+ bitmap->mddev->bitmap_info.offset,
NULL,
index, count);
offset = 0;
@@ -1078,23 +1106,32 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
* out to disk
*/
-void bitmap_daemon_work(struct bitmap *bitmap)
+void bitmap_daemon_work(mddev_t *mddev)
{
+ struct bitmap *bitmap;
unsigned long j;
unsigned long flags;
struct page *page = NULL, *lastpage = NULL;
int blocks;
void *paddr;
- if (bitmap == NULL)
+ /* Use a mutex to guard daemon_work against
+ * bitmap_destroy.
+ */
+ mutex_lock(&mddev->bitmap_info.mutex);
+ bitmap = mddev->bitmap;
+ if (bitmap == NULL) {
+ mutex_unlock(&mddev->bitmap_info.mutex);
return;
- if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
+ }
+ if (time_before(jiffies, bitmap->daemon_lastrun
+ + bitmap->mddev->bitmap_info.daemon_sleep))
goto done;
bitmap->daemon_lastrun = jiffies;
if (bitmap->allclean) {
bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
- return;
+ goto done;
}
bitmap->allclean = 1;
@@ -1142,7 +1179,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
/* We are possibly going to clear some bits, so make
* sure that events_cleared is up-to-date.
*/
- if (bitmap->need_sync) {
+ if (bitmap->need_sync &&
+ bitmap->mddev->bitmap_info.external == 0) {
bitmap_super_t *sb;
bitmap->need_sync = 0;
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1152,7 +1190,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
write_page(bitmap, bitmap->sb_page, 1);
}
spin_lock_irqsave(&bitmap->lock, flags);
- clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
+ if (!bitmap->need_sync)
+ clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
bmc = bitmap_get_counter(bitmap,
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
@@ -1167,7 +1206,7 @@ void bitmap_daemon_work(struct bitmap *bitmap)
if (*bmc == 2) {
*bmc=1; /* maybe clear the bit next time */
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
- } else if (*bmc == 1) {
+ } else if (*bmc == 1 && !bitmap->need_sync) {
/* we can clear the bit */
*bmc = 0;
bitmap_count_page(bitmap,
@@ -1177,9 +1216,11 @@ void bitmap_daemon_work(struct bitmap *bitmap)
/* clear the bit */
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
- clear_bit(file_page_offset(j), paddr);
+ clear_bit(file_page_offset(bitmap, j),
+ paddr);
else
- ext2_clear_bit(file_page_offset(j), paddr);
+ ext2_clear_bit(file_page_offset(bitmap, j),
+ paddr);
kunmap_atomic(paddr, KM_USER0);
}
} else
@@ -1202,7 +1243,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
done:
if (bitmap->allclean == 0)
- bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
+ bitmap->mddev->thread->timeout =
+ bitmap->mddev->bitmap_info.daemon_sleep;
+ mutex_unlock(&mddev->bitmap_info.mutex);
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1332,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
+ sysfs_notify_dirent(bitmap->sysfs_can_clear);
}
if (!success && ! (*bmc & NEEDED_MASK))
@@ -1470,7 +1514,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
return;
}
if (time_before(jiffies, (bitmap->last_end_sync
- + bitmap->daemon_sleep * HZ)))
+ + bitmap->mddev->bitmap_info.daemon_sleep)))
return;
wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0);
@@ -1522,6 +1566,12 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
bitmap_set_memory_bits(bitmap, sec, 1);
bitmap_file_set_bit(bitmap, sec);
+ if (sec < bitmap->mddev->recovery_cp)
+ /* We are asserting that the array is dirty,
+ * so move the recovery_cp address back so
+ * that it is obvious that it is dirty
+ */
+ bitmap->mddev->recovery_cp = sec;
}
}
@@ -1531,7 +1581,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
void bitmap_flush(mddev_t *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
- int sleep;
+ long sleep;
if (!bitmap) /* there was no bitmap */
return;
@@ -1539,12 +1589,13 @@ void bitmap_flush(mddev_t *mddev)
/* run the daemon_work three time to ensure everything is flushed
* that can be
*/
- sleep = bitmap->daemon_sleep;
- bitmap->daemon_sleep = 0;
- bitmap_daemon_work(bitmap);
- bitmap_daemon_work(bitmap);
- bitmap_daemon_work(bitmap);
- bitmap->daemon_sleep = sleep;
+ sleep = mddev->bitmap_info.daemon_sleep * 2;
+ bitmap->daemon_lastrun -= sleep;
+ bitmap_daemon_work(mddev);
+ bitmap->daemon_lastrun -= sleep;
+ bitmap_daemon_work(mddev);
+ bitmap->daemon_lastrun -= sleep;
+ bitmap_daemon_work(mddev);
bitmap_update_sb(bitmap);
}
@@ -1574,6 +1625,7 @@ static void bitmap_free(struct bitmap *bitmap)
kfree(bp);
kfree(bitmap);
}
+
void bitmap_destroy(mddev_t *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
@@ -1581,10 +1633,15 @@ void bitmap_destroy(mddev_t *mddev)
if (!bitmap) /* there was no bitmap */
return;
+ mutex_lock(&mddev->bitmap_info.mutex);
mddev->bitmap = NULL; /* disconnect from the md device */
+ mutex_unlock(&mddev->bitmap_info.mutex);
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+ if (bitmap->sysfs_can_clear)
+ sysfs_put(bitmap->sysfs_can_clear);
+
bitmap_free(bitmap);
}
@@ -1598,16 +1655,17 @@ int bitmap_create(mddev_t *mddev)
sector_t blocks = mddev->resync_max_sectors;
unsigned long chunks;
unsigned long pages;
- struct file *file = mddev->bitmap_file;
+ struct file *file = mddev->bitmap_info.file;
int err;
sector_t start;
+ struct sysfs_dirent *bm;
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
- if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
+ if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
return 0;
- BUG_ON(file && mddev->bitmap_offset);
+ BUG_ON(file && mddev->bitmap_info.offset);
bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
@@ -1620,8 +1678,14 @@ int bitmap_create(mddev_t *mddev)
bitmap->mddev = mddev;
+ bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
+ if (bm) {
+ bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
+ sysfs_put(bm);
+ } else
+ bitmap->sysfs_can_clear = NULL;
+
bitmap->file = file;
- bitmap->offset = mddev->bitmap_offset;
if (file) {
get_file(file);
/* As future accesses to this file will use bmap,
@@ -1630,12 +1694,22 @@ int bitmap_create(mddev_t *mddev)
*/
vfs_fsync(file, file->f_dentry, 1);
}
- /* read superblock from bitmap file (this sets bitmap->chunksize) */
- err = bitmap_read_sb(bitmap);
+ /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
+ if (!mddev->bitmap_info.external)
+ err = bitmap_read_sb(bitmap);
+ else {
+ err = 0;
+ if (mddev->bitmap_info.chunksize == 0 ||
+ mddev->bitmap_info.daemon_sleep == 0)
+ /* chunksize and time_base need to be
+ * set first. */
+ err = -EINVAL;
+ }
if (err)
goto error;
- bitmap->chunkshift = ffz(~bitmap->chunksize);
+ bitmap->daemon_lastrun = jiffies;
+ bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);
/* now that chunksize and chunkshift are set, we can use these macros */
chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
@@ -1677,7 +1751,8 @@ int bitmap_create(mddev_t *mddev)
mddev->bitmap = bitmap;
- mddev->thread->timeout = bitmap->daemon_sleep * HZ;
+ mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
+ md_wakeup_thread(mddev->thread);
bitmap_update_sb(bitmap);
@@ -1688,6 +1763,264 @@ int bitmap_create(mddev_t *mddev)
return err;
}
+static ssize_t
+location_show(mddev_t *mddev, char *page)
+{
+ ssize_t len;
+ if (mddev->bitmap_info.file) {
+ len = sprintf(page, "file");
+ } else if (mddev->bitmap_info.offset) {
+ len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
+ } else
+ len = sprintf(page, "none");
+ len += sprintf(page+len, "\n");
+ return len;
+}
+
+static ssize_t
+location_store(mddev_t *mddev, const char *buf, size_t len)
+{
+
+ if (mddev->pers) {
+ if (!mddev->pers->quiesce)
+ return -EBUSY;
+ if (mddev->recovery || mddev->sync_thread)
+ return -EBUSY;
+ }
+
+ if (mddev->bitmap || mddev->bitmap_info.file ||
+ mddev->bitmap_info.offset) {
+ /* bitmap already configured. Only option is to clear it */
+ if (strncmp(buf, "none", 4) != 0)
+ return -EBUSY;
+ if (mddev->pers) {
+ mddev->pers->quiesce(mddev, 1);
+ bitmap_destroy(mddev);
+ mddev->pers->quiesce(mddev, 0);
+ }
+ mddev->bitmap_info.offset = 0;
+ if (mddev->bitmap_info.file) {
+ struct file *f = mddev->bitmap_info.file;
+ mddev->bitmap_info.file = NULL;
+ restore_bitmap_write_access(f);
+ fput(f);
+ }
+ } else {
+ /* No bitmap, OK to set a location */
+ long long offset;
+ if (strncmp(buf, "none", 4) == 0)
+ /* nothing to be done */;
+ else if (strncmp(buf, "file:", 5) == 0) {
+ /* Not supported yet */
+ return -EINVAL;
+ } else {
+ int rv;
+ if (buf[0] == '+')
+ rv = strict_strtoll(buf+1, 10, &offset);
+ else
+ rv = strict_strtoll(buf, 10, &offset);
+ if (rv)
+ return rv;
+ if (offset == 0)
+ return -EINVAL;
+ if (mddev->bitmap_info.external == 0 &&
+ mddev->major_version == 0 &&
+ offset != mddev->bitmap_info.default_offset)
+ return -EINVAL;
+ mddev->bitmap_info.offset = offset;
+ if (mddev->pers) {
+ mddev->pers->quiesce(mddev, 1);
+ rv = bitmap_create(mddev);
+ if (rv) {
+ bitmap_destroy(mddev);
+ mddev->bitmap_info.offset = 0;
+ }
+ mddev->pers->quiesce(mddev, 0);
+ if (rv)
+ return rv;
+ }
+ }
+ }
+ if (!mddev->external) {
+ /* Ensure new bitmap info is stored in
+ * metadata promptly.
+ */
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ md_wakeup_thread(mddev->thread);
+ }
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_location =
+__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
+
+static ssize_t
+timeout_show(mddev_t *mddev, char *page)
+{
+ ssize_t len;
+ unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
+ unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
+
+ len = sprintf(page, "%lu", secs);
+ if (jifs)
+ len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
+ len += sprintf(page+len, "\n");
+ return len;
+}
+
+static ssize_t
+timeout_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* timeout can be set at any time */
+ unsigned long timeout;
+ int rv = strict_strtoul_scaled(buf, &timeout, 4);
+ if (rv)
+ return rv;
+
+ /* just to make sure we don't overflow... */
+ if (timeout >= LONG_MAX / HZ)
+ return -EINVAL;
+
+ timeout = timeout * HZ / 10000;
+
+ if (timeout >= MAX_SCHEDULE_TIMEOUT)
+ timeout = MAX_SCHEDULE_TIMEOUT-1;
+ if (timeout < 1)
+ timeout = 1;
+ mddev->bitmap_info.daemon_sleep = timeout;
+ if (mddev->thread) {
+ /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
+ * the bitmap is all clean and we don't need to
+ * adjust the timeout right now
+ */
+ if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
+ mddev->thread->timeout = timeout;
+ md_wakeup_thread(mddev->thread);
+ }
+ }
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_timeout =
+__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
+
+static ssize_t
+backlog_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
+}
+
+static ssize_t
+backlog_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ unsigned long backlog;
+ int rv = strict_strtoul(buf, 10, &backlog);
+ if (rv)
+ return rv;
+ if (backlog > COUNTER_MAX)
+ return -EINVAL;
+ mddev->bitmap_info.max_write_behind = backlog;
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_backlog =
+__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
+
+static ssize_t
+chunksize_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
+}
+
+static ssize_t
+chunksize_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* Can only be changed when no bitmap is active */
+ int rv;
+ unsigned long csize;
+ if (mddev->bitmap)
+ return -EBUSY;
+ rv = strict_strtoul(buf, 10, &csize);
+ if (rv)
+ return rv;
+ if (csize < 512 ||
+ !is_power_of_2(csize))
+ return -EINVAL;
+ mddev->bitmap_info.chunksize = csize;
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_chunksize =
+__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
+
+static ssize_t metadata_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%s\n", (mddev->bitmap_info.external
+ ? "external" : "internal"));
+}
+
+static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ if (mddev->bitmap ||
+ mddev->bitmap_info.file ||
+ mddev->bitmap_info.offset)
+ return -EBUSY;
+ if (strncmp(buf, "external", 8) == 0)
+ mddev->bitmap_info.external = 1;
+ else if (strncmp(buf, "internal", 8) == 0)
+ mddev->bitmap_info.external = 0;
+ else
+ return -EINVAL;
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_metadata =
+__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
+
+static ssize_t can_clear_show(mddev_t *mddev, char *page)
+{
+ int len;
+ if (mddev->bitmap)
+ len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
+ "false" : "true"));
+ else
+ len = sprintf(page, "\n");
+ return len;
+}
+
+static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ if (mddev->bitmap == NULL)
+ return -ENOENT;
+ if (strncmp(buf, "false", 5) == 0)
+ mddev->bitmap->need_sync = 1;
+ else if (strncmp(buf, "true", 4) == 0) {
+ if (mddev->degraded)
+ return -EBUSY;
+ mddev->bitmap->need_sync = 0;
+ } else
+ return -EINVAL;
+ return len;
+}
+
+static struct md_sysfs_entry bitmap_can_clear =
+__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
+
+static struct attribute *md_bitmap_attrs[] = {
+ &bitmap_location.attr,
+ &bitmap_timeout.attr,
+ &bitmap_backlog.attr,
+ &bitmap_chunksize.attr,
+ &bitmap_metadata.attr,
+ &bitmap_can_clear.attr,
+ NULL
+};
+struct attribute_group md_bitmap_group = {
+ .name = "bitmap",
+ .attrs = md_bitmap_attrs,
+};
+
+
/* the bitmap API -- for raid personalities */
EXPORT_SYMBOL(bitmap_startwrite);
EXPORT_SYMBOL(bitmap_endwrite);
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index e98900671ca..cb821d76d1b 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -106,7 +106,7 @@ typedef __u16 bitmap_counter_t;
#define BITMAP_BLOCK_SHIFT 9
/* how many blocks per chunk? (this is variable) */
-#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->m