aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/drbd/drbd_bitmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd/drbd_bitmap.c')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c738
1 files changed, 422 insertions, 316 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 912f585a760..1aa29f8fdfe 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -113,62 +113,54 @@ struct drbd_bitmap {
};
#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
-static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
+static void __bm_print_lock_info(struct drbd_device *device, const char *func)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
if (!__ratelimit(&drbd_ratelimit_state))
return;
- dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
- current == mdev->receiver.task ? "receiver" :
- current == mdev->asender.task ? "asender" :
- current == mdev->worker.task ? "worker" : current->comm,
- func, b->bm_why ?: "?",
- b->bm_task == mdev->receiver.task ? "receiver" :
- b->bm_task == mdev->asender.task ? "asender" :
- b->bm_task == mdev->worker.task ? "worker" : "?");
+ drbd_err(device, "FIXME %s[%d] in %s, bitmap locked for '%s' by %s[%d]\n",
+ current->comm, task_pid_nr(current),
+ func, b->bm_why ?: "?",
+ b->bm_task->comm, task_pid_nr(b->bm_task));
}
-void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
+void drbd_bm_lock(struct drbd_device *device, char *why, enum bm_flag flags)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
int trylock_failed;
if (!b) {
- dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
+ drbd_err(device, "FIXME no bitmap in drbd_bm_lock!?\n");
return;
}
trylock_failed = !mutex_trylock(&b->bm_change);
if (trylock_failed) {
- dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
- current == mdev->receiver.task ? "receiver" :
- current == mdev->asender.task ? "asender" :
- current == mdev->worker.task ? "worker" : current->comm,
- why, b->bm_why ?: "?",
- b->bm_task == mdev->receiver.task ? "receiver" :
- b->bm_task == mdev->asender.task ? "asender" :
- b->bm_task == mdev->worker.task ? "worker" : "?");
+ drbd_warn(device, "%s[%d] going to '%s' but bitmap already locked for '%s' by %s[%d]\n",
+ current->comm, task_pid_nr(current),
+ why, b->bm_why ?: "?",
+ b->bm_task->comm, task_pid_nr(b->bm_task));
mutex_lock(&b->bm_change);
}
if (BM_LOCKED_MASK & b->bm_flags)
- dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
+ drbd_err(device, "FIXME bitmap already locked in bm_lock\n");
b->bm_flags |= flags & BM_LOCKED_MASK;
b->bm_why = why;
b->bm_task = current;
}
-void drbd_bm_unlock(struct drbd_conf *mdev)
+void drbd_bm_unlock(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
if (!b) {
- dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
+ drbd_err(device, "FIXME no bitmap in drbd_bm_unlock!?\n");
return;
}
- if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
- dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
+ if (!(BM_LOCKED_MASK & device->bitmap->bm_flags))
+ drbd_err(device, "FIXME bitmap not locked in bm_unlock\n");
b->bm_flags &= ~BM_LOCKED_MASK;
b->bm_why = NULL;
@@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
/* to mark for lazy writeout once syncer cleared all clearable bits,
* we if bits have been cleared since last IO. */
#define BM_PAGE_LAZY_WRITEOUT 28
+/* pages marked with this "HINT" will be considered for writeout
+ * on activity log transactions */
+#define BM_PAGE_HINT_WRITEOUT 27
/* store_page_idx uses non-atomic assignment. It is only used directly after
* allocating the page. All other bm_set_page_* and bm_clear_page_* need to
@@ -205,7 +200,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
static void bm_store_page_idx(struct page *page, unsigned long idx)
{
BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
- page_private(page) |= idx;
+ set_page_private(page, idx);
}
static unsigned long bm_page_to_idx(struct page *page)
@@ -216,20 +211,19 @@ static unsigned long bm_page_to_idx(struct page *page)
/* As is very unlikely that the same page is under IO from more than one
* context, we can get away with a bit per page and one wait queue per bitmap.
*/
-static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
+static void bm_page_lock_io(struct drbd_device *device, int page_nr)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
void *addr = &page_private(b->bm_pages[page_nr]);
wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
}
-static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
+static void bm_page_unlock_io(struct drbd_device *device, int page_nr)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
void *addr = &page_private(b->bm_pages[page_nr]);
- clear_bit(BM_PAGE_IO_LOCK, addr);
- smp_mb__after_clear_bit();
- wake_up(&mdev->bitmap->bm_io_wait);
+ clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
+ wake_up(&device->bitmap->bm_io_wait);
}
/* set _before_ submit_io, so it may be reset due to being changed
@@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
}
+/**
+ * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
+ * @device: DRBD device.
+ * @page_nr: the bitmap page to mark with the "hint" flag
+ *
+ * From within an activity log transaction, we mark a few pages with these
+ * hints, then call drbd_bm_write_hinted(), which will only write out changed
+ * pages which are flagged with this mark.
+ */
+void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr)
+{
+ struct page *page;
+ if (page_nr >= device->bitmap->bm_number_of_pages) {
+ drbd_warn(device, "BAD: page_nr: %u, number_of_pages: %u\n",
+ page_nr, (int)device->bitmap->bm_number_of_pages);
+ return;
+ }
+ page = device->bitmap->bm_pages[page_nr];
+ set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+}
+
static int bm_test_page_unchanged(struct page *page)
{
volatile const unsigned long *addr = &page_private(page);
@@ -289,25 +304,25 @@ static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
return page_nr;
}
-static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
struct page *page = b->bm_pages[idx];
- return (unsigned long *) kmap_atomic(page, km);
+ return (unsigned long *) kmap_atomic(page);
}
static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
{
- return __bm_map_pidx(b, idx, KM_IRQ1);
+ return __bm_map_pidx(b, idx);
}
-static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+static void __bm_unmap(unsigned long *p_addr)
{
- kunmap_atomic(p_addr, km);
+ kunmap_atomic(p_addr);
};
static void bm_unmap(unsigned long *p_addr)
{
- return __bm_unmap(p_addr, KM_IRQ1);
+ return __bm_unmap(p_addr);
}
/* long word offset of _bitmap_ sector */
@@ -325,7 +340,7 @@ static void bm_unmap(unsigned long *p_addr)
/*
* actually most functions herein should take a struct drbd_bitmap*, not a
- * struct drbd_conf*, but for the debug macros I like to have the mdev around
+ * struct drbd_device*, but for the debug macros I like to have the device around
* to be able to report device specific.
*/
@@ -373,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
return old_pages;
/* Trying kmalloc first, falling back to vmalloc.
- * GFP_KERNEL is ok, as this is done when a lower level disk is
- * "attached" to the drbd. Context is receiver thread or cqueue
- * thread. As we have no disk yet, we are not in the IO path,
- * not even the IO path of the peer. */
+ * GFP_NOIO, as this is called while drbd IO is "suspended",
+ * and during resize or attach on diskless Primary,
+ * we must not block on IO to ourselves.
+ * Context is receiver thread or dmsetup. */
bytes = sizeof(struct page *)*want;
- new_pages = kzalloc(bytes, GFP_KERNEL);
+ new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
if (!new_pages) {
- new_pages = vzalloc(bytes);
+ new_pages = __vmalloc(bytes,
+ GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
if (!new_pages)
return NULL;
vmalloced = 1;
@@ -390,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
for (i = 0; i < have; i++)
new_pages[i] = old_pages[i];
for (; i < want; i++) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) {
bm_free_pages(new_pages + have, i - have);
bm_vk_free(new_pages, vmalloced);
@@ -419,11 +436,11 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
/*
* called on driver init only. TODO call when a device is created.
- * allocates the drbd_bitmap, and stores it in mdev->bitmap.
+ * allocates the drbd_bitmap, and stores it in device->bitmap.
*/
-int drbd_bm_init(struct drbd_conf *mdev)
+int drbd_bm_init(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
WARN_ON(b != NULL);
b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
if (!b)
@@ -432,26 +449,28 @@ int drbd_bm_init(struct drbd_conf *mdev)
mutex_init(&b->bm_change);
init_waitqueue_head(&b->bm_io_wait);
- mdev->bitmap = b;
+ device->bitmap = b;
return 0;
}
-sector_t drbd_bm_capacity(struct drbd_conf *mdev)
+sector_t drbd_bm_capacity(struct drbd_device *device)
{
- ERR_IF(!mdev->bitmap) return 0;
- return mdev->bitmap->bm_dev_capacity;
+ if (!expect(device->bitmap))
+ return 0;
+ return device->bitmap->bm_dev_capacity;
}
/* called on driver unload. TODO: call when a device is destroyed.
*/
-void drbd_bm_cleanup(struct drbd_conf *mdev)
+void drbd_bm_cleanup(struct drbd_device *device)
{
- ERR_IF (!mdev->bitmap) return;
- bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
- bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
- kfree(mdev->bitmap);
- mdev->bitmap = NULL;
+ if (!expect(device->bitmap))
+ return;
+ bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
+ bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+ kfree(device->bitmap);
+ device->bitmap = NULL;
}
/*
@@ -543,15 +562,15 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
/* all but last page */
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < LWPP; i++)
bits += hweight_long(p_addr[i]);
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
cond_resched();
}
/* last (or only) page */
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
- p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ p_addr = __bm_map_pidx(b, idx);
for (i = 0; i < last_word; i++)
bits += hweight_long(p_addr[i]);
p_addr[last_word] &= cpu_to_lel(mask);
@@ -559,7 +578,7 @@ static unsigned long bm_count_bits(struct drbd_bitmap *b)
/* 32bit arch, may have an unused padding long */
if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
p_addr[last_word+1] = 0;
- __bm_unmap(p_addr, KM_USER0);
+ __bm_unmap(p_addr);
return bits;
}
@@ -593,6 +612,17 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
}
}
+/* For the layout, see comment above drbd_md_set_sector_offsets(). */
+static u64 drbd_md_on_disk_bits(struct drbd_backing_dev *ldev)
+{
+ u64 bitmap_sectors;
+ if (ldev->md.al_offset == 8)
+ bitmap_sectors = ldev->md.md_size_sect - ldev->md.bm_offset;
+ else
+ bitmap_sectors = ldev->md.al_offset - ldev->md.bm_offset;
+ return bitmap_sectors << (9 + 3);
+}
+
/*
* make sure the bitmap has enough room for the attached storage,
* if necessary, resize.
@@ -601,20 +631,21 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
* In case this is actually a resize, we copy the old bitmap into the new one.
* Otherwise, the bitmap is initialized to all bits set.
*/
-int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
+int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bits)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long bits, words, owords, obits;
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
int err = 0, growing;
int opages_vmalloced;
- ERR_IF(!b) return -ENOMEM;
+ if (!expect(b))
+ return -ENOMEM;
- drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
+ drbd_bm_lock(device, "resize", BM_LOCKED_MASK);
- dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
+ drbd_info(device, "drbd_bm_resize called with capacity == %llu\n",
(unsigned long long)capacity);
if (capacity == b->bm_dev_capacity)
@@ -647,12 +678,12 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*/
words = ALIGN(bits, 64) >> LN2_BPL;
- if (get_ldev(mdev)) {
- u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
- put_ldev(mdev);
+ if (get_ldev(device)) {
+ u64 bits_on_disk = drbd_md_on_disk_bits(device->ldev);
+ put_ldev(device);
if (bits > bits_on_disk) {
- dev_info(DEV, "bits = %lu\n", bits);
- dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
+ drbd_info(device, "bits = %lu\n", bits);
+ drbd_info(device, "bits_on_disk = %llu\n", bits_on_disk);
err = -ENOSPC;
goto out;
}
@@ -661,10 +692,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
have = b->bm_number_of_pages;
if (want == have) {
- D_ASSERT(b->bm_pages != NULL);
+ D_ASSERT(device, b->bm_pages != NULL);
npages = b->bm_pages;
} else {
- if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
+ if (drbd_insert_fault(device, DRBD_FAULT_BM_ALLOC))
npages = NULL;
else
npages = bm_realloc_pages(b, want);
@@ -711,10 +742,10 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
bm_vk_free(opages, opages_vmalloced);
if (!growing)
b->bm_set = bm_count_bits(b);
- dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
+ drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
out:
- drbd_bm_unlock(mdev);
+ drbd_bm_unlock(device);
return err;
}
@@ -726,14 +757,16 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
*
* maybe bm_set should be atomic_t ?
*/
-unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+unsigned long _drbd_bm_total_weight(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long s;
unsigned long flags;
- ERR_IF(!b) return 0;
- ERR_IF(!b->bm_pages) return 0;
+ if (!expect(b))
+ return 0;
+ if (!expect(b->bm_pages))
+ return 0;
spin_lock_irqsave(&b->bm_lock, flags);
s = b->bm_set;
@@ -742,30 +775,33 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
return s;
}
-unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
+unsigned long drbd_bm_total_weight(struct drbd_device *device)
{
unsigned long s;
/* if I don't have a disk, I don't know about out-of-sync status */
- if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+ if (!get_ldev_if_state(device, D_NEGOTIATING))
return 0;
- s = _drbd_bm_total_weight(mdev);
- put_ldev(mdev);
+ s = _drbd_bm_total_weight(device);
+ put_ldev(device);
return s;
}
-size_t drbd_bm_words(struct drbd_conf *mdev)
+size_t drbd_bm_words(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
- ERR_IF(!b) return 0;
- ERR_IF(!b->bm_pages) return 0;
+ struct drbd_bitmap *b = device->bitmap;
+ if (!expect(b))
+ return 0;
+ if (!expect(b->bm_pages))
+ return 0;
return b->bm_words;
}
-unsigned long drbd_bm_bits(struct drbd_conf *mdev)
+unsigned long drbd_bm_bits(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
- ERR_IF(!b) return 0;
+ struct drbd_bitmap *b = device->bitmap;
+ if (!expect(b))
+ return 0;
return b->bm_bits;
}
@@ -775,10 +811,10 @@ unsigned long drbd_bm_bits(struct drbd_conf *mdev)
* bitmap must be locked by drbd_bm_lock.
* currently only used from receive_bitmap.
*/
-void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number,
unsigned long *buffer)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr, *bm;
unsigned long word, bits;
unsigned int idx;
@@ -786,8 +822,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
end = offset + number;
- ERR_IF(!b) return;
- ERR_IF(!b->bm_pages) return;
+ if (!expect(b))
+ return;
+ if (!expect(b->bm_pages))
+ return;
if (number == 0)
return;
WARN_ON(offset >= b->bm_words);
@@ -822,23 +860,25 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
/* copy number words from the bitmap starting at offset into the buffer.
* buffer[i] will be little endian unsigned long.
*/
-void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+void drbd_bm_get_lel(struct drbd_device *device, size_t offset, size_t number,
unsigned long *buffer)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr, *bm;
size_t end, do_now;
end = offset + number;
- ERR_IF(!b) return;
- ERR_IF(!b->bm_pages) return;
+ if (!expect(b))
+ return;
+ if (!expect(b->bm_pages))
+ return;
spin_lock_irq(&b->bm_lock);
if ((offset >= b->bm_words) ||
(end > b->bm_words) ||
(number <= 0))
- dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
+ drbd_err(device, "offset=%lu number=%lu bm_words=%lu\n",
(unsigned long) offset,
(unsigned long) number,
(unsigned long) b->bm_words);
@@ -857,11 +897,13 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
}
/* set all bits in the bitmap */
-void drbd_bm_set_all(struct drbd_conf *mdev)
+void drbd_bm_set_all(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
- ERR_IF(!b) return;
- ERR_IF(!b->bm_pages) return;
+ struct drbd_bitmap *b = device->bitmap;
+ if (!expect(b))
+ return;
+ if (!expect(b->bm_pages))
+ return;
spin_lock_irq(&b->bm_lock);
bm_memset(b, 0, 0xff, b->bm_words);
@@ -871,11 +913,13 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
}
/* clear all bits in the bitmap */
-void drbd_bm_clear_all(struct drbd_conf *mdev)
+void drbd_bm_clear_all(struct drbd_device *device)
{
- struct drbd_bitmap *b = mdev->bitmap;
- ERR_IF(!b) return;
- ERR_IF(!b->bm_pages) return;
+ struct drbd_bitmap *b = device->bitmap;
+ if (!expect(b))
+ return;
+ if (!expect(b->bm_pages))
+ return;
spin_lock_irq(&b->bm_lock);
bm_memset(b, 0, 0, b->bm_words);
@@ -884,20 +928,31 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
}
struct bm_aio_ctx {
- struct drbd_conf *mdev;
+ struct drbd_device *device;
atomic_t in_flight;
- struct completion done;
+ unsigned int done;
unsigned flags;
#define BM_AIO_COPY_PAGES 1
+#define BM_AIO_WRITE_HINTED 2
+#define BM_WRITE_ALL_PAGES 4
int error;
+ struct kref kref;
};
+static void bm_aio_ctx_destroy(struct kref *kref)
+{
+ struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+
+ put_ldev(ctx->device);
+ kfree(ctx);
+}
+
/* bv_page may be a copy, or may be the original */
static void bm_async_io_complete(struct bio *bio, int error)
{
struct bm_aio_ctx *ctx = bio->bi_private;
- struct drbd_conf *mdev = ctx->mdev;
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_device *device = ctx->device;
+ struct drbd_bitmap *b = device->bitmap;
unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
int uptodate = bio_flagged(bio, BIO_UPTODATE);
@@ -911,7 +966,7 @@ static void bm_async_io_complete(struct bio *bio, int error)
if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
!bm_test_page_unchanged(b->bm_pages[idx]))
- dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);
+ drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
if (error) {
/* ctx error will hold the completed-last non-zero error code,
@@ -921,93 +976,83 @@ static void bm_async_io_complete(struct bio *bio, int error)
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
- dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
+ drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
error, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
- dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
+ dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
}
- bm_page_unlock_io(mdev, idx);
+ bm_page_unlock_io(device, idx);
- /* FIXME give back to page pool */
if (ctx->flags & BM_AIO_COPY_PAGES)
- put_page(bio->bi_io_vec[0].bv_page);
+ mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
bio_put(bio);
- if (atomic_dec_and_test(&ctx->in_flight))
- complete(&ctx->done);
+ if (atomic_dec_and_test(&ctx->in_flight)) {
+ ctx->done = 1;
+ wake_up(&device->misc_wait);
+ kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ }
}
static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
{
- /* we are process context. we always get a bio */
- struct bio *bio = bio_alloc(GFP_KERNEL, 1);
- struct drbd_conf *mdev = ctx->mdev;
- struct drbd_bitmap *b = mdev->bitmap;
+ struct bio *bio = bio_alloc_drbd(GFP_NOIO);
+ struct drbd_device *device = ctx->device;
+ struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
sector_t on_disk_sector =
- mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
+ device->ldev->md.md_offset + device->ldev->md.bm_offset;
on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
/* this might happen with very small
* flexible external meta data device,
* or with PAGE_SIZE > 4k */
len = min_t(unsigned int, PAGE_SIZE,
- (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
+ (drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
/* serialize IO on this page */
- bm_page_lock_io(mdev, page_nr);
+ bm_page_lock_io(device, page_nr);
/* before memcpy and submit,
* so it can be redirtied any time */
bm_set_page_unchanged(b->bm_pages[page_nr]);
if (ctx->flags & BM_AIO_COPY_PAGES) {
- /* FIXME alloc_page is good enough for now, but actually needs
- * to use pre-allocated page pool */
- void *src, *dest;
- page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
- dest = kmap_atomic(page, KM_USER0);
- src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
- memcpy(dest, src, PAGE_SIZE);
- kunmap_atomic(src, KM_USER1);
- kunmap_atomic(dest, KM_USER0);
+ page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
+ copy_highpage(page, b->bm_pages[page_nr]);
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
-
- bio->bi_bdev = mdev->ldev->md_bdev;
- bio->bi_sector = on_disk_sector;
+ bio->bi_bdev = device->ldev->md_bdev;
+ bio->bi_iter.bi_sector = on_disk_sector;
+ /* bio_add_page of a single page to an empty bio will always succeed,
+ * according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
bio->bi_end_io = bm_async_io_complete;
- if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
+ if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio->bi_rw |= rw;
bio_endio(bio, -EIO);
} else {
submit_bio(rw, bio);
/* this should not count as user activity and cause the
* resync to throttle -- see drbd_rs_should_slow_down(). */
- atomic_add(len >> 9, &mdev->rs_sect_ev);
+ atomic_add(len >> 9, &device->rs_sect_ev);
}
}
/*
* bm_rw: read/write the whole bitmap from/to its on disk location.
*/
-static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
+static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
- struct bm_aio_ctx ctx = {
- .mdev = mdev,
- .in_flight = ATOMIC_INIT(1),
- .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
- .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
- };
- struct drbd_bitmap *b = mdev->bitmap;
+ struct bm_aio_ctx *ctx;
+ struct drbd_bitmap *b = device->bitmap;
int num_pages, i, count = 0;
unsigned long now;
char ppb[10];
@@ -1021,7 +1066,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
* For lazy writeout, we don't care for ongoing changes to the bitmap,
* as we submit copies of pages anyways.
*/
- if (!ctx.flags)
+
+ ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+ if (!ctx)
+ return -ENOMEM;
+
+ *ctx = (struct bm_aio_ctx) {
+ .device = device,
+ .in_flight = ATOMIC_INIT(1),
+ .done = 0,
+ .flags = flags,
+ .error = 0,
+ .kref = { ATOMIC_INIT(2) },
+ };
+
+ if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
+ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+ kfree(ctx);
+ return -ENODEV;
+ }
+
+ if (!ctx->flags)
WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
num_pages = b->bm_number_of_pages;
@@ -1034,91 +1099,135 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
break;
if (rw & WRITE) {
- if (bm_test_page_unchanged(b->bm_pages[i])) {
- dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
+ if ((flags & BM_AIO_WRITE_HINTED) &&
+ !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+ &page_private(b->bm_pages[i])))
+ continue;
+
+ if (!(flags & BM_WRITE_ALL_PAGES) &&
+ bm_test_page_unchanged(b->bm_pages[i])) {
+ dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
continue;
}
/* during lazy writeout,
* ignore those pages not marked for lazy writeout. */
if (lazy_writeout_upper_idx &&
!bm_test_page_lazy_writeout(b->bm_pages[i])) {
- dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
+ dynamic_drbd_dbg(device, "skipped bm lazy write for idx %u\n", i);
continue;
}
}
- atomic_inc(&ctx.in_flight);
- bm_page_io_async(&ctx, i, rw);
+ atomic_inc(&ctx->in_flight);
+ bm_page_io_async(ctx, i, rw);
++count;
cond_resched();
}
/*
- * We initialize ctx.in_flight to one to make sure bm_async_io_complete
- * will not complete() early, and decrement / test it here. If there
+ * We initialize ctx->in_flight to one to make sure bm_async_io_complete
+ * will not set ctx->done early, and decrement / test it here. If there
* are still some bios in flight, we need to wait for them here.
+ * If all IO is done already (or nothing had been submitted), there is
+ * no need to wait. Still, we need to put the kref associated with the
+ * "in_flight reached zero, all done" event.
*/
- if (!atomic_dec_and_test(&ctx.in_flight))
- wait_for_completion(&ctx.done);
- dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
- rw == WRITE ? "WRITE" : "READ",
- count, jiffies - now);
-
- if (ctx.error) {
- dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
- drbd_chk_io_error(mdev, 1, true);
- err = -EIO; /* ctx.error ? */
+ if (!atomic_dec_and_test(&ctx->in_flight))
+ wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
+ else
+ kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+
+ /* summary for global bitmap IO */
+ if (flags == 0)
+ drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
+ rw == WRITE ? "WRITE" : "READ",
+ count, jiffies - now);
+
+ if (ctx->error) {
+ drbd_alert(device, "we had at least one MD IO ERROR during bitmap IO\n");
+ drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
+ err = -EIO; /* ctx->error ? */
}
+ if (atomic_read(&ctx->in_flight))
+ err = -EIO; /* Disk timeout/force-detach during IO... */
+
now = jiffies;
if (rw == WRITE) {
- drbd_md_flush(mdev);
+ drbd_md_flush(device);
} else /* rw == READ */ {
b->bm_set = bm_count_bits(b);
- dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
+ drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
jiffies - now);
}
now = b->bm_set;
- dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
- ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+ if (flags == 0)
+ drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+ ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+ kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
}
/**
* drbd_bm_read() - Read the whole bitmap from its on disk location.
- * @mdev: DRBD device.
+ * @device: DRBD device.
*/
-int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
+int drbd_bm_read(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(mdev, READ, 0);
+ return bm_rw(device, READ, 0, 0);
}
/**
* drbd_bm_write() - Write the whole bitmap to its on disk location.
- * @mdev: DRBD device.
+ * @device: DRBD device.
*
* Will only write pages that have changed since last IO.
*/
-int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
+int drbd_bm_write(struct drbd_device *device) __must_hold(local)
+{
+ return bm_rw(device, WRITE, 0, 0);
+}
+
+/**
+ * drbd_bm_write_all() - Write the whole bitmap to its on disk location.
+ * @device: DRBD device.
+ *
+ * Will write all pages.
+ */
+int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(mdev, WRITE, 0);
+ return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0);
}
/**
- * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
- * @mdev: DRBD device.
- * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
+ * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
+ * @device: DRBD device.
+ *
+ * Will only write pages that have changed since last IO.
+ * In contrast to drbd_bm_write(), this will copy the bitmap pages
+ * to temporary writeout pages. It is intended to trigger a full write-out
+ * while still allowing the bitmap to change, for example if a resync or online
+ * verify is aborted due to a failed peer disk, while local IO continues, or
+ * pending resync acks are still being processed.
*/
-int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
+int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
{
- return bm_rw(mdev, WRITE, upper_idx);
+ return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0);
}
+/**
+ * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
+ * @device: DRBD device.
+ */
+int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
+{
+ return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
+}
/**
- * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
- * @mdev: DRBD device.
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
+ * @device: DRBD device.
* @idx: bitmap page index
*
* We don't want to special case on logical_block_size of the backend device,
@@ -1128,30 +1237,47 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
* In case this becomes an issue on systems with larger PAGE_SIZE,
* we may want to change this again to write 4k aligned 4k pieces.
*/
-int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
+int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
{
- struct bm_aio_ctx ctx = {
- .mdev = mdev,
+ struct bm_aio_ctx *ctx;
+ int err;
+
+ if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
+ dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
+ return 0;
+ }
+
+ ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+ if (!ctx)
+ return -ENOMEM;
+
+ *ctx = (struct bm_aio_ctx) {
+ .device = device,
.in_flight = ATOMIC_INIT(1),
- .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+ .done = 0,
.flags = BM_AIO_COPY_PAGES,
+ .error = 0,
+ .kref = { ATOMIC_INIT(2) },
};
- if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
- dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
- return 0;
+ if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
+ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+ kfree(ctx);
+ return -ENODEV;
}
- bm_page_io_async(&ctx, idx, WRITE_SYNC);
- wait_for_completion(&ctx.done);
+ bm_page_io_async(ctx, idx, WRITE_SYNC);
+ wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
- if (ctx.error)
- drbd_chk_io_error(mdev, 1, true);
- /* that should force detach, so the in memory bitmap will be
+ if (ctx->error)
+ drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
+ /* that causes us to detach, so the in memory bitmap will be
* gone in a moment as well. */
- mdev->bm_writ_cnt++;
- return ctx.error;
+ device->bm_writ_cnt++;
+ err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
+ kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+ return err;
}
/* NOTE
@@ -1162,23 +1288,23 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
*
* this returns a bit number, NOT a sector!
*/
-static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
- const int find_zero_bit, const enum km_type km)
+static unsigned long __bm_find_next(struct drbd_device *device, unsigned long bm_fo,
+ const int find_zero_bit)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr;
unsigned long bit_offset;
unsigned i;
if (bm_fo > b->bm_bits) {
- dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
+ drbd_err(device, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
bm_fo = DRBD_END_OF_BITMAP;
} else {
while (bm_fo < b->bm_bits) {
/* bit offset of the first bit in the page */
bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
- p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+ p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
if (find_zero_bit)
i = find_next_zero_bit_le(p_addr,
@@ -1187,7 +1313,7 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
i = find_next_bit_le(p_addr,
PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
- __bm_unmap(p_addr, km);
+ __bm_unmap(p_addr);
if (i < PAGE_SIZE*8) {
bm_fo = bit_offset + i;
if (bm_fo >= b->bm_bits)
@@ -1202,50 +1328,52 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
return bm_fo;
}
-static unsigned long bm_find_next(struct drbd_conf *mdev,
+static unsigned long bm_find_next(struct drbd_device *device,
unsigned long bm_fo, const int find_zero_bit)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long i = DRBD_END_OF_BITMAP;
- ERR_IF(!b) return i;
- ERR_IF(!b->bm_pages) return i;
+ if (!expect(b))
+ return i;
+ if (!expect(b->bm_pages))
+ return i;
spin_lock_irq(&b->bm_lock);
if (BM_DONT_TEST & b->bm_flags)
- bm_print_lock_info(mdev);
+ bm_print_lock_info(device);
- i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+ i = __bm_find_next(device, bm_fo, find_zero_bit);
spin_unlock_irq(&b->bm_lock);
return i;
}
-unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+unsigned long drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{
- return bm_find_next(mdev, bm_fo, 0);
+ return bm_find_next(device, bm_fo, 0);
}
#if 0
/* not yet needed for anything. */
-unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+unsigned long drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{
- return bm_find_next(mdev, bm_fo, 1);
+ return bm_find_next(device, bm_fo, 1);
}
#endif
/* does not spin_lock_irqsave.
* you must take drbd_bm_lock() first */
-unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned long bm_fo)
{
- /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+ /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
+ return __bm_find_next(device, bm_fo, 0);
}
-unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo)
{
- /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
- return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+ /* WARN_ON(!(BM_DONT_SET & device->b->bm_flags)); */
+ return __bm_find_next(device, bm_fo, 1);
}
/* returns number of bits actually changed.
@@ -1254,10 +1382,10 @@ unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_f
* wants bitnr, not sector.
* expected to be called for only a few bits (e - s about BITS_PER_LONG).
* Must hold bitmap lock already. */
-static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+static int __bm_change_bits_to(struct drbd_device *device, const unsigned long s,
unsigned long e, int val)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr = NULL;
unsigned long bitnr;
unsigned int last_page_nr = -1U;
@@ -1265,7 +1393,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
int changed_total = 0;
if (e >= b->bm_bits) {
- dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
+ drbd_err(device, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
s, e, b->bm_bits);
e = b->bm_bits ? b->bm_bits -1 : 0;
}
@@ -1273,14 +1401,14 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
if (page_nr != last_page_nr) {
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
changed_total += c;
c = 0;
- p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+ p_addr = __bm_map_pidx(b, page_nr);
last_page_nr = page_nr;
}
if (val)
@@ -1289,7 +1417,7 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
}
if (p_addr)
- __bm_unmap(p_addr, KM_IRQ1);
+ __bm_unmap(p_addr);
if (c < 0)
bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
else if (c > 0)
@@ -1303,36 +1431,38 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
* for val != 0, we change 0 -> 1, return code positive
* for val == 0, we change 1 -> 0, return code negative
* wants bitnr, not sector */
-static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+static int bm_change_bits_to(struct drbd_device *device, const unsigned long s,
const unsigned long e, int val)
{
unsigned long flags;
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
int c = 0;
- ERR_IF(!b) return 1;
- ERR_IF(!b->bm_pages) return 0;
+ if (!expect(b))
+ return 1;
+ if (!expect(b->bm_pages))
+ return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
- bm_print_lock_info(mdev);
+ bm_print_lock_info(device);
- c = __bm_change_bits_to(mdev, s, e, val);
+ c = __bm_change_bits_to(device, s, e, val);
spin_unlock_irqrestore(&b->bm_lock, flags);
return c;
}
/* returns number of bits changed 0 -> 1 */
-int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+int drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
- return bm_change_bits_to(mdev, s, e, 1);
+ return bm_change_bits_to(device, s, e, 1);
}
/* returns number of bits changed 1 -> 0 */
-int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+int drbd_bm_clear_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
- return -bm_change_bits_to(mdev, s, e, 0);
+ return -bm_change_bits_to(device, s, e, 0);
}
/* sets all bits in full words,
@@ -1342,13 +1472,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
{
int i;
int bits;
- unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+ int changed = 0;
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
- b->bm_set += BITS_PER_LONG - bits;
+ changed += BITS_PER_LONG - bits;
+ }
+ kunmap_atomic(paddr);
+ if (changed) {
+ /* We only need lazy writeout, the information is still in the
+ * remote bitmap as well, and is reconstructed during the next
+ * bitmap exchange, if lost locally due to a crash. */
+ bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
+ b->bm_set += changed;
}
- kunmap_atomic(paddr, KM_IRQ1);
}
/* Same thing as drbd_bm_set_bits,
@@ -1356,7 +1494,7 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
* You must first drbd_bm_lock().
* Can be called to set the whole bitmap in one go.
* Sets bits from s to e _inclusive_. */
-void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
/* First set_bit from the first bit (s)
* up to the next long boundary (sl),
@@ -1366,7 +1504,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* Do not use memset, because we must account for changes,
* so we need to loop over the words with hweight() anyways.
*/
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long sl = ALIGN(s,BITS_PER_LONG);
unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
int first_page;
@@ -1378,7 +1516,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
if (e - s <= 3*BITS_PER_LONG) {
/* don't bother; el and sl may even be wrong. */
spin_lock_irq(&b->bm_lock);
- __bm_change_bits_to(mdev, s, e, 1);
+ __bm_change_bits_to(device, s, e, 1);
spin_unlock_irq(&b->bm_lock);
return;
}
@@ -1389,7 +1527,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* bits filling the current long */
if (sl)
- __bm_change_bits_to(mdev, s, sl-1, 1);
+ __bm_change_bits_to(device, s, sl-1, 1);
first_page = sl >> (3 + PAGE_SHIFT);
last_page = el >> (3 + PAGE_SHIFT);
@@ -1401,16 +1539,23 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
/* first and full pages, unless first page == last page */
for (page_nr = first_page; page_nr < last_page; page_nr++) {
- bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
+ bm_set_full_words_within_one_page(device->bitmap, page_nr, first_word, last_word);
spin_unlock_irq(&b->bm_lock);
cond_resched();
first_word = 0;
spin_lock_irq(&b->bm_lock);
}
-
/* last page (respectively only page, for first page == last page) */
last_word = MLPP(el >> LN2_BPL);
- bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples).
+ * ==> e = 32767, el = 32768, last_page = 2,
+ * and now last_word = 0.
+ * We do not want to touch last_page in this case,
+ * as we did not allocate it, it is not present in bitmap->bm_pages.
+ */
+ if (last_word)
+ bm_set_full_words_within_one_page(device->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.
@@ -1418,7 +1563,7 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* it would trigger an assert in __bm_change_bits_to()
*/
if (el <= e)
- __bm_change_bits_to(mdev, el, e, 1);
+ __bm_change_bits_to(device, el, e, 1);
spin_unlock_irq(&b->bm_lock);
}
@@ -1429,19 +1574,21 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* 0 ... bit not set
* -1 ... first out of bounds access, stop testing for bits!
*/
-int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
+int drbd_bm_test_bit(struct drbd_device *device, const unsigned long bitnr)
{
unsigned long flags;
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr;
int i;
- ERR_IF(!b) return 0;
- ERR_IF(!b->bm_pages) return 0;
+ if (!expect(b))
+ return 0;
+ if (!expect(b->bm_pages))
+ return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
- bm_print_lock_info(mdev);
+ bm_print_lock_info(device);
if (bitnr < b->bm_bits) {
p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
@@ -1449,7 +1596,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
} else if (bitnr == b->bm_bits) {
i = -1;
} else { /* (bitnr > b->bm_bits) */
- dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
+ drbd_err(device, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
i = 0;
}
@@ -1458,10 +1605,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
}
/* returns number of bits set in the range [s, e] */
-int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+int drbd_bm_count_bits(struct drbd_device *device, const unsigned long s, const unsigned long e)
{
unsigned long flags;
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
unsigned long *p_addr = NULL;
unsigned long bitnr;
unsigned int page_nr = -1U;
@@ -1471,12 +1618,14 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* robust in case we screwed up elsewhere, in that case pretend there
* was one dirty bit in the requested area, so we won't try to do a
* local read there (no bitmap probably implies no disk) */
- ERR_IF(!b) return 1;
- ERR_IF(!b->bm_pages) return 1;
+ if (!expect(b))
+ return 1;
+ if (!expect(b->bm_pages))
+ return 1;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
- bm_print_lock_info(mdev);
+ bm_print_lock_info(device);
for (bitnr = s; bitnr <= e; bitnr++) {
unsigned int idx = bm_bit_to_page_idx(b, bitnr);
if (page_nr != idx) {
@@ -1485,11 +1634,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
bm_unmap(p_addr);
p_addr = bm_map_pidx(b, idx);
}
- ERR_IF (bitnr >= b->bm_bits) {
- dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
- } else {
+ if (expect(bitnr < b->bm_bits))
c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
- }
+ else
+ drbd_err(device, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
}
if (p_addr)
bm_unmap(p_addr);
@@ -1512,19 +1660,21 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
* reference count of some bitmap extent element from some lru instead...
*
*/
-int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
+int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr)
{
- struct drbd_bitmap *b = mdev->bitmap;
+ struct drbd_bitmap *b = device->bitmap;
int count, s, e;
unsigned long flags;
unsigned long *p_addr, *bm;
- ERR_IF(!b) return 0;
- ERR_IF(!b->bm_pages) return 0;
+ if (!expect(b))
+ return 0;
+ if (!expect(b->bm_pages))
+ return 0;
spin_lock_irqsave(&b->bm_lock, flags);
if (BM_DONT_TEST & b->bm_flags)
- bm_print_lock_info(mdev);
+ bm_print_lock_info(device);
s = S2W(enr);
e = min((size_t)S2W(enr+1), b->bm_words);
@@ -1537,52 +1687,8 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
count += hweight_long(*bm++);
bm_unmap(p_addr);
} else {
- dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
+ drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
}
spin_unlock_irqrestore(&b->bm_lock, flags);
return count;
}
-
-/* Set all bits covered by the AL-extent al_enr.
- * Returns number of bits changed. */
-unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
-{
- struct drbd_bitmap *b = mdev->bitmap;
- unsigned long *p_addr, *bm;
- unsigned long weight;
- unsigned long s, e;
- int count, i, do_now;
- ERR_IF(!b) return 0;
- ERR_IF(!b->bm_pages) return 0;
-
- spin_lock_irq(&b->bm_lock);
- if (BM_DONT_SET & b->bm_flags)
- bm_print_lock_info(mdev);
- weight = b->bm_set;
-
- s = al_enr * BM_WORDS_PER_AL_EXT;
- e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
- /* assert that s and e are on the same page */
- D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
- == s >> (PAGE_SHIFT - LN2_BPL + 3));
- count = 0;
- if (s < b->bm_words) {
- i = do_now = e-s;
- p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
- bm = p_addr + MLPP(s);
- while (i--) {
- count += hweight_long(*bm);
- *bm = -1UL;
- bm++;
- }
- bm_unmap(p_addr);
- b->bm_set += do_now*BITS_PER_LONG - count;
- if (e == b->bm_words)
- b->bm_set -= bm_clear_surplus(b);
- } else {
- dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
- }
- weight = b->bm_set - weight;
- spin_unlock_irq(&b->bm_lock);
- return weight;
-}