aboutsummaryrefslogtreecommitdiff
path: root/fs/ubifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs')
-rw-r--r--fs/ubifs/Kconfig41
-rw-r--r--fs/ubifs/Makefile5
-rw-r--r--fs/ubifs/budget.c511
-rw-r--r--fs/ubifs/commit.c121
-rw-r--r--fs/ubifs/compress.c27
-rw-r--r--fs/ubifs/debug.c2041
-rw-r--r--fs/ubifs/debug.h552
-rw-r--r--fs/ubifs/dir.c325
-rw-r--r--fs/ubifs/file.c588
-rw-r--r--fs/ubifs/find.c70
-rw-r--r--fs/ubifs/gc.c489
-rw-r--r--fs/ubifs/io.c526
-rw-r--r--fs/ubifs/ioctl.c11
-rw-r--r--fs/ubifs/journal.c224
-rw-r--r--fs/ubifs/key.h113
-rw-r--r--fs/ubifs/log.c125
-rw-r--r--fs/ubifs/lprops.c304
-rw-r--r--fs/ubifs/lpt.c227
-rw-r--r--fs/ubifs/lpt_commit.c540
-rw-r--r--fs/ubifs/master.c41
-rw-r--r--fs/ubifs/misc.h201
-rw-r--r--fs/ubifs/orphan.c155
-rw-r--r--fs/ubifs/recovery.c719
-rw-r--r--fs/ubifs/replay.c550
-rw-r--r--fs/ubifs/sb.c274
-rw-r--r--fs/ubifs/scan.c87
-rw-r--r--fs/ubifs/shrinker.c35
-rw-r--r--fs/ubifs/super.c1201
-rw-r--r--fs/ubifs/tnc.c692
-rw-r--r--fs/ubifs/tnc_commit.c219
-rw-r--r--fs/ubifs/tnc_misc.c50
-rw-r--r--fs/ubifs/ubifs-media.h79
-rw-r--r--fs/ubifs/ubifs.h526
-rw-r--r--fs/ubifs/xattr.c105
34 files changed, 7360 insertions, 4414 deletions
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 91ceeda7e5b..ba66d508006 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -11,18 +11,12 @@ config UBIFS_FS
help
UBIFS is a file system for flash devices which works on top of UBI.
-config UBIFS_FS_XATTR
- bool "Extended attributes support"
- depends on UBIFS_FS
- help
- This option enables support of extended attributes.
-
config UBIFS_FS_ADVANCED_COMPR
bool "Advanced compression options"
depends on UBIFS_FS
help
This option allows to explicitly choose which compressions, if any,
- are enabled in UBIFS. Removing compressors means inbility to read
+ are enabled in UBIFS. Removing compressors means inability to read
existing file systems.
If unsure, say 'N'.
@@ -32,7 +26,7 @@ config UBIFS_FS_LZO
depends on UBIFS_FS
default y
help
- LZO compressor is generally faster then zlib but compresses worse.
+ LZO compressor is generally faster than zlib but compresses worse.
Say 'Y' if unsure.
config UBIFS_FS_ZLIB
@@ -40,33 +34,4 @@ config UBIFS_FS_ZLIB
depends on UBIFS_FS
default y
help
- Zlib copresses better then LZO but it is slower. Say 'Y' if unsure.
-
-# Debugging-related stuff
-config UBIFS_FS_DEBUG
- bool "Enable debugging"
- depends on UBIFS_FS
- select DEBUG_FS
- select KALLSYMS_ALL
- help
- This option enables UBIFS debugging.
-
-config UBIFS_FS_DEBUG_MSG_LVL
- int "Default message level (0 = no extra messages, 3 = lots)"
- depends on UBIFS_FS_DEBUG
- default "0"
- help
- This controls the amount of debugging messages produced by UBIFS.
- If reporting bugs, please try to have available a full dump of the
- messages at level 1 while the misbehaviour was occurring. Level 2
- may become necessary if level 1 messages were not enough to find the
- bug. Generally Level 3 should be avoided.
-
-config UBIFS_FS_DEBUG_CHKS
- bool "Enable extra checks"
- depends on UBIFS_FS_DEBUG
- help
- If extra checks are enabled UBIFS will check the consistency of its
- internal data structures during operation. However, UBIFS performance
- is dramatically slower when this option is selected especially if the
- file system is large.
+ Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 80e93c35e49..2c6f0cb816b 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -3,7 +3,4 @@ obj-$(CONFIG_UBIFS_FS) += ubifs.o
ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
-ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o
-
-ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o
-ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
+ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index d81fb9ed2b8..eb997e9c4ab 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -32,18 +32,15 @@
#include "ubifs.h"
#include <linux/writeback.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
/*
* When pessimistic budget calculations say that there is no enough space,
* UBIFS starts writing back dirty inodes and pages, doing garbage collection,
- * or committing. The below constants define maximum number of times UBIFS
+ * or committing. The below constant defines maximum number of times UBIFS
* repeats the operations.
*/
-#define MAX_SHRINK_RETRIES 8
-#define MAX_GC_RETRIES 4
-#define MAX_CMT_RETRIES 2
-#define MAX_NOSPC_RETRIES 1
+#define MAX_MKSPC_RETRIES 3
/*
* The below constant defines amount of dirty pages which should be written
@@ -52,73 +49,24 @@
#define NR_TO_WRITE 16
/**
- * struct retries_info - information about re-tries while making free space.
- * @prev_liability: previous liability
- * @shrink_cnt: how many times the liability was shrinked
- * @shrink_retries: count of liability shrink re-tries (increased when
- * liability does not shrink)
- * @try_gc: GC should be tried first
- * @gc_retries: how many times GC was run
- * @cmt_retries: how many times commit has been done
- * @nospc_retries: how many times GC returned %-ENOSPC
- *
- * Since we consider budgeting to be the fast-path, and this structure has to
- * be allocated on stack and zeroed out, we make it smaller using bit-fields.
- */
-struct retries_info {
- long long prev_liability;
- unsigned int shrink_cnt;
- unsigned int shrink_retries:5;
- unsigned int try_gc:1;
- unsigned int gc_retries:4;
- unsigned int cmt_retries:3;
- unsigned int nospc_retries:1;
-};
-
-/**
* shrink_liability - write-back some dirty pages/inodes.
* @c: UBIFS file-system description object
* @nr_to_write: how many dirty pages to write-back
*
* This function shrinks UBIFS liability by means of writing back some amount
- * of dirty inodes and their pages. Returns the amount of pages which were
- * written back. The returned value does not include dirty inodes which were
- * synchronized.
+ * of dirty inodes and their pages.
*
* Note, this function synchronizes even VFS inodes which are locked
* (@i_mutex) by the caller of the budgeting function, because write-back does
* not touch @i_mutex.
*/
-static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
- int nr_written;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
- .range_end = LLONG_MAX,
- .nr_to_write = nr_to_write,
- };
-
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
-
- if (!nr_written) {
- /*
- * Re-try again but wait on pages/inodes which are being
- * written-back concurrently (e.g., by pdflush).
- */
- memset(&wbc, 0, sizeof(struct writeback_control));
- wbc.sync_mode = WB_SYNC_ALL;
- wbc.range_end = LLONG_MAX;
- wbc.nr_to_write = nr_to_write;
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
- }
-
- dbg_budg("%d pages were written back", nr_written);
- return nr_written;
+ down_read(&c->vfs_sb->s_umount);
+ writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
+ up_read(&c->vfs_sb->s_umount);
}
-
/**
* run_gc - run garbage collector.
* @c: UBIFS file-system description object
@@ -147,13 +95,29 @@ static int run_gc(struct ubifs_info *c)
}
/**
+ * get_liability - calculate current liability.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and returns current UBIFS liability, i.e. the
+ * amount of bytes UBIFS has "promised" to write to the media.
+ */
+static long long get_liability(struct ubifs_info *c)
+{
+ long long liab;
+
+ spin_lock(&c->space_lock);
+ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
+ spin_unlock(&c->space_lock);
+ return liab;
+}
+
+/**
* make_free_space - make more free space on the file-system.
* @c: UBIFS file-system description object
- * @ri: information about previous invocations of this function
*
* This function is called when an operation cannot be budgeted because there
* is supposedly no free space. But in most cases there is some free space:
- * o budgeting is pessimistic, so it always budgets more then it is actually
+ * o budgeting is pessimistic, so it always budgets more than it is actually
* needed, so shrinking the liability is one way to make free space - the
* cached data will take less space then it was budgeted for;
* o GC may turn some dark space into free space (budgeting treats dark space
@@ -165,129 +129,74 @@ static int run_gc(struct ubifs_info *c)
* Returns %-ENOSPC if it couldn't do more free space, and other negative error
* codes on failures.
*/
-static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
+static int make_free_space(struct ubifs_info *c)
{
- int err;
-
- /*
- * If we have some dirty pages and inodes (liability), try to write
- * them back unless this was tried too many times without effect
- * already.
- */
- if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
- long long liability;
+ int err, retries = 0;
+ long long liab1, liab2;
- spin_lock(&c->space_lock);
- liability = c->budg_idx_growth + c->budg_data_growth +
- c->budg_dd_growth;
- spin_unlock(&c->space_lock);
-
- if (ri->prev_liability >= liability) {
- /* Liability does not shrink, next time try GC then */
- ri->shrink_retries += 1;
- if (ri->gc_retries < MAX_GC_RETRIES)
- ri->try_gc = 1;
- dbg_budg("liability did not shrink: retries %d of %d",
- ri->shrink_retries, MAX_SHRINK_RETRIES);
- }
-
- dbg_budg("force write-back (count %d)", ri->shrink_cnt);
- shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
+ do {
+ liab1 = get_liability(c);
+ /*
+ * We probably have some dirty pages or inodes (liability), try
+ * to write them back.
+ */
+ dbg_budg("liability %lld, run write-back", liab1);
+ shrink_liability(c, NR_TO_WRITE);
- ri->prev_liability = liability;
- ri->shrink_cnt += 1;
- return -EAGAIN;
- }
+ liab2 = get_liability(c);
+ if (liab2 < liab1)
+ return -EAGAIN;
- /*
- * Try to run garbage collector unless it was already tried too many
- * times.
- */
- if (ri->gc_retries < MAX_GC_RETRIES) {
- ri->gc_retries += 1;
- dbg_budg("run GC, retries %d of %d",
- ri->gc_retries, MAX_GC_RETRIES);
+ dbg_budg("new liability %lld (not shrunk)", liab2);
- ri->try_gc = 0;
+ /* Liability did not shrink again, try GC */
+ dbg_budg("Run GC");
err = run_gc(c);
if (!err)
return -EAGAIN;
- if (err == -EAGAIN) {
- dbg_budg("GC asked to commit");
- err = ubifs_run_commit(c);
- if (err)
- return err;
- return -EAGAIN;
- }
-
- if (err != -ENOSPC)
+ if (err != -EAGAIN && err != -ENOSPC)
+ /* Some real error happened */
return err;
- /*
- * GC could not make any progress. If this is the first time,
- * then it makes sense to try to commit, because it might make
- * some dirty space.
- */
- dbg_budg("GC returned -ENOSPC, retries %d",
- ri->nospc_retries);
- if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
- return err;
- ri->nospc_retries += 1;
- }
-
- /* Neither GC nor write-back helped, try to commit */
- if (ri->cmt_retries < MAX_CMT_RETRIES) {
- ri->cmt_retries += 1;
- dbg_budg("run commit, retries %d of %d",
- ri->cmt_retries, MAX_CMT_RETRIES);
+ dbg_budg("Run commit (retries %d)", retries);
err = ubifs_run_commit(c);
if (err)
return err;
- return -EAGAIN;
- }
+ } while (retries++ < MAX_MKSPC_RETRIES);
+
return -ENOSPC;
}
/**
- * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
+ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
* @c: UBIFS file-system description object
*
- * This function calculates and returns the number of eraseblocks which should
- * be kept for index usage.
+ * This function calculates and returns the number of LEBs which should be kept
+ * for index usage.
*/
int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
{
- int ret;
- uint64_t idx_size;
-
- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
-
- /* And make sure we have twice the index size of space reserved */
- idx_size <<= 1;
+ int idx_lebs;
+ long long idx_size;
+ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
+ /* And make sure we have thrice the index size of space reserved */
+ idx_size += idx_size << 1;
/*
* We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
* pair, nor similarly the two variables for the new index size, so we
* have to do this costly 64-bit division on fast-path.
*/
- if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
- ret = idx_size + 1;
- else
- ret = idx_size;
+ idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
/*
* The index head is not available for the in-the-gaps method, so add an
* extra LEB to compensate.
*/
- ret += 1;
- /*
- * At present the index needs at least 2 LEBs: one for the index head
- * and one for in-the-gaps method (which currently does not cater for
- * the index head and so excludes it from consideration).
- */
- if (ret < 2)
- ret = 2;
- return ret;
+ idx_lebs += 1;
+ if (idx_lebs < MIN_INDEX_LEBS)
+ idx_lebs = MIN_INDEX_LEBS;
+ return idx_lebs;
}
/**
@@ -302,18 +211,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
int subtract_lebs;
long long available;
- /*
- * Force the amount available to the total size reported if the used
- * space is zero.
- */
- if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
- c->budg_data_growth + c->budg_dd_growth == 0) {
- /* Do the same calculation as for c->block_cnt */
- available = c->main_lebs - 2;
- available *= c->leb_size - c->dark_wm;
- return available;
- }
-
available = c->main_bytes - c->lst.total_used;
/*
@@ -375,8 +272,8 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
*/
static int can_use_rp(struct ubifs_info *c)
{
- if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
- (c->rp_gid != 0 && in_group_p(c->rp_gid)))
+ if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) ||
+ (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid)))
return 1;
return 0;
}
@@ -385,23 +282,23 @@ static int can_use_rp(struct ubifs_info *c)
* do_budget_space - reserve flash space for index and data growth.
* @c: UBIFS file-system description object
*
- * This function makes sure UBIFS has enough free eraseblocks for index growth
- * and data.
+ * This function makes sure UBIFS has enough free LEBs for index growth and
+ * data.
*
- * When budgeting index space, UBIFS reserves twice as more LEBs as the index
+ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
* would take if it was consolidated and written to the flash. This guarantees
* that the "in-the-gaps" commit method always succeeds and UBIFS will always
* be able to commit dirty index. So this function basically adds amount of
- * budgeted index space to the size of the current index, multiplies this by 2,
- * and makes sure this does not exceed the amount of free eraseblocks.
+ * budgeted index space to the size of the current index, multiplies this by 3,
+ * and makes sure this does not exceed the amount of free LEBs.
*
- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
* o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
* be large, because UBIFS does not do any index consolidation as long as
* there is free space. IOW, the index may take a lot of LEBs, but the LEBs
* will contain a lot of dirt.
- * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
- * consolidated to take up to @c->min_idx_lebs LEBs.
+ * o @c->bi.min_idx_lebs is the number of LEBs the index presumably takes. IOW,
+ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
*
* This function returns zero in case of success, and %-ENOSPC in case of
* failure.
@@ -426,31 +323,32 @@ static int do_budget_space(struct ubifs_info *c)
* @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
* @c->lst.taken_empty_lebs
*
- * @empty_lebs are available because they are empty. @freeable_cnt are
- * available because they contain only free and dirty space and the
- * index allocation always occurs after wbufs are synch'ed.
- * @idx_gc_cnt are available because they are index LEBs that have been
- * garbage collected (including trivial GC) and are awaiting the commit
- * before they can be unmapped - note that the in-the-gaps method will
- * grab these if it needs them. @taken_empty_lebs are empty_lebs that
- * have already been allocated for some purpose (also includes those
- * LEBs on the @idx_gc list).
+ * @c->lst.empty_lebs are available because they are empty.
+ * @c->freeable_cnt are available because they contain only free and
+ * dirty space, @c->idx_gc_cnt are available because they are index
+ * LEBs that have been garbage collected and are awaiting the commit
+ * before they can be used. And the in-the-gaps method will grab these
+ * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have
+ * already been allocated for some purpose.
+ *
+ * Note, @c->idx_gc_cnt is included in both @c->lst.empty_lebs (because
+ * these LEBs are empty) and @c->lst.taken_empty_lebs (because they
+ * are taken until after the commit).
*
- * Note, @taken_empty_lebs may temporarily be higher by one because of
- * the way we serialize LEB allocations and budgeting. See a comment in
- * 'ubifs_find_free_space()'.
+ * Note, @c->lst.taken_empty_lebs may temporarily be higher by one
+ * because of the way we serialize LEB allocations and budgeting. See a
+ * comment in 'ubifs_find_free_space()'.
*/
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
if (unlikely(rsvd_idx_lebs > lebs)) {
- dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
- rsvd_idx_lebs);
+ dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
+ min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
return -ENOSPC;
}
available = ubifs_calc_available(c, min_idx_lebs);
- outstanding = c->budg_data_growth + c->budg_dd_growth;
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
if (unlikely(available < outstanding)) {
dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -461,7 +359,7 @@ static int do_budget_space(struct ubifs_info *c)
if (available - outstanding <= c->rp_size && !can_use_rp(c))
return -ENOSPC;
- c->min_idx_lebs = min_idx_lebs;
+ c->bi.min_idx_lebs = min_idx_lebs;
return 0;
}
@@ -494,11 +392,11 @@ static int calc_data_growth(const struct ubifs_info *c,
{
int data_growth;
- data_growth = req->new_ino ? c->inode_budget : 0;
+ data_growth = req->new_ino ? c->bi.inode_budget : 0;
if (req->new_page)
- data_growth += c->page_budget;
+ data_growth += c->bi.page_budget;
if (req->new_dent)
- data_growth += c->dent_budget;
+ data_growth += c->bi.dent_budget;
data_growth += req->new_ino_d;
return data_growth;
}
@@ -514,12 +412,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
{
int dd_growth;
- dd_growth = req->dirtied_page ? c->page_budget : 0;
+ dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
if (req->dirtied_ino)
- dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
if (req->mod_dent)
- dd_growth += c->dent_budget;
+ dd_growth += c->bi.dent_budget;
dd_growth += req->dirtied_ino_d;
return dd_growth;
}
@@ -539,35 +437,40 @@ static int calc_dd_growth(const struct ubifs_info *c,
*/
int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
{
- int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
- int err, idx_growth, data_growth, dd_growth;
- struct retries_info ri;
-
+ int err, idx_growth, data_growth, dd_growth, retried = 0;
+
+ ubifs_assert(req->new_page <= 1);
+ ubifs_assert(req->dirtied_page <= 1);
+ ubifs_assert(req->new_dent <= 1);
+ ubifs_assert(req->mod_dent <= 1);
+ ubifs_assert(req->new_ino <= 1);
+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
ubifs_assert(req->dirtied_ino <= 4);
ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(!(req->new_ino_d & 7));
+ ubifs_assert(!(req->dirtied_ino_d & 7));
data_growth = calc_data_growth(c, req);
dd_growth = calc_dd_growth(c, req);
if (!data_growth && !dd_growth)
return 0;
idx_growth = calc_idx_growth(c, req);
- memset(&ri, 0, sizeof(struct retries_info));
again:
spin_lock(&c->space_lock);
- ubifs_assert(c->budg_idx_growth >= 0);
- ubifs_assert(c->budg_data_growth >= 0);
- ubifs_assert(c->budg_dd_growth >= 0);
+ ubifs_assert(c->bi.idx_growth >= 0);
+ ubifs_assert(c->bi.data_growth >= 0);
+ ubifs_assert(c->bi.dd_growth >= 0);
- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
dbg_budg("no space");
spin_unlock(&c->space_lock);
return -ENOSPC;
}
- c->budg_idx_growth += idx_growth;
- c->budg_data_growth += data_growth;
- c->budg_dd_growth += dd_growth;
+ c->bi.idx_growth += idx_growth;
+ c->bi.data_growth += data_growth;
+ c->bi.dd_growth += dd_growth;
err = do_budget_space(c);
if (likely(!err)) {
@@ -579,9 +482,9 @@ again:
}
/* Restore the old values */
- c->budg_idx_growth -= idx_growth;
- c->budg_data_growth -= data_growth;
- c->budg_dd_growth -= dd_growth;
+ c->bi.idx_growth -= idx_growth;
+ c->bi.data_growth -= data_growth;
+ c->bi.dd_growth -= dd_growth;
spin_unlock(&c->space_lock);
if (req->fast) {
@@ -589,16 +492,21 @@ again:
return err;
}
- err = make_free_space(c, &ri);
+ err = make_free_space(c);
+ cond_resched();
if (err == -EAGAIN) {
dbg_budg("try again");
- cond_resched();
goto again;
} else if (err == -ENOSPC) {
+ if (!retried) {
+ retried = 1;
+ dbg_budg("-ENOSPC, but anyway try once again");
+ goto again;
+ }
dbg_budg("FS is full, -ENOSPC");
- c->nospace = 1;
+ c->bi.nospace = 1;
if (can_use_rp(c) || c->rp_size == 0)
- c->nospace_rp = 1;
+ c->bi.nospace_rp = 1;
smp_wmb();
} else
ubifs_err("cannot budget space, error %d", err);
@@ -613,13 +521,21 @@ again:
* This function releases the space budgeted by 'ubifs_budget_space()'. Note,
* since the index changes (which were budgeted for in @req->idx_growth) will
* only be written to the media on commit, this function moves the index budget
- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
- * zeroed by the commit operation.
+ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
+ * by the commit operation.
*/
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
+ ubifs_assert(req->new_page <= 1);
+ ubifs_assert(req->dirtied_page <= 1);
+ ubifs_assert(req->new_dent <= 1);
+ ubifs_assert(req->mod_dent <= 1);
+ ubifs_assert(req->new_ino <= 1);
+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
ubifs_assert(req->dirtied_ino <= 4);
ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(!(req->new_ino_d & 7));
+ ubifs_assert(!(req->dirtied_ino_d & 7));
if (!req->recalculate) {
ubifs_assert(req->idx_growth >= 0);
ubifs_assert(req->data_growth >= 0);
@@ -635,19 +551,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
if (!req->data_growth && !req->dd_growth)
return;
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
spin_lock(&c->space_lock);
- c->budg_idx_growth -= req->idx_growth;
- c->budg_uncommitted_idx += req->idx_growth;
- c->budg_data_growth -= req->data_growth;
- c->budg_dd_growth -= req->dd_growth;
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
-
- ubifs_assert(c->budg_idx_growth >= 0);
- ubifs_assert(c->budg_data_growth >= 0);
- ubifs_assert(c->min_idx_lebs < c->main_lebs);
+ c->bi.idx_growth -= req->idx_growth;
+ c->bi.uncommitted_idx += req->idx_growth;
+ c->bi.data_growth -= req->data_growth;
+ c->bi.dd_growth -= req->dd_growth;
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+ ubifs_assert(c->bi.idx_growth >= 0);
+ ubifs_assert(c->bi.data_growth >= 0);
+ ubifs_assert(c->bi.dd_growth >= 0);
+ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
+ ubifs_assert(!(c->bi.idx_growth & 7));
+ ubifs_assert(!(c->bi.data_growth & 7));
+ ubifs_assert(!(c->bi.dd_growth & 7));
spin_unlock(&c->space_lock);
}
@@ -656,7 +576,7 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
* @c: UBIFS file-system description object
*
* This function converts budget which was allocated for a new page of data to
- * the budget of changing an existing page of data. The latter is smaller then
+ * the budget of changing an existing page of data. The latter is smaller than
* the former, so this function only does simple re-calculation and does not
* involve any write-back.
*/
@@ -664,13 +584,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
{
spin_lock(&c->space_lock);
/* Release the index growth reservation */
- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
/* Release the data growth reservation */
- c->budg_data_growth -= c->page_budget;
+ c->bi.data_growth -= c->bi.page_budget;
/* Increase the dirty data growth reservation instead */
- c->budg_dd_growth += c->page_budget;
+ c->bi.dd_growth += c->bi.page_budget;
/* And re-calculate the indexing space reservation */
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
}
@@ -681,47 +601,108 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
*
* This function releases budget corresponding to a dirty inode. It is usually
* called when after the inode has been written to the media and marked as
- * clean.
+ * clean. It also causes the "no space" flags to be cleared.
*/
void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
struct ubifs_inode *ui)
{
- struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
- .dirtied_ino_d = ui->data_len};
+ struct ubifs_budget_req req;
+ memset(&req, 0, sizeof(struct ubifs_budget_req));
+ /* The "no space" flags will be cleared because dd_growth is > 0 */
+ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
ubifs_release_budget(c, &req);
}
/**
- * ubifs_budg_get_free_space - return amount of free space.
+ * ubifs_reported_space - calculate reported free space.
+ * @c: the UBIFS file-system description object
+ * @free: amount of free space
+ *
+ * This function calculates amount of free space which will be reported to
+ * user-space. User-space applications tend to expect that if the file-system
+ * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
+ * are able to write a file of size N. UBIFS attaches node headers to each data
+ * node and it has to write indexing nodes as well. This introduces additional
+ * overhead, and UBIFS has to report slightly less free space to meet the above
+ * expectations.
+ *
+ * This function assumes free space is made up of uncompressed data nodes and
+ * full index nodes (one per data node, tripled because we always allow enough
+ * space to write the index thrice).
+ *
+ * Note, the calculation is pessimistic, which means that most of the time
+ * UBIFS reports less space than it actually has.
+ */
+long long ubifs_reported_space(const struct ubifs_info *c, long long free)
+{
+ int divisor, factor, f;
+
+ /*
+ * Reported space size is @free * X, where X is UBIFS block size
+ * divided by UBIFS block size + all overhead one data block
+ * introduces. The overhead is the node header + indexing overhead.
+ *
+ * Indexing overhead calculations are based on the following formula:
+ * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
+ * of data nodes, f - fanout. Because the effective UBIFS fanout is
+ * half the maximum fanout, we assume that each data node
+ * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
+ * Note, the multiplier 3 is because UBIFS reserves three times as much
+ * space for the index.
+ */
+ f = c->fanout > 3 ? c->fanout >> 1 : 2;
+ factor = UBIFS_BLOCK_SIZE;
+ divisor = UBIFS_MAX_DATA_NODE_SZ;
+ divisor += (c->max_idx_node_sz * 3) / (f - 1);
+ free *= factor;
+ return div_u64(free, divisor);
+}
+
+/**
+ * ubifs_get_free_space_nolock - return amount of free space.
* @c: UBIFS file-system description object
*
- * This function returns amount of free space on the file-system.
+ * This function calculates amount of free space to report to user-space.
+ *
+ * Because UBIFS may introduce substantial overhead (the index, node headers,
+ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
+ * free flash space it has (well, because not all dirty space is reclaimable,
+ * UBIFS does not actually know the real amount). If UBIFS did so, it would
+ * break user expectations about what free space is. Users seem to be accustomed
+ * to assume that if the file-system reports N bytes of free space, they would
+ * be able to fit a file of N bytes to the FS. This almost works for
+ * traditional file-systems, because they have way less overhead than UBIFS.
+ * So, to keep users happy, UBIFS tries to take the overhead into account.
*/
-long long ubifs_budg_get_free_space(struct ubifs_info *c)
+long long ubifs_get_free_space_nolock(struct ubifs_info *c)
{
- int min_idx_lebs, rsvd_idx_lebs;
+ int rsvd_idx_lebs, lebs;
long long available, outstanding, free;
- /* Do exactly the same calculations as in 'do_budget_space()' */
- spin_lock(&c->space_lock);
- min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
+ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
- if (min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
+ /*
+ * When reporting free space to user-space, UBIFS guarantees that it is
+ * possible to write a file of free space size. This means that for
+ * empty LEBs we may use more precise calculations than
+ * 'ubifs_calc_available()' is using. Namely, we know that in empty
+ * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
+ * Thus, amend the available space.
+ *
+ * Note, the calculations below are similar to what we have in
+ * 'do_budget_space()', so refer there for comments.
+ */
+ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
-
- if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
- - c->lst.taken_empty_lebs) {
- spin_unlock(&c->space_lock);
- return 0;
- }
-
- available = ubifs_calc_available(c, min_idx_lebs);
- outstanding = c->budg_data_growth + c->budg_dd_growth;
- c->min_idx_lebs = min_idx_lebs;
- spin_unlock(&c->space_lock);
+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
+ c->lst.taken_empty_lebs;
+ lebs -= rsvd_idx_lebs;
+ available += lebs * (c->dark_wm - c->leb_overhead);
if (available > outstanding)
free = ubifs_reported_space(c, available - outstanding);
@@ -729,3 +710,21 @@ long long ubifs_budg_get_free_space(struct ubifs_info *c)
free = 0;
return free;
}
+
+/**
+ * ubifs_get_free_space - return amount of free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates and returns amount of free space to report to
+ * user-space.
+ */
+long long ubifs_get_free_space(struct ubifs_info *c)
+{
+ long long free;
+
+ spin_lock(&c->space_lock);
+ free = ubifs_get_free_space_nolock(c);
+ spin_unlock(&c->space_lock);
+
+ return free;
+}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 3b516316c9b..ff8229340cd 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -45,8 +45,59 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/slab.h>
#include "ubifs.h"
+/*
+ * nothing_to_commit - check if there is nothing to commit.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function which checks if there is anything to commit. It is
+ * used as an optimization to avoid starting the commit if it is not really
+ * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
+ * writing the commit start node to the log), and it is better to avoid doing
+ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
+ * nothing to commit, it is more optimal to avoid any flash I/O.
+ *
+ * This function has to be called with @c->commit_sem locked for writing -
+ * this function does not take LPT/TNC locks because the @c->commit_sem
+ * guarantees that we have exclusive access to the TNC and LPT data structures.
+ *
+ * This function returns %1 if there is nothing to commit and %0 otherwise.
+ */
+static int nothing_to_commit(struct ubifs_info *c)
+{
+ /*
+ * During mounting or remounting from R/O mode to R/W mode we may
+ * commit for various recovery-related reasons.
+ */
+ if (c->mounting || c->remounting_rw)
+ return 0;
+
+ /*
+ * If the root TNC node is dirty, we definitely have something to
+ * commit.
+ */
+ if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
+ return 0;
+
+ /*
+ * Even though the TNC is clean, the LPT tree may have dirty nodes. For
+ * example, this may happen if the budgeting subsystem invoked GC to
+ * make some free space, and the GC found an LEB with only dirty and
+ * free space. In this case GC would just change the lprops of this
+ * LEB (by turning all space into free space) and unmap it.
+ */
+ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
+ return 0;
+
+ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
+ ubifs_assert(c->dirty_pn_cnt == 0);
+ ubifs_assert(c->dirty_nn_cnt == 0);
+
+ return 1;
+}
+
/**
* do_commit - commit the journal.
* @c: UBIFS file-system description object
@@ -62,11 +113,19 @@ static int do_commit(struct ubifs_info *c)
struct ubifs_lp_stats lst;
dbg_cmt("start");
- if (c->ro_media) {
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+
+ if (c->ro_error) {
err = -EROFS;
goto out_up;
}
+ if (nothing_to_commit(c)) {
+ up_write(&c->commit_sem);
+ err = 0;
+ goto out_cancel;
+ }
+
/* Sync all write buffers (necessary for recovery) */
for (i = 0; i < c->jhead_cnt; i++) {
err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
@@ -74,6 +133,7 @@ static int do_commit(struct ubifs_info *c)
goto out_up;
}
+ c->cmt_no += 1;
err = ubifs_gc_start_commit(c);
if (err)
goto out_up;
@@ -115,14 +175,14 @@ static int do_commit(struct ubifs_info *c)
goto out;
mutex_lock(&c->mst_mutex);
- c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no);
+ c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
c->mst_node->root_offs = cpu_to_le32(zroot.offs);
c->mst_node->root_len = cpu_to_le32(zroot.len);
c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
+ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
@@ -158,12 +218,12 @@ static int do_commit(struct ubifs_info *c)
if (err)
goto out;
+out_cancel:
spin_lock(&c->cs_lock);
c->cmt_state = COMMIT_RESTING;
wake_up(&c->cmt_wq);
dbg_cmt("commit end");
spin_unlock(&c->cs_lock);
-
return 0;
out_up:
@@ -268,7 +328,7 @@ int ubifs_bg_thread(void *info)
cond_resched();
}
- dbg_msg("background thread \"%s\" stops", c->bgt_name);
+ ubifs_msg("background thread \"%s\" stops", c->bgt_name);
return 0;
}
@@ -358,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c)
spin_lock(&c->cs_lock);
if (c->cmt_state == COMMIT_BROKEN) {
- err = -EINVAL;
+ err = -EROFS;
goto out;
}
@@ -384,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c)
* re-check it.
*/
if (c->cmt_state == COMMIT_BROKEN) {
- err = -EINVAL;
+ err = -EROFS;
goto out_cmt_unlock;
}
@@ -436,7 +496,9 @@ int ubifs_gc_should_commit(struct ubifs_info *c)
return ret;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
+/*
+ * Everything below is related to debugging.
+ */
/**
* struct idx_node - hold index nodes during index tree traversal.
@@ -452,7 +514,7 @@ struct idx_node {
struct list_head list;
int iip;
union ubifs_key upper_key;
- struct ubifs_idx_node idx __attribute__((aligned(8)));
+ struct ubifs_idx_node idx __aligned(8);
};
/**
@@ -469,12 +531,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
struct ubifs_idx_node *idx;
int lnum, offs, len, err = 0;
+ struct ubifs_debug_info *d = c->dbg;
- c->old_zroot = *zroot;
-
- lnum = c->old_zroot.lnum;
- offs = c->old_zroot.offs;
- len = c->old_zroot.len;
+ d->old_zroot = *zroot;
+ lnum = d->old_zroot.lnum;
+ offs = d->old_zroot.offs;
+ len = d->old_zroot.len;
idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
if (!idx)
@@ -484,8 +546,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
if (err)
goto out;
- c->old_zroot_level = le16_to_cpu(idx->level);
- c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
+ d->old_zroot_level = le16_to_cpu(idx->level);
+ d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
out:
kfree(idx);
return err;
@@ -508,15 +570,16 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
int first = 1, iip;
- union ubifs_key lower_key, upper_key, l_key, u_key;
+ struct ubifs_debug_info *d = c->dbg;
+ union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
unsigned long long uninitialized_var(last_sqnum);
struct ubifs_idx_node *idx;
struct list_head list;
struct idx_node *i;
size_t sz;
- if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
- goto out;
+ if (!dbg_is_chk_index(c))
+ return 0;
INIT_LIST_HEAD(&list);
@@ -524,9 +587,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
UBIFS_IDX_NODE_SZ;
/* Start at the old zroot */
- lnum = c->old_zroot.lnum;
- offs = c->old_zroot.offs;
- len = c->old_zroot.len;
+ lnum = d->old_zroot.lnum;
+ offs = d->old_zroot.offs;
+ len = d->old_zroot.len;
iip = 0;
/*
@@ -559,11 +622,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
if (first) {
first = 0;
/* Check root level and sqnum */
- if (le16_to_cpu(idx->level) != c->old_zroot_level) {
+ if (le16_to_cpu(idx->level) != d->old_zroot_level) {
err = 2;
goto out_dump;
}
- if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
+ if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) {
err = 3;
goto out_dump;
}
@@ -653,14 +716,14 @@ out:
return 0;
out_dump:
- dbg_err("dumping index node (iip=%d)", i->iip);
- dbg_dump_node(c, idx);
+ ubifs_err("dumping index node (iip=%d)", i->iip);
+ ubifs_dump_node(c, idx);
list_del(&i->list);
kfree(i);
if (!list_empty(&list)) {
i = list_entry(list.prev, struct idx_node, list);
- dbg_err("dumping parent index node");
- dbg_dump_node(c, &i->idx);
+ ubifs_err("dumping parent index node");
+ ubifs_dump_node(c, &i->idx);
}
out_free:
while (!list_empty(&list)) {
@@ -673,5 +736,3 @@ out_free:
err = -EINVAL;
return err;
}
-
-#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 5bb51dac3c1..2bfa0953335 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -33,7 +33,7 @@
/* Fake description object for the "none" compressor */
static struct ubifs_compressor none_compr = {
.compr_type = UBIFS_COMPR_NONE,
- .name = "no compression",
+ .name = "none",
.capi_name = "",
};
@@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex);
static struct ubifs_compressor lzo_compr = {
.compr_type = UBIFS_COMPR_LZO,
.comp_mutex = &lzo_mutex,
- .name = "LZO",
+ .name = "lzo",
.capi_name = "lzo",
};
#else
static struct ubifs_compressor lzo_compr = {
.compr_type = UBIFS_COMPR_LZO,
- .name = "LZO",
+ .name = "lzo",
};
#endif
@@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
*
* Note, if the input buffer was not compressed, it is copied to the output
* buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
- *
- * This functions returns %0 on success or a negative error code on failure.
*/
void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
int *compr_type)
@@ -110,21 +108,20 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
if (compr->comp_mutex)
mutex_lock(compr->comp_mutex);
err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
- out_len);
+ (unsigned int *)out_len);
if (compr->comp_mutex)
mutex_unlock(compr->comp_mutex);
if (unlikely(err)) {
- ubifs_warn("cannot compress %d bytes, compressor %s, "
- "error %d, leave data uncompressed",
+ ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
in_len, compr->name, err);
goto no_compr;
}
/*
- * Presently, we just require that compression results in less data,
- * rather than any defined minimum compression ratio or amount.
+ * If the data compressed only slightly, it is better to leave it
+ * uncompressed to improve read speed.
*/
- if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
+ if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
goto no_compr;
return;
@@ -174,12 +171,12 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
if (compr->decomp_mutex)
mutex_lock(compr->decomp_mutex);
err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
- out_len);
+ (unsigned int *)out_len);
if (compr->decomp_mutex)
mutex_unlock(compr->decomp_mutex);
if (err)
- ubifs_err("cannot decompress %d bytes, compressor %s, "
- "error %d", in_len, compr->name, err);
+ ubifs_err("cannot decompress %d bytes, compressor %s, error %d",
+ in_len, compr->name, err);
return err;
}
@@ -246,7 +243,7 @@ out_lzo:
/**
* ubifs_compressors_exit - de-initialize UBIFS compressors.
*/
-void __exit ubifs_compressors_exit(void)
+void ubifs_compressors_exit(void)
{
compr_exit(&lzo_compr);
compr_exit(&zlib_compr);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4e3aaeba4ec..177b0152fef 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -27,30 +27,14 @@
* various local functions of those subsystems.
*/
-#define UBIFS_DBG_PRESERVE_UBI
-
-#include "ubifs.h"
#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#ifdef CONFIG_UBIFS_FS_DEBUG
-
-DEFINE_SPINLOCK(dbg_lock);
-
-static char dbg_key_buf0[128];
-static char dbg_key_buf1[128];
-
-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
-unsigned int ubifs_tst_flags;
-
-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
-module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
-module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
+#include <linux/debugfs.h>
+#include <linux/math64.h>
+#include <linux/uaccess.h>
+#include <linux/random.h>
+#include "ubifs.h"
-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
-MODULE_PARM_DESC(debug_chks, "Debug check flags");
-MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
+static DEFINE_SPINLOCK(dbg_lock);
static const char *get_key_fmt(int fmt)
{
@@ -92,8 +76,30 @@ static const char *get_key_type(int type)
}
}
-static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
- char *buffer)
+static const char *get_dent_type(int type)
+{
+ switch (type) {
+ case UBIFS_ITYPE_REG:
+ return "file";
+ case UBIFS_ITYPE_DIR:
+ return "dir";
+ case UBIFS_ITYPE_LNK:
+ return "symlink";
+ case UBIFS_ITYPE_BLK:
+ return "blkdev";
+ case UBIFS_ITYPE_CHR:
+ return "char dev";
+ case UBIFS_ITYPE_FIFO:
+ return "fifo";
+ case UBIFS_ITYPE_SOCK:
+ return "socket";
+ default:
+ return "unknown/invalid type";
+ }
+}
+
+const char *dbg_snprintf_key(const struct ubifs_info *c,
+ const union ubifs_key *key, char *buffer, int len)
{
char *p = buffer;
int type = key_type(c, key);
@@ -101,42 +107,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
switch (type) {
case UBIFS_INO_KEY:
- sprintf(p, "(%lu, %s)", key_inum(c, key),
- get_key_type(type));
+ len -= snprintf(p, len, "(%lu, %s)",
+ (unsigned long)key_inum(c, key),
+ get_key_type(type));
break;
case UBIFS_DENT_KEY:
case UBIFS_XENT_KEY:
- sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key),
- get_key_type(type), key_hash(c, key));
+ len -= snprintf(p, len, "(%lu, %s, %#08x)",
+ (unsigned long)key_inum(c, key),
+ get_key_type(type), key_hash(c, key));
break;
case UBIFS_DATA_KEY:
- sprintf(p, "(%lu, %s, %u)", key_inum(c, key),
- get_key_type(type), key_block(c, key));
+ len -= snprintf(p, len, "(%lu, %s, %u)",
+ (unsigned long)key_inum(c, key),
+ get_key_type(type), key_block(c, key));
break;
case UBIFS_TRUN_KEY:
- sprintf(p, "(%lu, %s)",
- key_inum(c, key), get_key_type(type));
+ len -= snprintf(p, len, "(%lu, %s)",
+ (unsigned long)key_inum(c, key),
+ get_key_type(type));
break;
default:
- sprintf(p, "(bad key type: %#08x, %#08x)",
- key->u32[0], key->u32[1]);
+ len -= snprintf(p, len, "(bad key type: %#08x, %#08x)",
+ key->u32[0], key->u32[1]);
}
} else
- sprintf(p, "bad key format %d", c->key_fmt);
-}
-
-const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
-{
- /* dbg_lock must be held */
- sprintf_key(c, key, dbg_key_buf0);
- return dbg_key_buf0;
-}
-
-const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
-{
- /* dbg_lock must be held */
- sprintf_key(c, key, dbg_key_buf1);
- return dbg_key_buf1;
+ len -= snprintf(p, len, "bad key format %d", c->key_fmt);
+ ubifs_assert(len > 0);
+ return p;
}
const char *dbg_ntype(int type)
@@ -205,62 +203,112 @@ const char *dbg_cstate(int cmt_state)
}
}
+const char *dbg_jhead(int jhead)
+{
+ switch (jhead) {
+ case GCHD:
+ return "0 (GC)";
+ case BASEHD:
+ return "1 (base)";
+ case DATAHD:
+ return "2 (data)";
+ default:
+ return "unknown journal head";
+ }
+}
+
static void dump_ch(const struct ubifs_ch *ch)
{
- printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
- printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc));
- printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type,
+ pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic));
+ pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc));
+ pr_err("\tnode_type %d (%s)\n", ch->node_type,
dbg_ntype(ch->node_type));
- printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type,
+ pr_err("\tgroup_type %d (%s)\n", ch->group_type,
dbg_gtype(ch->group_type));
- printk(KERN_DEBUG "\tsqnum %llu\n",
+ pr_err("\tsqnum %llu\n",
(unsigned long long)le64_to_cpu(ch->sqnum));
- printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
+ pr_err("\tlen %u\n", le32_to_cpu(ch->len));
}
-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
+void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
{
const struct ubifs_inode *ui = ubifs_inode(inode);
+ struct qstr nm = { .name = NULL };
+ union ubifs_key key;
+ struct ubifs_dent_node *dent, *pdent = NULL;
+ int count = 2;
- printk(KERN_DEBUG "inode %lu\n", inode->i_ino);
- printk(KERN_DEBUG "size %llu\n",
+ pr_err("Dump in-memory inode:");
+ pr_err("\tinode %lu\n", inode->i_ino);
+ pr_err("\tsize %llu\n",
(unsigned long long)i_size_read(inode));
- printk(KERN_DEBUG "nlink %u\n", inode->i_nlink);
- printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid);
- printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid);
- printk(KERN_DEBUG "atime %u.%u\n",
+ pr_err("\tnlink %u\n", inode->i_nlink);
+ pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode));
+ pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode));
+ pr_err("\tatime %u.%u\n",
(unsigned int)inode->i_atime.tv_sec,
(unsigned int)inode->i_atime.tv_nsec);
- printk(KERN_DEBUG "mtime %u.%u\n",
+ pr_err("\tmtime %u.%u\n",
(unsigned int)inode->i_mtime.tv_sec,
(unsigned int)inode->i_mtime.tv_nsec);
- printk(KERN_DEBUG "ctime %u.%u\n",
+ pr_err("\tctime %u.%u\n",
(unsigned int)inode->i_ctime.tv_sec,
(unsigned int)inode->i_ctime.tv_nsec);
- printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
- printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size);
- printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt);
- printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
- printk(KERN_DEBUG "dirty %u\n", ui->dirty);
- printk(KERN_DEBUG "xattr %u\n", ui->xattr);
- printk(KERN_DEBUG "flags %d\n", ui->flags);
- printk(KERN_DEBUG "compr_type %d\n", ui->compr_type);
- printk(KERN_DEBUG "data_len %d\n", ui->data_len);
+ pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum);
+ pr_err("\txattr_size %u\n", ui->xattr_size);
+ pr_err("\txattr_cnt %u\n", ui->xattr_cnt);
+ pr_err("\txattr_names %u\n", ui->xattr_names);
+ pr_err("\tdirty %u\n", ui->dirty);
+ pr_err("\txattr %u\n", ui->xattr);
+ pr_err("\tbulk_read %u\n", ui->xattr);
+ pr_err("\tsynced_i_size %llu\n",
+ (unsigned long long)ui->synced_i_size);
+ pr_err("\tui_size %llu\n",
+ (unsigned long long)ui->ui_size);
+ pr_err("\tflags %d\n", ui->flags);
+ pr_err("\tcompr_type %d\n", ui->compr_type);
+ pr_err("\tlast_page_read %lu\n", ui->last_page_read);
+ pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row);
+ pr_err("\tdata_len %d\n", ui->data_len);
+
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ pr_err("List of directory entries:\n");
+ ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
+
+ lowest_dent_key(c, &key, inode->i_ino);
+ while (1) {
+ dent = ubifs_tnc_next_ent(c, &key, &nm);
+ if (IS_ERR(dent)) {
+ if (PTR_ERR(dent) != -ENOENT)
+ pr_err("error %ld\n", PTR_ERR(dent));
+ break;
+ }
+
+ pr_err("\t%d: %s (%s)\n",
+ count++, dent->name, get_dent_type(dent->type));
+
+ nm.name = dent->name;
+ nm.len = le16_to_cpu(dent->nlen);
+ kfree(pdent);
+ pdent = dent;
+ key_read(c, &dent->key, &key);
+ }
+ kfree(pdent);
}
-void dbg_dump_node(const struct ubifs_info *c, const void *node)
+void ubifs_dump_node(const struct ubifs_info *c, const void *node)
{
int i, n;
union ubifs_key key;
const struct ubifs_ch *ch = node;
-
- if (dbg_failure_mode)
- return;
+ char key_buf[DBG_KEY_BUF_LEN];
/* If the magic is incorrect, just hexdump the first bytes */
if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
- printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+ pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ);
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
(void *)node, UBIFS_CH_SZ, 1);
return;
}
@@ -273,8 +321,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
{
const struct ubifs_pad_node *pad = node;
- printk(KERN_DEBUG "\tpad_len %u\n",
- le32_to_cpu(pad->pad_len));
+ pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len));
break;
}
case UBIFS_SB_NODE:
@@ -282,115 +329,77 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
const struct ubifs_sb_node *sup = node;
unsigned int sup_flags = le32_to_cpu(sup->flags);
- printk(KERN_DEBUG "\tkey_hash %d (%s)\n",
+ pr_err("\tkey_hash %d (%s)\n",
(int)sup->key_hash, get_key_hash(sup->key_hash));
- printk(KERN_DEBUG "\tkey_fmt %d (%s)\n",
+ pr_err("\tkey_fmt %d (%s)\n",
(int)sup->key_fmt, get_key_fmt(sup->key_fmt));
- printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
- printk(KERN_DEBUG "\t big_lpt %u\n",
+ pr_err("\tflags %#x\n", sup_flags);
+ pr_err("\t big_lpt %u\n",
!!(sup_flags & UBIFS_FLG_BIGLPT));
- printk(KERN_DEBUG "\tmin_io_size %u\n",
- le32_to_cpu(sup->min_io_size));
- printk(KERN_DEBUG "\tleb_size %u\n",
- le32_to_cpu(sup->leb_size));
- printk(KERN_DEBUG "\tleb_cnt %u\n",
- le32_to_cpu(sup->leb_cnt));
- printk(KERN_DEBUG "\tmax_leb_cnt %u\n",
- le32_to_cpu(sup->max_leb_cnt));
- printk(KERN_DEBUG "\tmax_bud_bytes %llu\n",
+ pr_err("\t space_fixup %u\n",
+ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
+ pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size));
+ pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size));
+ pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt));
+ pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt));
+ pr_err("\tmax_bud_bytes %llu\n",
(unsigned long long)le64_to_cpu(sup->max_bud_bytes));
- printk(KERN_DEBUG "\tlog_lebs %u\n",
- le32_to_cpu(sup->log_lebs));
- printk(KERN_DEBUG "\tlpt_lebs %u\n",
- le32_to_cpu(sup->lpt_lebs));
- printk(KERN_DEBUG "\torph_lebs %u\n",
- le32_to_cpu(sup->orph_lebs));
- printk(KERN_DEBUG "\tjhead_cnt %u\n",
- le32_to_cpu(sup->jhead_cnt));
- printk(KERN_DEBUG "\tfanout %u\n",
- le32_to_cpu(sup->fanout));
- printk(KERN_DEBUG "\tlsave_cnt %u\n",
- le32_to_cpu(sup->lsave_cnt));
- printk(KERN_DEBUG "\tdefault_compr %u\n",
+ pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs));
+ pr_err("\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs));
+ pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs));
+ pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt));
+ pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout));
+ pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt));
+ pr_err("\tdefault_compr %u\n",
(int)le16_to_cpu(sup->default_compr));
- printk(KERN_DEBUG "\trp_size %llu\n",
+ pr_err("\trp_size %llu\n",
(unsigned long long)le64_to_cpu(sup->rp_size));
- printk(KERN_DEBUG "\trp_uid %u\n",
- le32_to_cpu(sup->rp_uid));
- printk(KERN_DEBUG "\trp_gid %u\n",
- le32_to_cpu(sup->rp_gid));
- printk(KERN_DEBUG "\tfmt_version %u\n",
- le32_to_cpu(sup->fmt_version));
- printk(KERN_DEBUG "\ttime_gran %u\n",
- le32_to_cpu(sup->time_gran));
- printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X"
- "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
- sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
- sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
- sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
- sup->uuid[12], sup->uuid[13], sup->uuid[14],
- sup->uuid[15]);
+ pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid));
+ pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid));
+ pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version));
+ pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran));
+ pr_err("\tUUID %pUB\n", sup->uuid);
break;
}
case UBIFS_MST_NODE:
{
const struct ubifs_mst_node *mst = node;
- printk(KERN_DEBUG "\thighest_inum %llu\n",
+ pr_err("\thighest_inum %llu\n",
(unsigned long long)le64_to_cpu(mst->highest_inum));
- printk(KERN_DEBUG "\tcommit number %llu\n",
+ pr_err("\tcommit number %llu\n",
(unsigned long long)le64_to_cpu(mst->cmt_no));
- printk(KERN_DEBUG "\tflags %#x\n",
- le32_to_cpu(mst->flags));
- printk(KERN_DEBUG "\tlog_lnum %u\n",
- le32_to_cpu(mst->log_lnum));
- printk(KERN_DEBUG "\troot_lnum %u\n",
- le32_to_cpu(mst->root_lnum));
- printk(KERN_DEBUG "\troot_offs %u\n",
- le32_to_cpu(mst->root_offs));
- printk(KERN_DEBUG "\troot_len %u\n",
- le32_to_cpu(mst->root_len));
- printk(KERN_DEBUG "\tgc_lnum %u\n",
- le32_to_cpu(mst->gc_lnum));
- printk(KERN_DEBUG "\tihead_lnum %u\n",
- le32_to_cpu(mst->ihead_lnum));
- printk(KERN_DEBUG "\tihead_offs %u\n",
- le32_to_cpu(mst->ihead_offs));
- printk(KERN_DEBUG "\tindex_size %u\n",
- le32_to_cpu(mst->index_size));
- printk(KERN_DEBUG "\tlpt_lnum %u\n",
- le32_to_cpu(mst->lpt_lnum));
- printk(KERN_DEBUG "\tlpt_offs %u\n",
- le32_to_cpu(mst->lpt_offs));
- printk(KERN_DEBUG "\tnhead_lnum %u\n",
- le32_to_cpu(mst->nhead_lnum));
- printk(KERN_DEBUG "\tnhead_offs %u\n",
- le32_to_cpu(mst->nhead_offs));
- printk(KERN_DEBUG "\tltab_lnum %u\n",
- le32_to_cpu(mst->ltab_lnum));
- printk(KERN_DEBUG "\tltab_offs %u\n",
- le32_to_cpu(mst->ltab_offs));
- printk(KERN_DEBUG "\tlsave_lnum %u\n",
- le32_to_cpu(mst->lsave_lnum));
- printk(KERN_DEBUG "\tlsave_offs %u\n",
- le32_to_cpu(mst->lsave_offs));
- printk(KERN_DEBUG "\tlscan_lnum %u\n",
- le32_to_cpu(mst->lscan_lnum));
- printk(KERN_DEBUG "\tleb_cnt %u\n",
- le32_to_cpu(mst->leb_cnt));
- printk(KERN_DEBUG "\tempty_lebs %u\n",
- le32_to_cpu(mst->empty_lebs));
- printk(KERN_DEBUG "\tidx_lebs %u\n",
- le32_to_cpu(mst->idx_lebs));
- printk(KERN_DEBUG "\ttotal_free %llu\n",
+ pr_err("\tflags %#x\n", le32_to_cpu(mst->flags));
+ pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum));
+ pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum));
+ pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs));
+ pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len));
+ pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum));
+ pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum));
+ pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs));
+ pr_err("\tindex_size %llu\n",
+ (unsigned long long)le64_to_cpu(mst->index_size));
+ pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum));
+ pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs));
+ pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum));
+ pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs));
+ pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum));
+ pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs));
+ pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum));
+ pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs));
+ pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum));
+ pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt));
+ pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs));
+ pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs));
+ pr_err("\ttotal_free %llu\n",
(unsigned long long)le64_to_cpu(mst->total_free));
- printk(KERN_DEBUG "\ttotal_dirty %llu\n",
+ pr_err("\ttotal_dirty %llu\n",
(unsigned long long)le64_to_cpu(mst->total_dirty));
- printk(KERN_DEBUG "\ttotal_used %llu\n",
+ pr_err("\ttotal_used %llu\n",
(unsigned long long)le64_to_cpu(mst->total_used));
- printk(KERN_DEBUG "\ttotal_dead %llu\n",
+ pr_err("\ttotal_dead %llu\n",
(unsigned long long)le64_to_cpu(mst->total_dead));
- printk(KERN_DEBUG "\ttotal_dark %llu\n",
+ pr_err("\ttotal_dark %llu\n",
(unsigned long long)le64_to_cpu(mst->total_dark));
break;
}
@@ -398,12 +407,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
{
const struct ubifs_ref_node *ref = node;
- printk(KERN_DEBUG "\tlnum %u\n",
- le32_to_cpu(ref->lnum));
- printk(KERN_DEBUG "\toffs %u\n",
- le32_to_cpu(ref->offs));
- printk(KERN_DEBUG "\tjhead %u\n",
- le32_to_cpu(ref->jhead));
+ pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum));
+ pr_err("\toffs %u\n", le32_to_cpu(ref->offs));
+ pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead));
break;
}
case UBIFS_INO_NODE:
@@ -411,40 +417,32 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
const struct ubifs_ino_node *ino = node;
key_read(c, &ino->key, &key);
- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
- printk(KERN_DEBUG "\tcreat_sqnum %llu\n",
+ pr_err("\tkey %s\n",
+ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
+ pr_err("\tcreat_sqnum %llu\n",
(unsigned long long)le64_to_cpu(ino->creat_sqnum));
- printk(KERN_DEBUG "\tsize %llu\n",
+ pr_err("\tsize %llu\n",
(unsigned long long)le64_to_cpu(ino->size));
- printk(KERN_DEBUG "\tnlink %u\n",
- le32_to_cpu(ino->nlink));
- printk(KERN_DEBUG "\tatime %lld.%u\n",
+ pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink));
+ pr_err("\tatime %lld.%u\n",
(long long)le64_to_cpu(ino->atime_sec),
le32_to_cpu(ino->atime_nsec));
- printk(KERN_DEBUG "\tmtime %lld.%u\n",
+ pr_err("\tmtime %lld.%u\n",
(long long)le64_to_cpu(ino->mtime_sec),
le32_to_cpu(ino->mtime_nsec));
- printk(KERN_DEBUG "\tctime %lld.%u\n",
+ pr_err("\tctime %lld.%u\n",
(long long)le64_to_cpu(ino->ctime_sec),
le32_to_cpu(ino->ctime_nsec));
- printk(KERN_DEBUG "\tuid %u\n",
- le32_to_cpu(ino->uid));
- printk(KERN_DEBUG "\tgid %u\n",
- le32_to_cpu(ino->gid));
- printk(KERN_DEBUG "\tmode %u\n",
- le32_to_cpu(ino->mode));
- printk(KERN_DEBUG "\tflags %#x\n",
- le32_to_cpu(ino->flags));
- printk(KERN_DEBUG "\txattr_cnt %u\n",
- le32_to_cpu(ino->xattr_cnt));
- printk(KERN_DEBUG "\txattr_size %u\n",
- le32_to_cpu(ino->xattr_size));
- printk(KERN_DEBUG "\txattr_names %u\n",
- le32_to_cpu(ino->xattr_names));
- printk(KERN_DEBUG "\tcompr_type %#x\n",
+ pr_err("\tuid %u\n", le32_to_cpu(ino->uid));
+ pr_err("\tgid %u\n", le32_to_cpu(ino->gid));
+ pr_err("\tmode %u\n", le32_to_cpu(ino->mode));
+ pr_err("\tflags %#x\n", le32_to_cpu(ino->flags));
+ pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt));
+ pr_err("\txattr_size %u\n", le32_to_cpu(ino->xattr_size));
+ pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names));
+ pr_err("\tcompr_type %#x\n",
(int)le16_to_cpu(ino->compr_type));
- printk(KERN_DEBUG "\tdata len %u\n",
- le32_to_cpu(ino->data_len));
+ pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len));
break;
}
case UBIFS_DENT_NODE:
@@ -454,21 +452,21 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
int nlen = le16_to_cpu(dent->nlen);
key_read(c, &dent->key, &key);
- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
- printk(KERN_DEBUG "\tinum %llu\n",
+ pr_err("\tkey %s\n",
+ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
+ pr_err("\tinum %llu\n",
(unsigned long long)le64_to_cpu(dent->inum));
- printk(KERN_DEBUG "\ttype %d\n", (int)dent->type);
- printk(KERN_DEBUG "\tnlen %d\n", nlen);
- printk(KERN_DEBUG "\tname ");
+ pr_err("\ttype %d\n", (int)dent->type);
+ pr_err("\tnlen %d\n", nlen);
+ pr_err("\tname ");
if (nlen > UBIFS_MAX_NLEN)
- printk(KERN_DEBUG "(bad name length, not printing, "
- "bad or corrupted node)");
+ pr_err("(bad name length, not printing, bad or corrupted node)");
else {
for (i = 0; i < nlen && dent->name[i]; i++)
- printk("%c", dent->name[i]);
+ pr_cont("%c", dent->name[i]);
}
- printk("\n");
+ pr_cont("\n");
break;
}
@@ -478,15 +476,14 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
key_read(c, &dn->key, &key);
- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
- printk(KERN_DEBUG "\tsize %u\n",
- le32_to_cpu(dn->size));
- printk(KERN_DEBUG "\tcompr_typ %d\n",
+ pr_err("\tkey %s\n",
+ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
+ pr_err("\tsize %u\n", le32_to_cpu(dn->size));
+ pr_err("\tcompr_typ %d\n",
(int)le16_to_cpu(dn->compr_type));
- printk(KERN_DEBUG "\tdata size %d\n",
- dlen);
- printk(KERN_DEBUG "\tdata:\n");
- print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+ pr_err("\tdata size %d\n", dlen);
+ pr_err("\tdata:\n");
+ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
(void *)&dn->data, dlen, 0);
break;
}
@@ -494,11 +491,10 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
{
const struct ubifs_trun_node *trun = node;
- printk(KERN_DEBUG "\tinum %u\n",
- le32_to_cpu(trun->inum));
- printk(KERN_DEBUG "\told_size %llu\n",
+ pr_err("\tinum %u\n", le32_to_cpu(trun->inum));
+ pr_err("\told_size %llu\n",
(unsigned long long)le64_to_cpu(trun->old_size));
- printk(KERN_DEBUG "\tnew_size %llu\n",
+ pr_err("\tnew_size %llu\n",
(unsigned long long)le64_to_cpu(trun->new_size));
break;
}
@@ -507,19 +503,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
const struct ubifs_idx_node *idx = node;
n = le16_to_cpu(idx->child_cnt);
- printk(KERN_DEBUG "\tchild_cnt %d\n", n);
- printk(KERN_DEBUG "\tlevel %d\n",
- (int)le16_to_cpu(idx->level));
- printk(KERN_DEBUG "\tBranches:\n");
+ pr_err("\tchild_cnt %d\n", n);
+ pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level));
+ pr_err("\tBranches:\n");
for (i = 0; i < n && i < c->fanout - 1; i++) {
const struct ubifs_branch *br;
br = ubifs_idx_branch(c, idx, i);
key_read(c, &br->key, &key);
- printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
+ pr_err("\t%d: LEB %d:%d len %d key %s\n",
i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
- le32_to_cpu(br->len), DBGKEY(&key));
+ le32_to_cpu(br->len),
+ dbg_snprintf_key(c, &key, key_buf,
+ DBG_KEY_BUF_LEN));
}
break;
}
@@ -529,159 +526,333 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
{
const struct ubifs_orph_node *orph = node;
- printk(KERN_DEBUG "\tcommit number %llu\n",
+ pr_err("\tcommit number %llu\n",
(unsigned long long)
le64_to_cpu(orph->cmt_no) & LLONG_MAX);
- printk(KERN_DEBUG "\tlast node flag %llu\n",
+ pr_err("\tlast node flag %llu\n",
(unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
- printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
+ pr_err("\t%d orphan inode numbers:\n", n);
for (i = 0; i < n; i++)
- printk(KERN_DEBUG "\t ino %llu\n",
- le64_to_cpu(orph->inos[i]));
+ pr_err("\t ino %llu\n",
+ (unsigned long long)le64_to_cpu(orph->inos[i]));
break;
}
default:
- printk(KERN_DEBUG "node type %d was not recognized\n",
+ pr_err("node type %d was not recognized\n",
(int)ch->node_type);
}
spin_unlock(&dbg_lock);
}
-void dbg_dump_budget_req(const struct ubifs_budget_req *req)
+void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
{
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
+ pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n",
req->new_ino, req->dirtied_ino);
- printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n",
+ pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n",
req->new_ino_d, req->dirtied_ino_d);
- printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n",
+ pr_err("\tnew_page %d, dirtied_page %d\n",
req->new_page, req->dirtied_page);
- printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n",
+ pr_err("\tnew_dent %d, mod_dent %d\n",
req->new_dent, req->mod_dent);
- printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth);
- printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n",
+ pr_err("\tidx_growth %d\n", req->idx_growth);
+ pr_err("\tdata_growth %d dd_growth %d\n",
req->data_growth, req->dd_growth);
spin_unlock(&dbg_lock);
}
-void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
+void ubifs_dump_lstats(const struct ubifs_lp_stats *lst)
{
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n",
- lst->empty_lebs, lst->idx_lebs);
- printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
- "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
- lst->total_dirty);
- printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
- "total_dead %lld\n", lst->total_used, lst->total_dark,
- lst->total_dead);
+ pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n",
+ current->pid, lst->empty_lebs, lst->idx_lebs);
+ pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n",
+ lst->taken_empty_lebs, lst->total_free, lst->total_dirty);
+ pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n",
+ lst->total_used, lst->total_dark, lst->total_dead);
spin_unlock(&dbg_lock);
}
-void dbg_dump_budg(struct ubifs_info *c)
+void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
{
int i;
struct rb_node *rb;
struct ubifs_bud *bud;
struct ubifs_gced_idx_leb *idx_gc;
+ long long available, outstanding, free;
+ spin_lock(&c->space_lock);
spin_lock(&dbg_lock);
- printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
- "budg_dd_growth %lld, budg_idx_growth %lld\n",
- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
- c->freeable_cnt);
- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
- printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
- "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
+ pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n",
+ current->pid, bi->data_growth + bi->dd_growth,
+ bi->data_growth + bi->dd_growth + bi->idx_growth);
+ pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n",
+ bi->data_growth, bi->dd_growth, bi->idx_growth);
+ pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n",
+ bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx);
+ pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+ bi->page_budget, bi->inode_budget, bi->dent_budget);
+ pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp);
+ pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+ c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+
+ if (bi != &c->bi)
+ /*
+ * If we are dumping saved budgeting data, do not print
+ * additional information which is about the current state, not
+ * the old one which corresponded to the saved budgeting data.
+ */
+ goto out_unlock;
+
+ pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
+ pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n",
+ atomic_long_read(&c->dirty_pg_cnt),
atomic_long_read(&c->dirty_zn_cnt),
atomic_long_read(&c->clean_zn_cnt));
- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
- c->dark_wm, c->dead_wm, c->max_idx_node_sz);
- printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
- c->gc_lnum, c->ihead_lnum);
- for (i = 0; i < c->jhead_cnt; i++)
- printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
- c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
+ pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum);
+
+ /* If we are in R/O mode, journal heads do not exist */
+ if (c->jheads)
+ for (i = 0; i < c->jhead_cnt; i++)
+ pr_err("\tjhead %s\t LEB %d\n",
+ dbg_jhead(c->jheads[i].wbuf.jhead),
+ c->jheads[i].wbuf.lnum);
for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
bud = rb_entry(rb, struct ubifs_bud, rb);
- printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
+ pr_err("\tbud LEB %d\n", bud->lnum);
}
list_for_each_entry(bud, &c->old_buds, list)
- printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
+ pr_err("\told bud LEB %d\n", bud->lnum);
list_for_each_entry(idx_gc, &c->idx_gc, list)
- printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
+ pr_err("\tGC'ed idx LEB %d unmap %d\n",
idx_gc->lnum, idx_gc->unmap);
- printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
+ pr_err("\tcommit state %d\n", c->cmt_state);
+
+ /* Print budgeting predictions */
+ available = ubifs_calc_available(c, c->bi.min_idx_lebs);
+ outstanding = c->bi.data_growth + c->bi.dd_growth;
+ free = ubifs_get_free_space_nolock(c);
+ pr_err("Budgeting predictions:\n");
+ pr_err("\tavailable: %lld, outstanding %lld, free %lld\n",
+ available, outstanding, free);
+out_unlock:
spin_unlock(&dbg_lock);
+ spin_unlock(&c->space_lock);
}
-void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
+void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
{
- printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
- "flags %#x\n", lp->lnum, lp->free, lp->dirty,
- c->leb_size - lp->free - lp->dirty, lp->flags);
+ int i, spc, dark = 0, dead = 0;
+ struct rb_node *rb;
+ struct ubifs_bud *bud;
+
+ spc = lp->free + lp->dirty;
+ if (spc < c->dead_wm)
+ dead = spc;
+ else
+ dark = ubifs_calc_dark(c, spc);
+
+ if (lp->flags & LPROPS_INDEX)
+ pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (",
+ lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
+ lp->flags);
+ else
+ pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (",
+ lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
+ dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
+
+ if (lp->flags & LPROPS_TAKEN) {
+ if (lp->flags & LPROPS_INDEX)
+ pr_cont("index, taken");
+ else
+ pr_cont("taken");
+ } else {
+ const char *s;
+
+ if (lp->flags & LPROPS_INDEX) {
+ switch (lp->flags & LPROPS_CAT_MASK) {
+ case LPROPS_DIRTY_IDX:
+ s = "dirty index";
+ break;
+ case LPROPS_FRDI_IDX:
+ s = "freeable index";
+ break;
+ default:
+ s = "index";
+ }
+ } else {
+ switch (lp->flags & LPROPS_CAT_MASK) {
+ case LPROPS_UNCAT:
+ s = "not categorized";
+ break;
+ case LPROPS_DIRTY:
+ s = "dirty";
+ break;
+ case LPROPS_FREE:
+ s = "free";
+ break;
+ case LPROPS_EMPTY:
+ s = "empty";
+ break;
+ case LPROPS_FREEABLE:
+ s = "freeable";
+ break;
+ default:
+ s = NULL;
+ break;
+ }
+ }
+ pr_cont("%s", s);
+ }
+
+ for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
+ bud = rb_entry(rb, struct ubifs_bud, rb);
+ if (bud->lnum == lp->lnum) {
+ int head = 0;
+ for (i = 0; i < c->jhead_cnt; i++) {
+ /*
+ * Note, if we are in R/O mode or in the middle
+ * of mounting/re-mounting, the write-buffers do
+ * not exist.
+ */
+ if (c->jheads &&
+ lp->lnum == c->jheads[i].wbuf.lnum) {
+ pr_cont(", jhead %s", dbg_jhead(i));
+ head = 1;
+ }
+ }
+ if (!head)
+ pr_cont(", bud of jhead %s",
+ dbg_jhead(bud->jhead));
+ }
+ }
+ if (lp->lnum == c->gc_lnum)
+ pr_cont(", GC LEB");
+ pr_cont(")\n");
}
-void dbg_dump_lprops(struct ubifs_info *c)
+void ubifs_dump_lprops(struct ubifs_info *c)
{
int lnum, err;
struct ubifs_lprops lp;
struct ubifs_lp_stats lst;
- printk(KERN_DEBUG "Dumping LEB properties\n");
+ pr_err("(pid %d) start dumping LEB properties\n", current->pid);
ubifs_get_lp_stats(c, &lst);
- dbg_dump_lstats(&lst);
+ ubifs_dump_lstats(&lst);
for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
err = ubifs_read_one_lp(c, lnum, &lp);
- if (err)
+ if (err) {
ubifs_err("cannot read lprops for LEB %d", lnum);
+ continue;
+ }
- dbg_dump_lprop(c, &lp);
+ ubifs_dump_lprop(c, &lp);
}
+ pr_err("(pid %d) finish dumping LEB properties\n", current->pid);
}
-void dbg_dump_leb(const struct ubifs_info *c, int lnum)
+void ubifs_dump_lpt_info(struct ubifs_info *c)
+{
+ int i;
+
+ spin_lock(&dbg_lock);
+ pr_err("(pid %d) dumping LPT information\n", current->pid);
+ pr_err("\tlpt_sz: %lld\n", c->lpt_sz);
+ pr_err("\tpnode_sz: %d\n", c->pnode_sz);
+ pr_err("\tnnode_sz: %d\n", c->nnode_sz);
+ pr_err("\tltab_sz: %d\n", c->ltab_sz);
+ pr_err("\tlsave_sz: %d\n", c->lsave_sz);
+ pr_err("\tbig_lpt: %d\n", c->big_lpt);
+ pr_err("\tlpt_hght: %d\n", c->lpt_hght);
+ pr_err("\tpnode_cnt: %d\n", c->pnode_cnt);
+ pr_err("\tnnode_cnt: %d\n", c->nnode_cnt);
+ pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
+ pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
+ pr_err("\tlsave_cnt: %d\n", c->lsave_cnt);
+ pr_err("\tspace_bits: %d\n", c->space_bits);
+ pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
+ pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
+ pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
+ pr_err("\tpcnt_bits: %d\n", c->pcnt_bits);
+ pr_err("\tlnum_bits: %d\n", c->lnum_bits);
+ pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
+ pr_err("\tLPT head is at %d:%d\n",
+ c->nhead_lnum, c->nhead_offs);
+ pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
+ if (c->big_lpt)
+ pr_err("\tLPT lsave is at %d:%d\n",
+ c->lsave_lnum, c->lsave_offs);
+ for (i = 0; i < c->lpt_lebs; i++)
+ pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n",
+ i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty,
+ c->ltab[i].tgc, c->ltab[i].cmt);
+ spin_unlock(&dbg_lock);
+}
+
+void ubifs_dump_sleb(const struct ubifs_info *c,
+ const struct ubifs_scan_leb *sleb, int offs)
+{
+ struct ubifs_scan_node *snod;
+
+ pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n",
+ current->pid, sleb->lnum, offs);
+
+ list_for_each_entry(snod, &sleb->nodes, list) {
+ cond_resched();
+ pr_err("Dumping node at LEB %d:%d len %d\n",
+ sleb->lnum, snod->offs, snod->len);
+ ubifs_dump_node(c, snod->node);
+ }
+}
+
+void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
+ void *buf;
- if (dbg_failure_mode)
- return;
+ pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
- printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
+ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf) {
+ ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
+ return;
+ }
- sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+ sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
ubifs_err("scan error %d", (int)PTR_ERR(sleb));
- return;
+ goto out;
}
- printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
+ pr_err("LEB %d has %d nodes ending at %d\n", lnum,
sleb->nodes_cnt, sleb->endpt);
list_for_each_entry(snod, &sleb->nodes, list) {
cond_resched();
- printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum,
+ pr_err("Dumping node at LEB %d:%d len %d\n", lnum,
snod->offs, snod->len);
- dbg_dump_node(c, snod->node);
+ ubifs_dump_node(c, snod->node);
}
+ pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
ubifs_scan_destroy(sleb);
+
+out:
+ vfree(buf);
return;
}
-void dbg_dump_znode(const struct ubifs_info *c,
- const struct ubifs_znode *znode)
+void ubifs_dump_znode(const struct ubifs_info *c,
+ const struct ubifs_znode *znode)
{
int n;
const struct ubifs_zbranch *zbr;
+ char key_buf[DBG_KEY_BUF_LEN];
spin_lock(&dbg_lock);
if (znode->parent)
@@ -689,109 +860,203 @@ void dbg_dump_znode(const struct ubifs_info *c,
else
zbr = &c->zroot;
- printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
- " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
- zbr->len, znode->parent, znode->iip, znode->level,
- znode->child_cnt, znode->flags);
+ pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n",
+ znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip,
+ znode->level, znode->child_cnt, znode->flags);
if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
spin_unlock(&dbg_lock);
return;
}
- printk(KERN_DEBUG "zbranches:\n");
+ pr_err("zbranches:\n");
for (n = 0; n < znode->child_cnt; n++) {
zbr = &znode->zbranch[n];
if (znode->level > 0)
- printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
- "%s\n", n, zbr->znode, zbr->lnum,
- zbr->offs, zbr->len,
- DBGKEY(&zbr->key));
+ pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n",
+ n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
+ dbg_snprintf_key(c, &zbr->key, key_buf,
+ DBG_KEY_BUF_LEN));
else
- printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
- "%s\n", n, zbr->znode, zbr->lnum,
- zbr->offs, zbr->len,
- DBGKEY(&zbr->key));
+ pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n",
+ n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
+ dbg_snprintf_key(c, &zbr->key, key_buf,
+ DBG_KEY_BUF_LEN));
}
spin_unlock(&dbg_lock);
}
-void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
+void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
{
int i;
- printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
- cat, heap->cnt);
+ pr_err("(pid %d) start dumping heap cat %d (%d elements)\n",
+ current->pid, cat, heap->cnt);
for (i = 0; i < heap->cnt; i++) {
struct ubifs_lprops *lprops = heap->arr[i];
- printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
- "flags %d\n", i, lprops->lnum, lprops->hpos,
- lprops->free, lprops->dirty, lprops->flags);
+ pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n",
+ i, lprops->lnum, lprops->hpos, lprops->free,
+ lprops->dirty, lprops->flags);
}
+ pr_err("(pid %d) finish dumping heap\n", current->pid);
}
-void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
- struct ubifs_nnode *parent, int iip)
+void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+ struct ubifs_nnode *parent, int iip)
{
int i;
- printk(KERN_DEBUG "Dumping pnode:\n");
- printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
+ pr_err("(pid %d) dumping pnode:\n", current->pid);
+ pr_err("\taddress %zx parent %zx cnext %zx\n",
(size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
- printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
+ pr_err("\tflags %lu iip %d level %d num %d\n",
pnode->flags, iip, pnode->level, pnode->num);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
struct ubifs_lprops *lp = &pnode->lprops[i];
- printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
+ pr_err("\t%d: free %d dirty %d flags %d lnum %d\n",
i, lp->free, lp->dirty, lp->flags, lp->lnum);
}
}
-void dbg_dump_tnc(struct ubifs_info *c)
+void ubifs_dump_tnc(struct ubifs_info *c)
{
struct ubifs_znode *znode;
int level;
- printk(KERN_DEBUG "\n");
- printk(KERN_DEBUG "Dumping the TNC tree\n");
+ pr_err("\n");
+ pr_err("(pid %d) start dumping TNC tree\n", current->pid);
znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
level = znode->level;
- printk(KERN_DEBUG "== Level %d ==\n", level);
+ pr_err("== Level %d ==\n", level);
while (znode) {
if (level != znode->level) {
level = znode->level;
- printk(KERN_DEBUG "== Level %d ==\n", level);
+ pr_err("== Level %d ==\n", level);
}
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
}
-
- printk(KERN_DEBUG "\n");
+ pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
}
static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
void *priv)
{
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
return 0;
}
/**
- * dbg_dump_index - dump the on-flash index.
+ * ubifs_dump_index - dump the on-flash index.
* @c: UBIFS file-system description object
*
- * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
+ * This function dumps whole UBIFS indexing B-tree, unlike 'ubifs_dump_tnc()'
* which dumps only in-memory znodes and does not read znodes which from flash.
*/
-void dbg_dump_index(struct ubifs_info *c)
+void ubifs_dump_index(struct ubifs_info *c)
{
dbg_walk_index(c, NULL, dump_znode, NULL);
}
/**
+ * dbg_save_space_info - save information about flash space.
+ * @c: UBIFS file-system description object
+ *
+ * This function saves information about UBIFS free space, dirty space, etc, in
+ * order to check it later.
+ */
+void dbg_save_space_info(struct ubifs_info *c)
+{
+ struct ubifs_debug_info *d = c->dbg;
+ int freeable_cnt;
+
+ spin_lock(&c->space_lock);
+ memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
+ d->saved_idx_gc_cnt = c->idx_gc_cnt;
+
+ /*
+ * We use a dirty hack here and zero out @c->freeable_cnt, because it
+ * affects the free space calculations, and UBIFS might not know about
+ * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
+ * only when we read their lprops, and we do this only lazily, upon the
+ * need. So at any given point of time @c->freeable_cnt might not be
+ * exactly accurate.
+ *
+ * Just one example about the issue we hit when we did not zero
+ * @c->freeable_cnt.
+ * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
+ * amount of free space in @d->saved_free
+ * 2. We re-mount R/W, which makes UBIFS read the "lsave"
+ * information from flash, where we cache LEBs from various
+ * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
+ * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
+ * -> 'ubifs_get_pnode()' -> 'update_cats()'
+ * -> 'ubifs_add_to_cat()').
+ * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
+ * becomes %1.
+ * 4. We calculate the amount of free space when the re-mount is
+ * finished in 'dbg_check_space_info()' and it does not match
+ * @d->saved_free.
+ */
+ freeable_cnt = c->freeable_cnt;
+ c->freeable_cnt = 0;
+ d->saved_free = ubifs_get_free_space_nolock(c);
+ c->freeable_cnt = freeable_cnt;
+ spin_unlock(&c->space_lock);
+}
+
+/**
+ * dbg_check_space_info - check flash space information.
+ * @c: UBIFS file-system description object
+ *
+ * This function compares current flash space information with the information
+ * which was saved when the 'dbg_save_space_info()' function was called.
+ * Returns zero if the information has not changed, and %-EINVAL if it has
+ * changed.
+ */
+int dbg_check_space_info(struct ubifs_info *c)
+{
+ struct ubifs_debug_info *d = c->dbg;
+ struct ubifs_lp_stats lst;
+ long long free;
+ int freeable_cnt;
+
+ spin_lock(&c->space_lock);
+ freeable_cnt = c->freeable_cnt;
+ c->freeable_cnt = 0;
+ free = ubifs_get_free_space_nolock(c);
+ c->freeable_cnt = freeable_cnt;
+ spin_unlock(&c->space_lock);
+
+ if (free != d->saved_free) {
+ ubifs_err("free space changed from %lld to %lld",
+ d->saved_free, free);
+ goto out;
+ }
+
+ return 0;
+
+out:
+ ubifs_msg("saved lprops statistics dump");
+ ubifs_dump_lstats(&d->saved_lst);
+ ubifs_msg("saved budgeting info dump");
+ ubifs_dump_budg(c, &d->saved_bi);
+ ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
+ ubifs_msg("current lprops statistics dump");
+ ubifs_get_lp_stats(c, &lst);
+ ubifs_dump_lstats(&lst);
+ ubifs_msg("current budgeting info dump");
+ ubifs_dump_budg(c, &c->bi);
+ dump_stack();
+ return -EINVAL;
+}
+
+/**
* dbg_check_synced_i_size - check synchronized inode size.
+ * @c: UBIFS file-system description object
* @inode: inode to check
*
* If inode is clean, synchronized inode size has to be equivalent to current
@@ -799,12 +1064,12 @@ void dbg_dump_index(struct ubifs_info *c)
* has to be locked). Returns %0 if synchronized inode size if correct, and
* %-EINVAL if not.
*/
-int dbg_check_synced_i_size(struct inode *inode)
+int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
{
int err = 0;
struct ubifs_inode *ui = ubifs_inode(inode);
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (!S_ISREG(inode->i_mode))
return 0;
@@ -812,11 +1077,11 @@ int dbg_check_synced_i_size(struct inode *inode)
mutex_lock(&ui->ui_mutex);
spin_lock(&ui->ui_lock);
if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
- ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
- "is clean", ui->ui_size, ui->synced_i_size);
+ ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean",
+ ui->ui_size, ui->synced_i_size);
ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
inode->i_mode, i_size_read(inode));
- dbg_dump_stack();
+ dump_stack();
err = -EINVAL;
}
spin_unlock(&ui->ui_lock);
@@ -837,7 +1102,7 @@ int dbg_check_synced_i_size(struct inode *inode)
* Note, it is good idea to make sure the @dir->i_mutex is locked before
* calling this function.
*/
-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
+int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
{
unsigned int nlink = 2;
union ubifs_key key;
@@ -845,7 +1110,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
struct qstr nm = { .name = NULL };
loff_t size = UBIFS_INO_NODE_SZ;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (!S_ISDIR(dir->i_mode))
@@ -875,16 +1140,17 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
kfree(pdent);
if (i_size_read(dir) != size) {
- ubifs_err("directory inode %lu has size %llu, "
- "but calculated size is %llu", dir->i_ino,
- (unsigned long long)i_size_read(dir),
+ ubifs_err("directory inode %lu has size %llu, but calculated size is %llu",
+ dir->i_ino, (unsigned long long)i_size_read(dir),
(unsigned long long)size);
+ ubifs_dump_inode(c, dir);
dump_stack();
return -EINVAL;
}
if (dir->i_nlink != nlink) {
- ubifs_err("directory inode %lu has nlink %u, but calculated "
- "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
+ ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u",
+ dir->i_ino, dir->i_nlink, nlink);
+ ubifs_dump_inode(c, dir);
dump_stack();
return -EINVAL;
}
@@ -911,6 +1177,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
int err, nlen1, nlen2, cmp;
struct ubifs_dent_node *dent1, *dent2;
union ubifs_key key;
+ char key_buf[DBG_KEY_BUF_LEN];
ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
@@ -940,22 +1207,26 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
err = 1;
key_read(c, &dent1->key, &key);
if (keys_cmp(c, &zbr1->key, &key)) {
- dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
- zbr1->offs, DBGKEY(&key));
- dbg_err("but it should have key %s according to tnc",
- DBGKEY(&zbr1->key));
- dbg_dump_node(c, dent1);
- goto out_free;
+ ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum,
+ zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
+ DBG_KEY_BUF_LEN));
+ ubifs_err("but it should have key %s according to tnc",
+ dbg_snprintf_key(c, &zbr1->key, key_buf,
+ DBG_KEY_BUF_LEN));
+ ubifs_dump_node(c, dent1);
+ goto out_free;
}
key_read(c, &dent2->key, &key);
if (keys_cmp(c, &zbr2->key, &key)) {
- dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
- zbr1->offs, DBGKEY(&key));
- dbg_err("but it should have key %s according to tnc",
- DBGKEY(&zbr2->key));
- dbg_dump_node(c, dent2);
- goto out_free;
+ ubifs_err("2nd entry at %d:%d has key %s", zbr2->lnum,
+ zbr2->offs, dbg_snprintf_key(c, &key, key_buf,
+ DBG_KEY_BUF_LEN));
+ ubifs_err("but it should have key %s according to tnc",
+ dbg_snprintf_key(c, &zbr2->key, key_buf,
+ DBG_KEY_BUF_LEN));
+ ubifs_dump_node(c, dent2);
+ goto out_free;
}
nlen1 = le16_to_cpu(dent1->nlen);
@@ -967,15 +1238,15 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
goto out_free;
}
if (cmp == 0 && nlen1 == nlen2)
- dbg_err("2 xent/dent nodes with the same name");
+ ubifs_err("2 xent/dent nodes with the same name");
else
- dbg_err("bad order of colliding key %s",
- DBGKEY(&key));
+ ubifs_err("bad order of colliding key %s",
+ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
- dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
- dbg_dump_node(c, dent1);
- dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
- dbg_dump_node(c, dent2);
+ ubifs_msg("first node at %d:%d", zbr1->lnum, zbr1->offs);
+ ubifs_dump_node(c, dent1);
+ ubifs_msg("second node at %d:%d", zbr2->lnum, zbr2->offs);
+ ubifs_dump_node(c, dent2);
out_free:
kfree(dent2);
@@ -1086,7 +1357,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
/*
* Make sure the last key in our znode is less or
- * equivalent than the the key in zbranch which goes
+ * equivalent than the key in the zbranch which goes
* after our pointing zbranch.
*/
cmp = keys_cmp(c, max,
@@ -1178,10 +1449,10 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
out:
ubifs_err("failed, error %d", err);
ubifs_msg("dump of the znode");
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
if (zp) {
ubifs_msg("dump of the parent znode");
- dbg_dump_znode(c, zp);
+ ubifs_dump_znode(c, zp);
}
dump_stack();
return -EINVAL;
@@ -1201,7 +1472,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
long clean_cnt = 0, dirty_cnt = 0;
int err, last;
- if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
+ if (!dbg_is_chk_index(c))
return 0;
ubifs_assert(mutex_is_locked(&c->tnc_mutex));
@@ -1248,9 +1519,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
return err;
if (err) {
ubifs_msg("first znode");
- dbg_dump_znode(c, prev);
+ ubifs_dump_znode(c, prev);
ubifs_msg("second znode");
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
return -EINVAL;
}
}
@@ -1279,7 +1550,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
* @c: UBIFS file-system description object
* @leaf_cb: called for each leaf node
* @znode_cb: called for each indexing node
- * @priv: private date which is passed to callbacks
+ * @priv: private data which is passed to callbacks
*
* This function walks the UBIFS index and calls the @leaf_cb for each leaf
* node and @znode_cb for each indexing node. Returns zero in case of success
@@ -1337,9 +1608,9 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
if (znode_cb) {
err = znode_cb(c, znode, priv);
if (err) {
- ubifs_err("znode checking function returned "
- "error %d", err);
- dbg_dump_znode(c, znode);
+ ubifs_err("znode checking function returned error %d",
+ err);
+ ubifs_dump_znode(c, znode);
goto out_dump;
}
}
@@ -1348,9 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
zbr = &znode->zbranch[idx];
err = leaf_cb(c, zbr, priv);
if (err) {
- ubifs_err("leaf checking function "
- "returned error %d, for leaf "
- "at LEB %d:%d",
+ ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d",
err, zbr->lnum, zbr->offs);
goto out_dump;
}
@@ -1407,7 +1676,7 @@ out_dump:
else
zbr = &c->zroot;
ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
out_unlock:
mutex_unlock(&c->tnc_mutex);
return err;
@@ -1448,7 +1717,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
int err;
long long calc = 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
+ if (!dbg_is_chk_index(c))
return 0;
err = dbg_walk_index(c, NULL, add_size, &calc);
@@ -1458,8 +1727,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
}
if (calc != idx_size) {
- ubifs_err("index size check failed: calculated size is %lld, "
- "should be %lld", calc, idx_size);
+ ubifs_err("index size check failed: calculated size is %lld, should be %lld",
+ calc, idx_size);
dump_stack();
return -EINVAL;
}
@@ -1529,6 +1798,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
struct rb_node **p, *parent = NULL;
struct fsck_inode *fscki;
ino_t inum = key_inum_flash(c, &ino->key);
+ struct inode *inode;
+ struct ubifs_inode *ui;
p = &fsckd->inodes.rb_node;
while (*p) {
@@ -1544,7 +1815,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
if (inum > c->highest_inum) {
ubifs_err("too high inode number, max. is %lu",
- c->highest_inum);
+ (unsigned long)c->highest_inum);
return ERR_PTR(-EINVAL);
}
@@ -1552,19 +1823,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
if (!fscki)
return ERR_PTR(-ENOMEM);
+ inode = ilookup(c->vfs_sb, inum);
+
fscki->inum = inum;
- fscki->nlink = le32_to_cpu(ino->nlink);
- fscki->size = le64_to_cpu(ino->size);
- fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
- fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
- fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
- fscki->mode = le32_to_cpu(ino->mode);
+ /*
+ * If the inode is present in the VFS inode cache, use it instead of
+ * the on-flash inode which might be out-of-date. E.g., the size might
+ * be out-of-date. If we do not do this, the following may happen, for
+ * example:
+ * 1. A power cut happens
+ * 2. We mount the file-system R/O, the replay process fixes up the
+ * inode size in the VFS cache, but not on-flash.
+ * 3. 'check_leaf()' fails because it hits a data node beyond inode
+ * size.
+ */
+ if (!inode) {
+ fscki->nlink = le32_to_cpu(ino->nlink);
+ fscki->size = le64_to_cpu(ino->size);
+ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+ fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+ fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+ fscki->mode = le32_to_cpu(ino->mode);
+ } else {
+ ui = ubifs_inode(inode);
+ fscki->nlink = inode->i_nlink;
+ fscki->size = inode->i_size;
+ fscki->xattr_cnt = ui->xattr_cnt;
+ fscki->xattr_sz = ui->xattr_size;
+ fscki->xattr_nms = ui->xattr_names;
+ fscki->mode = inode->i_mode;
+ iput(inode);
+ }
+
if (S_ISDIR(fscki->mode)) {
fscki->calc_sz = UBIFS_INO_NODE_SZ;
fscki->calc_cnt = 2;
}
+
rb_link_node(&fscki->rb, parent, p);
rb_insert_color(&fscki->rb, &fsckd->inodes);
+
return fscki;
}
@@ -1623,16 +1921,18 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
ino_key_init(c, &key, inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index", inum);
+ ubifs_err("inode %lu not found in index", (unsigned long)inum);
return ERR_PTR(-ENOENT);
} else if (err < 0) {
- ubifs_err("error %d while looking up inode %lu", err, inum);
+ ubifs_err("error %d while looking up inode %lu",
+ err, (unsigned long)inum);
return ERR_PTR(err);
}
zbr = &znode->zbranch[n];
if (zbr->len < UBIFS_INO_NODE_SZ) {
- ubifs_err("bad node %lu node length %d", inum, zbr->len);
+ ubifs_err("bad node %lu node length %d",
+ (unsigned long)inum, zbr->len);
return ERR_PTR(-EINVAL);
}
@@ -1652,7 +1952,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
kfree(ino);
if (IS_ERR(fscki)) {
ubifs_err("error %ld while adding inode %lu node",
- PTR_ERR(fscki), inum);
+ PTR_ERR(fscki), (unsigned long)inum);
return fscki;
}
@@ -1740,8 +2040,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki = read_add_inode(c, priv, inum);
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
- ubifs_err("error %d while processing data node and "
- "trying to find inode node %lu", err, inum);
+ ubifs_err("error %d while processing data node and trying to find inode node %lu",
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1750,9 +2050,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
blk_offs <<= UBIFS_BLOCK_SHIFT;
blk_offs += le32_to_cpu(dn->size);
if (blk_offs > fscki->size) {
- ubifs_err("data node at LEB %d:%d is not within inode "
- "size %lld", zbr->lnum, zbr->offs,
- fscki->size);
+ ubifs_err("data node at LEB %d:%d is not within inode size %lld",
+ zbr->lnum, zbr->offs, fscki->size);
err = -EINVAL;
goto out_dump;
}
@@ -1773,8 +2072,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki = read_add_inode(c, priv, inum);
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
- ubifs_err("error %d while processing entry node and "
- "trying to find inode node %lu", err, inum);
+ ubifs_err("error %d while processing entry node and trying to find inode node %lu",
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1784,10 +2083,9 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
inum = key_inum_flash(c, &dent->key);
fscki1 = read_add_inode(c, priv, inum);
if (IS_ERR(fscki1)) {
- err = PTR_ERR(fscki);
- ubifs_err("error %d while processing entry node and "
- "trying to find parent inode node %lu",
- err, inum);
+ err = PTR_ERR(fscki1);
+ ubifs_err("error %d while processing entry node and trying to find parent inode node %lu",
+ err, (unsigned long)inum);
goto out_dump;
}
@@ -1810,7 +2108,7 @@ out:
out_dump:
ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
- dbg_dump_node(c, node);
+ ubifs_dump_node(c, node);
out_free:
kfree(node);
return err;
@@ -1822,26 +2120,10 @@ out_free:
*/
static void free_inodes(struct fsck_data *fsckd)
{
- struct rb_node *this = fsckd->inodes.rb_node;
- struct fsck_inode *fscki;
+ struct fsck_inode *fscki, *n;
- while (this) {
- if (this->rb_left)
- this = this->rb_left;
- else if (this->rb_right)
- this = this->rb_right;
- else {
- fscki = rb_entry(this, struct fsck_inode, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &fscki->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- kfree(fscki);
- }
- }
+ rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb)
+ kfree(fscki);
}
/**
@@ -1876,58 +2158,53 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
*/
if (fscki->inum != UBIFS_ROOT_INO &&
fscki->references != 1) {
- ubifs_err("directory inode %lu has %d "
- "direntries which refer it, but "
- "should be 1", fscki->inum,
+ ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1",
+ (unsigned long)fscki->inum,
fscki->references);
goto out_dump;
}
if (fscki->inum == UBIFS_ROOT_INO &&
fscki->references != 0) {
- ubifs_err("root inode %lu has non-zero (%d) "
- "direntries which refer it",
- fscki->inum, fscki->references);
+ ubifs_err("root inode %lu has non-zero (%d) direntries which refer it",
+ (unsigned long)fscki->inum,
+ fscki->references);
goto out_dump;
}
if (fscki->calc_sz != fscki->size) {
- ubifs_err("directory inode %lu size is %lld, "
- "but calculated size is %lld",
- fscki->inum, fscki->size,
- fscki->calc_sz);
+ ubifs_err("directory inode %lu size is %lld, but calculated size is %lld",
+ (unsigned long)fscki->inum,
+ fscki->size, fscki->calc_sz);
goto out_dump;
}
if (fscki->calc_cnt != fscki->nlink) {
- ubifs_err("directory inode %lu nlink is %d, "
- "but calculated nlink is %d",
- fscki->inum, fscki->nlink,
- fscki->calc_cnt);
+ ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d",
+ (unsigned long)fscki->inum,
+ fscki->nlink, fscki->calc_cnt);
goto out_dump;
}
} else {
if (fscki->references != fscki->nlink) {
- ubifs_err("inode %lu nlink is %d, but "
- "calculated nlink is %d", fscki->inum,
+ ubifs_err("inode %lu nlink is %d, but calculated nlink is %d",
+ (unsigned long)fscki->inum,
fscki->nlink, fscki->references);
goto out_dump;
}
}
if (fscki->xattr_sz != fscki->calc_xsz) {
- ubifs_err("inode %lu has xattr size %u, but "
- "calculated size is %lld",
- fscki->inum, fscki->xattr_sz,
+ ubifs_err("inode %lu has xattr size %u, but calculated size is %lld",
+ (unsigned long)fscki->inum, fscki->xattr_sz,
fscki->calc_xsz);
goto out_dump;
}
if (fscki->xattr_cnt != fscki->calc_xcnt) {
- ubifs_err("inode %lu has %u xattrs, but "
- "calculated count is %lld", fscki->inum,
+ ubifs_err("inode %lu has %u xattrs, but calculated count is %lld",
+ (unsigned long)fscki->inum,
fscki->xattr_cnt, fscki->calc_xcnt);
goto out_dump;
}
if (fscki->xattr_nms != fscki->calc_xnms) {
- ubifs_err("inode %lu has xattr names' size %u, but "
- "calculated names' size is %lld",
- fscki->inum, fscki->xattr_nms,
+ ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld",
+ (unsigned long)fscki->inum, fscki->xattr_nms,
fscki->calc_xnms);
goto out_dump;
}
@@ -1940,11 +2217,12 @@ out_dump:
ino_key_init(c, &key, fscki->inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index", fscki->inum);
+ ubifs_err("inode %lu not found in index",
+ (unsigned long)fscki->inum);
return -ENOENT;
} else if (err < 0) {
ubifs_err("error %d while looking up inode %lu",
- err, fscki->inum);
+ err, (unsigned long)fscki->inum);
return err;
}
@@ -1962,8 +2240,8 @@ out_dump:
}
ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
- fscki->inum, zbr->lnum, zbr->offs);
- dbg_dump_node(c, ino);
+ (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
+ ubifs_dump_node(c, ino);
kfree(ino);
return -EINVAL;
}
@@ -1986,7 +2264,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
int err;
struct fsck_data fsckd;
- if (!(ubifs_chk_flags & UBIFS_CHK_FS))
+ if (!dbg_is_chk_fs(c))
return 0;
fsckd.inodes = RB_ROOT;
@@ -2008,282 +2286,815 @@ out_free:
return err;
}
-static int invocation_cnt;
-
-int dbg_force_in_the_gaps(void)
+/**
+ * dbg_check_data_nodes_order - check that list of data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
{
- if (!dbg_force_in_the_gaps_enabled)
- return 0;
- /* Force in-the-gaps every 8th commit */
- return !((invocation_cnt++) & 0x7);
-}
+ struct list_head *cur;
+ struct ubifs_scan_node *sa, *sb;
-/* Failure mode for recovery testing */
+ if (!dbg_is_chk_gen(c))
+ return 0;
-#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
+ for (cur = head->next; cur->next != head; cur = cur->next) {
+ ino_t inuma, inumb;
+ uint32_t blka, blkb;
-struct failure_mode_info {
- struct list_head list;
- struct ubifs_info *c;
-};
+ cond_resched();
+ sa = container_of(cur, struct ubifs_scan_node, list);
+ sb = container_of(cur->next, struct ubifs_scan_node, list);
-static LIST_HEAD(fmi_list);
-static DEFINE_SPINLOCK(fmi_lock);
+ if (sa->type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type %d", sa->type);
+ ubifs_dump_node(c, sa->node);
+ return -EINVAL;
+ }
+ if (sb->type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type %d", sb->type);
+ ubifs_dump_node(c, sb->node);
+ return -EINVAL;
+ }
-static unsigned int next;
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
-static int simple_rand(void)
-{
- if (next == 0)
- next = current->pid;
- next = next * 1103515245 + 12345;
- return (next >> 16) & 32767;
-}
+ if (inuma < inumb)
+ continue;
+ if (inuma > inumb) {
+ ubifs_err("larger inum %lu goes before inum %lu",
+ (unsigned long)inuma, (unsigned long)inumb);
+ goto error_dump;
+ }
-void dbg_failure_mode_registration(struct ubifs_info *c)
-{
- struct failure_mode_info *fmi;
+ blka = key_block(c, &sa->key);
+ blkb = key_block(c, &sb->key);
- fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
- if (!fmi) {
- dbg_err("Failed to register failure mode - no memory");
- return;
+ if (blka > blkb) {
+ ubifs_err("larger block %u goes before %u", blka, blkb);
+ goto error_dump;
+ }
+ if (blka == blkb) {
+ ubifs_err("two data nodes for the same block");
+ goto error_dump;
+ }
}
- fmi->c = c;
- spin_lock(&fmi_lock);
- list_add_tail(&fmi->list, &fmi_list);
- spin_unlock(&fmi_lock);
+
+ return 0;
+
+error_dump:
+ ubifs_dump_node(c, sa->node);
+ ubifs_dump_node(c, sb->node);
+ return -EINVAL;
}
-void dbg_failure_mode_deregistration(struct ubifs_info *c)
+/**
+ * dbg_check_nondata_nodes_order - check that list of non-data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of non-data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
{
- struct failure_mode_info *fmi, *tmp;
+ struct list_head *cur;
+ struct ubifs_scan_node *sa, *sb;
+
+ if (!dbg_is_chk_gen(c))
+ return 0;
+
+ for (cur = head->next; cur->next != head; cur = cur->next) {
+ ino_t inuma, inumb;
+ uint32_t hasha, hashb;
- spin_lock(&fmi_lock);
- list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
- if (fmi->c == c) {
- list_del(&fmi->list);
- kfree(fmi);
+ cond_resched();
+ sa = container_of(cur, struct ubifs_scan_node, list);
+ sb = container_of(cur->next, struct ubifs_scan_node, list);
+
+ if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
+ sa->type != UBIFS_XENT_NODE) {
+ ubifs_err("bad node type %d", sa->type);
+ ubifs_dump_node(c, sa->node);
+ return -EINVAL;
+ }
+ if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
+ sb->type != UBIFS_XENT_NODE) {
+ ubifs_err("bad node type %d", sb->type);
+ ubifs_dump_node(c, sb->node);
+ return -EINVAL;
}
- spin_unlock(&fmi_lock);
-}
-static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
-{
- struct failure_mode_info *fmi;
+ if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+ ubifs_err("non-inode node goes before inode node");
+ goto error_dump;
+ }
- spin_lock(&fmi_lock);
- list_for_each_entry(fmi, &fmi_list, list)
- if (fmi->c->ubi == desc) {
- struct ubifs_info *c = fmi->c;
+ if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
+ continue;
- spin_unlock(&fmi_lock);
- return c;
+ if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+ /* Inode nodes are sorted in descending size order */
+ if (sa->len < sb->len) {
+ ubifs_err("smaller inode node goes first");
+ goto error_dump;
+ }
+ continue;
}
- spin_unlock(&fmi_lock);
- return NULL;
+
+ /*
+ * This is either a dentry or xentry, which should be sorted in
+ * ascending (parent ino, hash) order.
+ */
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma < inumb)
+ continue;
+ if (inuma > inumb) {
+ ubifs_err("larger inum %lu goes before inum %lu",
+ (unsigned long)inuma, (unsigned long)inumb);
+ goto error_dump;
+ }
+
+ hasha = key_block(c, &sa->key);
+ hashb = key_block(c, &sb->key);
+
+ if (hasha > hashb) {
+ ubifs_err("larger hash %u goes before %u",
+ hasha, hashb);
+ goto error_dump;
+ }
+ }
+
+ return 0;
+
+error_dump:
+ ubifs_msg("dumping first node");
+ ubifs_dump_node(c, sa->node);
+ ubifs_msg("dumping second node");
+ ubifs_dump_node(c, sb->node);
+ return -EINVAL;
+ return 0;
}
-static int in_failure_mode(struct ubi_volume_desc *desc)
+static inline int chance(unsigned int n, unsigned int out_of)
{
- struct ubifs_info *c = dbg_find_info(desc);
+ return !!((prandom_u32() % out_of) + 1 <= n);
- if (c && dbg_failure_mode)
- return c->failure_mode;
- return 0;
}
-static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
+static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
{
- struct ubifs_info *c = dbg_find_info(desc);
+ struct ubifs_debug_info *d = c->dbg;
- if (!c || !dbg_failure_mode)
- return 0;
- if (c->failure_mode)
- return 1;
- if (!c->fail_cnt) {
- /* First call - decide delay to failure */
+ ubifs_assert(dbg_is_tst_rcvry(c));
+
+ if (!d->pc_cnt) {
+ /* First call - decide delay to the power cut */
if (chance(1, 2)) {
- unsigned int delay = 1 << (simple_rand() >> 11);
+ unsigned long delay;
if (chance(1, 2)) {
- c->fail_delay = 1;
- c->fail_timeout = jiffies +
- msecs_to_jiffies(delay);
- dbg_rcvry("failing after %ums", delay);
+ d->pc_delay = 1;
+ /* Fail within 1 minute */
+ delay = prandom_u32() % 60000;
+ d->pc_timeout = jiffies;
+ d->pc_timeout += msecs_to_jiffies(delay);
+ ubifs_warn("failing after %lums", delay);
} else {
- c->fail_delay = 2;
- c->fail_cnt_max = delay;
- dbg_rcvry("failing after %u calls", delay);
+ d->pc_delay = 2;
+ delay = prandom_u32() % 10000;
+ /* Fail within 10000 operations */
+ d->pc_cnt_max = delay;
+ ubifs_warn("failing after %lu calls", delay);
}
}
- c->fail_cnt += 1;
+
+ d->pc_cnt += 1;
}
+
/* Determine if failure delay has expired */
- if (c->fail_delay == 1) {
- if (time_before(jiffies, c->fail_timeout))
+ if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout))
return 0;
- } else if (c->fail_delay == 2)
- if (c->fail_cnt++ < c->fail_cnt_max)
+ if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max)
return 0;
+
if (lnum == UBIFS_SB_LNUM) {
- if (write) {
- if (chance(1, 2))
- return 0;
- } else if (chance(19, 20))
+ if (write && chance(1, 2))
return 0;
- dbg_rcvry("failing in super block LEB %d", lnum);
+ if (chance(19, 20))
+ return 0;
+ ubifs_warn("failing in super block LEB %d", lnum);
} else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
if (chance(19, 20))
return 0;
- dbg_rcvry("failing in master LEB %d", lnum);
+ ubifs_warn("failing in master LEB %d", lnum);
} else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
- if (write) {
- if (chance(99, 100))
- return 0;
- } else if (chance(399, 400))
+ if (write && chance(99, 100))
+ return 0;
+ if (chance(399, 400))
return 0;
- dbg_rcvry("failing in log LEB %d", lnum);
+ ubifs_warn("failing in log LEB %d", lnum);
} else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
- if (write) {
- if (chance(7, 8))
- return 0;
- } else if (chance(19, 20))
+ if (write && chance(7, 8))
return 0;
- dbg_rcvry("failing in LPT LEB %d", lnum);
+ if (chance(19, 20))
+ return 0;
+ ubifs_warn("failing in LPT LEB %d", lnum);
} else if (lnum >= c->orph_first && lnum <= c->orph_last) {
- if (write) {
- if (chance(1, 2))
- return 0;
- } else if (chance(9, 10))
+ if (write && chance(1, 2))
+ return 0;
+ if (chance(9, 10))
return 0;
- dbg_rcvry("failing in orphan LEB %d", lnum);
+ ubifs_warn("failing in orphan LEB %d", lnum);
} else if (lnum == c->ihead_lnum) {
if (chance(99, 100))
return 0;
- dbg_rcvry("failing in index head LEB %d", lnum);
+ ubifs_warn("failing in index head LEB %d", lnum);
} else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
if (chance(9, 10))
return 0;
- dbg_rcvry("failing in GC head LEB %d", lnum);
+ ubifs_warn("failing in GC head LEB %d", lnum);
} else if (write && !RB_EMPTY_ROOT(&c->buds) &&
!ubifs_search_bud(c, lnum)) {
if (chance(19, 20))
return 0;
- dbg_rcvry("failing in non-bud LEB %d", lnum);
+ ubifs_warn("failing in non-bud LEB %d", lnum);
} else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
c->cmt_state == COMMIT_RUNNING_REQUIRED) {
if (chance(999, 1000))
return 0;
- dbg_rcvry("failing in bud LEB %d commit running", lnum);
+ ubifs_warn("failing in bud LEB %d commit running", lnum);
} else {
if (chance(9999, 10000))
return 0;
- dbg_rcvry("failing in bud LEB %d commit not running", lnum);
+ ubifs_warn("failing in bud LEB %d commit not running", lnum);
}
- ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
- c->failure_mode = 1;
+
+ d->pc_happened = 1;
+ ubifs_warn("========== Power cut emulated ==========");
dump_stack();
return 1;
}
-static void cut_data(const void *buf, int len)
+static int corrupt_data(const struct ubifs_info *c, const void *buf,
+ unsigned int len)
{
- int flen, i;
+ unsigned int from, to, ffs = chance(1, 2);
unsigned char *p = (void *)buf;
- flen = (len * (long long)simple_rand()) >> 15;
- for (i = flen; i < len; i++)
- p[i] = 0xff;
-}
+ from = prandom_u32() % len;
+ /* Corruption span max to end of write unit */
+ to = min(len, ALIGN(from + 1, c->max_write_size));
-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
- int len, int check)
-{
- if (in_failure_mode(desc))
- return -EIO;
- return ubi_leb_read(desc, lnum, buf, offset, len, check);
+ ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
+ ffs ? "0xFFs" : "random data");
+
+ if (ffs)
+ memset(p + from, 0xFF, to - from);
+ else
+ prandom_bytes(p + from, to - from);
+
+ return to;
}
-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int offset, int len, int dtype)
+int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
+ int offs, int len)
{
- int err;
+ int err, failing;
+
+ if (c->dbg->pc_happened)
+ return -EROFS;
- if (in_failure_mode(desc))
- return -EIO;
- if (do_fail(desc, lnum, 1))
- cut_data(buf, len);
- err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
+ failing = power_cut_emulated(c, lnum, 1);
+ if (failing) {
+ len = corrupt_data(c, buf, len);
+ ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
+ len, lnum, offs);
+ }
+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
if (err)
return err;
- if (in_failure_mode(desc))
- return -EIO;
+ if (failing)
+ return -EROFS;
return 0;
}
-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int len, int dtype)
+int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf,
+ int len)
{
int err;
- if (do_fail(desc, lnum, 1))
- return -EIO;
- err = ubi_leb_change(desc, lnum, buf, len, dtype);
+ if (c->dbg->pc_happened)
+ return -EROFS;
+ if (power_cut_emulated(c, lnum, 1))
+ return -EROFS;
+ err = ubi_leb_change(c->ubi, lnum, buf, len);
if (err)
return err;
- if (do_fail(desc, lnum, 1))
- return -EIO;
+ if (power_cut_emulated(c, lnum, 1))
+ return -EROFS;
return 0;
}
-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
+int dbg_leb_unmap(struct ubifs_info *c, int lnum)
{
int err;
- if (do_fail(desc, lnum, 0))
- return -EIO;
- err = ubi_leb_erase(desc, lnum);
+ if (c->dbg->pc_happened)
+ return -EROFS;
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
+ err = ubi_leb_unmap(c->ubi, lnum);
if (err)
return err;
- if (do_fail(desc, lnum, 0))
- return -EIO;
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
return 0;
}
-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
+int dbg_leb_map(struct ubifs_info *c, int lnum)
{
int err;
- if (do_fail(desc, lnum, 0))
- return -EIO;
- err = ubi_leb_unmap(desc, lnum);
+ if (c->dbg->pc_happened)
+ return -EROFS;
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
+ err = ubi_leb_map(c->ubi, lnum);
if (err)
return err;
- if (do_fail(desc, lnum, 0))
- return -EIO;
+ if (power_cut_emulated(c, lnum, 0))
+ return -EROFS;
+ return 0;
+}
+
+/*
+ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
+ * contain the stuff specific to particular file-system mounts.
+ */
+static struct dentry *dfs_rootdir;
+
+static int dfs_file_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return nonseekable_open(inode, file);
+}
+
+/**
+ * provide_user_output - provide output to the user reading a debugfs file.
+ * @val: boolean value for the answer
+ * @u: the buffer to store the answer at
+ * @count: size of the buffer
+ * @ppos: position in the @u output buffer
+ *
+ * This is a simple helper function which stores @val boolean value in the user
+ * buffer when the user reads one of UBIFS debugfs files. Returns amount of
+ * bytes written to @u in case of success and a negative error code in case of
+ * failure.
+ */
+static int provide_user_output(int val, char __user *u, size_t count,
+ loff_t *ppos)
+{
+ char buf[3];
+
+ if (val)
+ buf[0] = '1';
+ else
+ buf[0] = '0';
+ buf[1] = '\n';
+ buf[2] = 0x00;
+
+ return simple_read_from_buffer(u, count, ppos, buf, 2);
+}
+
+static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
+ loff_t *ppos)
+{
+ struct dentry *dent = file->f_path.dentry;
+ struct ubifs_info *c = file->private_data;
+ struct ubifs_debug_info *d = c->dbg;
+ int val;
+
+ if (dent == d->dfs_chk_gen)
+ val = d->chk_gen;
+ else if (dent == d->dfs_chk_index)
+ val = d->chk_index;
+ else if (dent == d->dfs_chk_orph)
+ val = d->chk_orph;
+ else if (dent == d->dfs_chk_lprops)
+ val = d->chk_lprops;
+ else if (dent == d->dfs_chk_fs)
+ val = d->chk_fs;
+ else if (dent == d->dfs_tst_rcvry)
+ val = d->tst_rcvry;
+ else if (dent == d->dfs_ro_error)
+ val = c->ro_error;
+ else
+ return -EINVAL;
+
+ return provide_user_output(val, u, count, ppos);
+}
+
+/**
+ * interpret_user_input - interpret user debugfs file input.
+ * @u: user-provided buffer with the input
+ * @count: buffer size
+ *
+ * This is a helper function which interprets user input to a boolean UBIFS
+ * debugfs file. Returns %0 or %1 in case of success and a negative error code
+ * in case of failure (including empty input, which yields %-EINVAL).
+ */
+static int interpret_user_input(const char __user *u, size_t count)
+{
+ size_t buf_size;
+ char buf[8] = {0};
+
+ buf_size = min_t(size_t, count, (sizeof(buf) - 1));
+ if (copy_from_user(buf, u, buf_size))
+ return -EFAULT;
+
+ if (buf[0] == '1')
+ return 1;
+ else if (buf[0] == '0')
+ return 0;
+
+ return -EINVAL;
+}
+
+static ssize_t dfs_file_write(struct file *file, const char __user *u,
+ size_t count, loff_t *ppos)
+{
+ struct ubifs_info *c = file->private_data;
+ struct ubifs_debug_info *d = c->dbg;
+ struct dentry *dent = file->f_path.dentry;
+ int val;
+
+ /*
+ * TODO: this is racy - the file-system might have already been
+ * unmounted and we'd oops in this case. The plan is to fix it with
+ * help of 'iterate_supers_type()' which we should have in v3.0: when
+ * a debugfs file is opened, we remember FS's UUID in file->private_data. Then
+ * whenever we access the FS via a debugfs file, we iterate all UBIFS
+ * superblocks and find the one with the same UUID, and take the
+ * locking right.
+ *
+ * The other way to go suggested by Al Viro is to create a separate
+ * 'ubifs-debug' file-system instead.
+ */
+ if (file->f_path.dentry == d->dfs_dump_lprops) {
+ ubifs_dump_lprops(c);
+ return count;
+ }
+ if (file->f_path.dentry == d->dfs_dump_budg) {
+ ubifs_dump_budg(c, &c->bi);
+ return count;
+ }
+ if (file->f_path.dentry == d->dfs_dump_tnc) {
+ mutex_lock(&c->tnc_mutex);
+ ubifs_dump_tnc(c);
+ mutex_unlock(&c->tnc_mutex);
+ return count;
+ }
+
+ val = interpret_user_input(u, count);
+ if (val < 0)
+ return val;
+
+ if (dent == d->dfs_chk_gen)
+ d->chk_gen = val;
+ else if (dent == d->dfs_chk_index)
+ d->chk_index = val;
+ else if (dent == d->dfs_chk_orph)
+ d->chk_orph = val;
+ else if (dent == d->dfs_chk_lprops)
+ d->chk_lprops = val;
+ else if (dent == d->dfs_chk_fs)
+ d->chk_fs = val;
+ else if (dent == d->dfs_tst_rcvry)
+ d->tst_rcvry = val;
+ else if (dent == d->dfs_ro_error)
+ c->ro_error = !!val;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static const struct file_operations dfs_fops = {
+ .open = dfs_file_open,
+ .read = dfs_file_read,
+ .write = dfs_file_write,
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+};
+
+/**
+ * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance.
+ * @c: UBIFS file-system description object
+ *
+ * This function creates all debugfs files for this instance of UBIFS. Returns
+ * zero in case of success and a negative error code in case of failure.
+ *
+ * Note, the only reason we have not merged this function with the
+ * 'ubifs_debugging_init()' function is because it is better to initialize
+ * debugfs interfaces at the very end of the mount process, and remove them at
+ * the very beginning of the unmount process.
+ */
+int dbg_debugfs_init_fs(struct ubifs_info *c)
+{
+ int err, n;
+ const char *fname;
+ struct dentry *dent;
+ struct ubifs_debug_info *d = c->dbg;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+
+ n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
+ c->vi.ubi_num, c->vi.vol_id);
+ if (n > UBIFS_DFS_DIR_LEN) {
+ /* The array size is too small */
+ fname = UBIFS_DFS_DIR_NAME;
+ dent = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ fname = d->dfs_dir_name;
+ dent = debugfs_create_dir(fname, dfs_rootdir);
+ if (IS_ERR_OR_NULL(dent))
+ goto out;
+ d->dfs_dir = dent;
+
+ fname = "dump_lprops";
+ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_dump_lprops = dent;
+
+ fname = "dump_budg";
+ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_dump_budg = dent;
+
+ fname = "dump_tnc";
+ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_dump_tnc = dent;
+
+ fname = "chk_general";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_gen = dent;
+
+ fname = "chk_index";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_index = dent;
+
+ fname = "chk_orphans";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_orph = dent;
+
+ fname = "chk_lprops";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_lprops = dent;
+
+ fname = "chk_fs";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_chk_fs = dent;
+
+ fname = "tst_recovery";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_tst_rcvry = dent;
+
+ fname = "ro_error";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+ &dfs_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ d->dfs_ro_error = dent;
+
+ return 0;
+
+out_remove:
+ debugfs_remove_recursive(d->dfs_dir);
+out:
+ err = dent ? PTR_ERR(dent) : -ENODEV;
+ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+ fname, err);
+ return err;
+}
+
+/**
+ * dbg_debugfs_exit_fs - remove all debugfs files.
+ * @c: UBIFS file-system description object
+ */
+void dbg_debugfs_exit_fs(struct ubifs_info *c)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_FS))
+ debugfs_remove_recursive(c->dbg->dfs_dir);
+}
+
+struct ubifs_global_debug_info ubifs_dbg;
+
+static struct dentry *dfs_chk_gen;
+static struct dentry *dfs_chk_index;
+static struct dentry *dfs_chk_orph;
+static struct dentry *dfs_chk_lprops;
+static struct dentry *dfs_chk_fs;
+static struct dentry *dfs_tst_rcvry;
+
+static ssize_t dfs_global_file_read(struct file *file, char __user *u,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file->f_path.dentry;
+ int val;
+
+ if (dent == dfs_chk_gen)
+ val = ubifs_dbg.chk_gen;
+ else if (dent == dfs_chk_index)
+ val = ubifs_dbg.chk_index;
+ else if (dent == dfs_chk_orph)
+ val = ubifs_dbg.chk_orph;
+ else if (dent == dfs_chk_lprops)
+ val = ubifs_dbg.chk_lprops;
+ else if (dent == dfs_chk_fs)
+ val = ubifs_dbg.chk_fs;
+ else if (dent == dfs_tst_rcvry)
+ val = ubifs_dbg.tst_rcvry;
+ else
+ return -EINVAL;
+
+ return provide_user_output(val, u, count, ppos);
}
-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
+static ssize_t dfs_global_file_write(struct file *file, const char __user *u,
+ size_t count, loff_t *ppos)
{
- if (in_failure_mode(desc))
- return -EIO;
- return ubi_is_mapped(desc, lnum);
+ struct dentry *dent = file->f_path.dentry;
+ int val;
+
+ val = interpret_user_input(u, count);
+ if (val < 0)
+ return val;
+
+ if (dent == dfs_chk_gen)
+ ubifs_dbg.chk_gen = val;
+ else if (dent == dfs_chk_index)
+ ubifs_dbg.chk_index = val;
+ else if (dent == dfs_chk_orph)
+ ubifs_dbg.chk_orph = val;
+ else if (dent == dfs_chk_lprops)
+ ubifs_dbg.chk_lprops = val;
+ else if (dent == dfs_chk_fs)
+ ubifs_dbg.chk_fs = val;
+ else if (dent == dfs_tst_rcvry)
+ ubifs_dbg.tst_rcvry = val;
+ else
+ return -EINVAL;
+
+ return count;
}
-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
+static const struct file_operations dfs_global_fops = {
+ .read = dfs_global_file_read,
+ .write = dfs_global_file_write,
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+};
+
+/**
+ * dbg_debugfs_init - initialize debugfs file-system.
+ *
+ * UBIFS uses debugfs file-system to expose various debugging knobs to
+ * user-space. This function creates "ubifs" directory in the debugfs
+ * file-system. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int dbg_debugfs_init(void)
{
int err;
+ const char *fname;
+ struct dentry *dent;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_FS))
+ return 0;
+
+ fname = "ubifs";
+ dent = debugfs_create_dir(fname, NULL);
+ if (IS_ERR_OR_NULL(dent))
+ goto out;
+ dfs_rootdir = dent;
+
+ fname = "chk_general";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_gen = dent;
+
+ fname = "chk_index";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_index = dent;
+
+ fname = "chk_orphans";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_orph = dent;
+
+ fname = "chk_lprops";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_lprops = dent;
+
+ fname = "chk_fs";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_chk_fs = dent;
+
+ fname = "tst_recovery";
+ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
+ &dfs_global_fops);
+ if (IS_ERR_OR_NULL(dent))
+ goto out_remove;
+ dfs_tst_rcvry = dent;
- if (do_fail(desc, lnum, 0))
- return -EIO;
- err = ubi_leb_map(desc, lnum, dtype);
- if (err)
- return err;
- if (do_fail(desc, lnum, 0))
- return -EIO;
return 0;
+
+out_remove:
+ debugfs_remove_recursive(dfs_rootdir);
+out:
+ err = dent ? PTR_ERR(dent) : -ENODEV;
+ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+ fname, err);
+ return err;
}
-#endif /* CONFIG_UBIFS_FS_DEBUG */
+/**
+ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
+ */
+void dbg_debugfs_exit(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_FS))
+ debugfs_remove_recursive(dfs_rootdir);
+}
+
+/**
+ * ubifs_debugging_init - initialize UBIFS debugging.
+ * @c: UBIFS file-system description object
+ *
+ * This function initializes debugging-related data for the file system.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubifs_debugging_init(struct ubifs_info *c)
+{
+ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
+ if (!c->dbg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * ubifs_debugging_exit - free debugging data.
+ * @c: UBIFS file-system description object
+ */
+void ubifs_debugging_exit(struct ubifs_info *c)
+{
+ kfree(c->dbg);
+}
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 3c4f1e93c9e..e03d5179769 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,381 +23,293 @@
#ifndef __UBIFS_DEBUG_H__
#define __UBIFS_DEBUG_H__
-#ifdef CONFIG_UBIFS_FS_DEBUG
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+ struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+ struct ubifs_znode *znode, void *priv);
-#define UBIFS_DBG(op) op
+/*
+ * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
+ * + 1 for "_" + 2x2 for 2 UBI numbers + 1 for the trailing zero byte).
+ */
+#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
+#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
+
+/**
+ * ubifs_debug_info - per-FS debugging information.
+ * @old_zroot: old index root - used by 'dbg_check_old_index()'
+ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
+ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
+ *
+ * @pc_happened: non-zero if an emulated power cut happened
+ * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
+ * @pc_timeout: time in jiffies when delay of failure mode expires
+ * @pc_cnt: current number of calls to failure mode I/O functions
+ * @pc_cnt_max: number of calls by which to delay failure mode
+ *
+ * @chk_lpt_sz: used by LPT tree size checker
+ * @chk_lpt_sz2: used by LPT tree size checker
+ * @chk_lpt_wastage: used by LPT tree size checker
+ * @chk_lpt_lebs: used by LPT tree size checker
+ * @new_nhead_offs: used by LPT tree size checker
+ * @new_ihead_lnum: used by debugging to check @c->ihead_lnum
+ * @new_ihead_offs: used by debugging to check @c->ihead_offs
+ *
+ * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
+ * @saved_bi: saved budgeting information
+ * @saved_free: saved amount of free space
+ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
+ *
+ * @chk_gen: if general extra checks are enabled
+ * @chk_index: if index extra checks are enabled
+ * @chk_orph: if orphans extra checks are enabled
+ * @chk_lprops: if lprops extra checks are enabled
+ * @chk_fs: if UBIFS contents extra checks are enabled
+ * @tst_rcvry: if UBIFS recovery testing mode is enabled
+ *
+ * @dfs_dir_name: name of debugfs directory containing this file-system's files
+ * @dfs_dir: direntry object of the file-system debugfs directory
+ * @dfs_dump_lprops: "dump lprops" debugfs knob
+ * @dfs_dump_budg: "dump budgeting information" debugfs knob
+ * @dfs_dump_tnc: "dump TNC" debugfs knob
+ * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
+ * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
+ * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
+ * @dfs_chk_lprops: debugfs knob to enable UBIFS LEB properties extra checks
+ * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
+ * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
+ * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to
+ * re-mounting to R/O mode because it does not flush any buffers
+ * and UBIFS just starts returning -EROFS on all write
+ * operations)
+ */
+struct ubifs_debug_info {
+ struct ubifs_zbranch old_zroot;
+ int old_zroot_level;
+ unsigned long long old_zroot_sqnum;
+
+ int pc_happened;
+ int pc_delay;
+ unsigned long pc_timeout;
+ unsigned int pc_cnt;
+ unsigned int pc_cnt_max;
+
+ long long chk_lpt_sz;
+ long long chk_lpt_sz2;
+ long long chk_lpt_wastage;
+ int chk_lpt_lebs;
+ int new_nhead_offs;
+ int new_ihead_lnum;
+ int new_ihead_offs;
+
+ struct ubifs_lp_stats saved_lst;
+ struct ubifs_budg_info saved_bi;
+ long long saved_free;
+ int saved_idx_gc_cnt;
+
+ unsigned int chk_gen:1;
+ unsigned int chk_index:1;
+ unsigned int chk_orph:1;
+ unsigned int chk_lprops:1;
+ unsigned int chk_fs:1;
+ unsigned int tst_rcvry:1;
+
+ char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
+ struct dentry *dfs_dir;
+ struct dentry *dfs_dump_lprops;
+ struct dentry *dfs_dump_budg;
+ struct dentry *dfs_dump_tnc;
+ struct dentry *dfs_chk_gen;
+ struct dentry *dfs_chk_index;
+ struct dentry *dfs_chk_orph;
+ struct dentry *dfs_chk_lprops;
+ struct dentry *dfs_chk_fs;
+ struct dentry *dfs_tst_rcvry;
+ struct dentry *dfs_ro_error;
+};
-#define ubifs_assert(expr) do { \
+/**
+ * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information.
+ *
+ * @chk_gen: if general extra checks are enabled
+ * @chk_index: if index extra checks are enabled
+ * @chk_orph: if orphans extra checks are enabled
+ * @chk_lprops: if lprops extra checks are enabled
+ * @chk_fs: if UBIFS contents extra checks are enabled
+ * @tst_rcvry: if UBIFS recovery testing mode is enabled
+ */
+struct ubifs_global_debug_info {
+ unsigned int chk_gen:1;
+ unsigned int chk_index:1;
+ unsigned int chk_orph:1;
+ unsigned int chk_lprops:1;
+ unsigned int chk_fs:1;
+ unsigned int tst_rcvry:1;
+};
+
+#define ubifs_assert(expr) do { \
if (unlikely(!(expr))) { \
- printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
+ pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \
__func__, __LINE__, current->pid); \
- dbg_dump_stack(); \
+ dump_stack(); \
} \
} while (0)
#define ubifs_assert_cmt_locked(c) do { \
if (unlikely(down_write_trylock(&(c)->commit_sem))) { \
up_write(&(c)->commit_sem); \
- printk(KERN_CRIT "commit lock is not locked!\n"); \
+ pr_crit("commit lock is not locked!\n"); \
ubifs_assert(0); \
} \
} while (0)
-#define dbg_dump_stack() do { \
- if (!dbg_failure_mode) \
- dump_stack(); \
-} while (0)
-
-/* Generic debugging messages */
-#define dbg_msg(fmt, ...) do { \
- spin_lock(&dbg_lock); \
- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
- __func__, ##__VA_ARGS__); \
- spin_unlock(&dbg_lock); \
-} while (0)
-
-#define dbg_do_msg(typ, fmt, ...) do { \
- if (ubifs_msg_flags & typ) \
- dbg_msg(fmt, ##__VA_ARGS__); \
-} while (0)
+#define ubifs_dbg_msg(type, fmt, ...) \
+ pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \
+ ##__VA_ARGS__)
-#define dbg_err(fmt, ...) do { \
- spin_lock(&dbg_lock); \
- ubifs_err(fmt, ##__VA_ARGS__); \
- spin_unlock(&dbg_lock); \
+#define DBG_KEY_BUF_LEN 48
+#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
+ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
+ pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \
+ ##__VA_ARGS__, \
+ dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \
} while (0)
-const char *dbg_key_str0(const struct ubifs_info *c,
- const union ubifs_key *key);
-const char *dbg_key_str1(const struct ubifs_info *c,
- const union ubifs_key *key);
-
-/*
- * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
- * macros.
- */
-#define DBGKEY(key) dbg_key_str0(c, (key))
-#define DBGKEY1(key) dbg_key_str1(c, (key))
-
/* General messages */
-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
-
+#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
/* Additional journal messages */
-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
-
+#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
+#define dbg_jnlk(key, fmt, ...) \
+ ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__)
/* Additional TNC messages */
-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
-
+#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
+#define dbg_tnck(key, fmt, ...) \
+ ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__)
/* Additional lprops messages */
-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
-
+#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
/* Additional LEB find messages */
-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
-
+#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
/* Additional mount messages */
-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
-
+#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
+#define dbg_mntk(key, fmt, ...) \
+ ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__)
/* Additional I/O messages */
-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
-
+#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
/* Additional commit messages */
-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
-
+#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
/* Additional budgeting messages */
-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
-
+#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
/* Additional log messages */
-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
-
+#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
/* Additional gc messages */
-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
-
+#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
/* Additional scan messages */
-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
-
+#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
/* Additional recovery messages */
-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
-
-/*
- * Debugging message type flags (must match msg_type_names in debug.c).
- *
- * UBIFS_MSG_GEN: general messages
- * UBIFS_MSG_JNL: journal messages
- * UBIFS_MSG_MNT: mount messages
- * UBIFS_MSG_CMT: commit messages
- * UBIFS_MSG_FIND: LEB find messages
- * UBIFS_MSG_BUDG: budgeting messages
- * UBIFS_MSG_GC: garbage collection messages
- * UBIFS_MSG_TNC: TNC messages
- * UBIFS_MSG_LP: lprops messages
- * UBIFS_MSG_IO: I/O messages
- * UBIFS_MSG_LOG: log messages
- * UBIFS_MSG_SCAN: scan messages
- * UBIFS_MSG_RCVRY: recovery messages
- */
-enum {
- UBIFS_MSG_GEN = 0x1,
- UBIFS_MSG_JNL = 0x2,
- UBIFS_MSG_MNT = 0x4,
- UBIFS_MSG_CMT = 0x8,
- UBIFS_MSG_FIND = 0x10,
- UBIFS_MSG_BUDG = 0x20,
- UBIFS_MSG_GC = 0x40,
- UBIFS_MSG_TNC = 0x80,
- UBIFS_MSG_LP = 0x100,
- UBIFS_MSG_IO = 0x200,
- UBIFS_MSG_LOG = 0x400,
- UBIFS_MSG_SCAN = 0x800,
- UBIFS_MSG_RCVRY = 0x1000,
-};
-
-/* Debugging message type flags for each default debug message level */
-#define UBIFS_MSG_LVL_0 0
-#define UBIFS_MSG_LVL_1 0x1
-#define UBIFS_MSG_LVL_2 0x7f
-#define UBIFS_MSG_LVL_3 0xffff
-
-/*
- * Debugging check flags (must match chk_names in debug.c).
- *
- * UBIFS_CHK_GEN: general checks
- * UBIFS_CHK_TNC: check TNC
- * UBIFS_CHK_IDX_SZ: check index size
- * UBIFS_CHK_ORPH: check orphans
- * UBIFS_CHK_OLD_IDX: check the old index
- * UBIFS_CHK_LPROPS: check lprops
- * UBIFS_CHK_FS: check the file-system
- */
-enum {
- UBIFS_CHK_GEN = 0x1,
- UBIFS_CHK_TNC = 0x2,
- UBIFS_CHK_IDX_SZ = 0x4,
- UBIFS_CHK_ORPH = 0x8,
- UBIFS_CHK_OLD_IDX = 0x10,
- UBIFS_CHK_LPROPS = 0x20,
- UBIFS_CHK_FS = 0x40,
-};
-
-/*
- * Special testing flags (must match tst_names in debug.c).
- *
- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
- * UBIFS_TST_RCVRY: failure mode for recovery testing
- */
-enum {
- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
- UBIFS_TST_RCVRY = 0x4,
-};
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
-#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
-#else
-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
-#endif
+extern struct ubifs_global_debug_info ubifs_dbg;
-#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
-#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
-#else
-#define UBIFS_CHK_FLAGS_DEFAULT 0
-#endif
-
-extern spinlock_t dbg_lock;
+static inline int dbg_is_chk_gen(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen);
+}
+static inline int dbg_is_chk_index(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_index || c->dbg->chk_index);
+}
+static inline int dbg_is_chk_orph(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph);
+}
+static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops);
+}
+static inline int dbg_is_chk_fs(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs);
+}
+static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
+{
+ return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry);
+}
+static inline int dbg_is_power_cut(const struct ubifs_info *c)
+{
+ return !!c->dbg->pc_happened;
+}
-extern unsigned int ubifs_msg_flags;
-extern unsigned int ubifs_chk_flags;
-extern unsigned int ubifs_tst_flags;
+int ubifs_debugging_init(struct ubifs_info *c);
+void ubifs_debugging_exit(struct ubifs_info *c);
/* Dump functions */
-
const char *dbg_ntype(int type);
const char *dbg_cstate(int cmt_state);
+const char *dbg_jhead(int jhead);
const char *dbg_get_key_dump(const struct ubifs_info *c,
const union ubifs_key *key);
-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
-void dbg_dump_node(const struct ubifs_info *c, const void *node);
-void dbg_dump_budget_req(const struct ubifs_budget_req *req);
-void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
-void dbg_dump_budg(struct ubifs_info *c);
-void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
-void dbg_dump_lprops(struct ubifs_info *c);
-void dbg_dump_leb(const struct ubifs_info *c, int lnum);
-void dbg_dump_znode(const struct ubifs_info *c,
- const struct ubifs_znode *znode);
-void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat);
-void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
- struct ubifs_nnode *parent, int iip);
-void dbg_dump_tnc(struct ubifs_info *c);
-void dbg_dump_index(struct ubifs_info *c);
-
-/* Checking helper functions */
-
-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
- struct ubifs_zbranch *zbr, void *priv);
-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
- struct ubifs_znode *znode, void *priv);
+const char *dbg_snprintf_key(const struct ubifs_info *c,
+ const union ubifs_key *key, char *buffer, int len);
+void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode);
+void ubifs_dump_node(const struct ubifs_info *c, const void *node);
+void ubifs_dump_budget_req(const struct ubifs_budget_req *req);
+void ubifs_dump_lstats(const struct ubifs_lp_stats *lst);
+void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
+void ubifs_dump_lprop(const struct ubifs_info *c,
+ const struct ubifs_lprops *lp);
+void ubifs_dump_lprops(struct ubifs_info *c);
+void ubifs_dump_lpt_info(struct ubifs_info *c);
+void ubifs_dump_leb(const struct ubifs_info *c, int lnum);
+void ubifs_dump_sleb(const struct ubifs_info *c,
+ const struct ubifs_scan_leb *sleb, int offs);
+void ubifs_dump_znode(const struct ubifs_info *c,
+ const struct ubifs_znode *znode);
+void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
+ int cat);
+void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+ struct ubifs_nnode *parent, int iip);
+void ubifs_dump_tnc(struct ubifs_info *c);
+void ubifs_dump_index(struct ubifs_info *c);
+void ubifs_dump_lpt_lebs(const struct ubifs_info *c);
int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
dbg_znode_callback znode_cb, void *priv);
/* Checking functions */
-
+void dbg_save_space_info(struct ubifs_info *c);
+int dbg_check_space_info(struct ubifs_info *c);
int dbg_check_lprops(struct ubifs_info *c);
-
int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
-
int dbg_check_cats(struct ubifs_info *c);
-
int dbg_check_ltab(struct ubifs_info *c);
-
-int dbg_check_synced_i_size(struct inode *inode);
-
-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
-
+int dbg_chk_lpt_free_spc(struct ubifs_info *c);
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
+int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
+int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
int dbg_check_tnc(struct ubifs_info *c, int extra);
-
int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
-
int dbg_check_filesystem(struct ubifs_info *c);
-
void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
int add_pos);
-
-int dbg_check_lprops(struct ubifs_info *c);
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
int row, int col);
-
-/* Force the use of in-the-gaps method for testing */
-
-#define dbg_force_in_the_gaps_enabled \
- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
-
-int dbg_force_in_the_gaps(void);
-
-/* Failure mode for recovery testing */
-
-#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
-
-void dbg_failure_mode_registration(struct ubifs_info *c);
-void dbg_failure_mode_deregistration(struct ubifs_info *c);
-
-#ifndef UBIFS_DBG_PRESERVE_UBI
-
-#define ubi_leb_read dbg_leb_read
-#define ubi_leb_write dbg_leb_write
-#define ubi_leb_change dbg_leb_change
-#define ubi_leb_erase dbg_leb_erase
-#define ubi_leb_unmap dbg_leb_unmap
-#define ubi_is_mapped dbg_is_mapped
-#define ubi_leb_map dbg_leb_map
-
-#endif
-
-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
- int len, int check);
-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int offset, int len, int dtype);
-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
- int len, int dtype);
-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
-
-static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
- int offset, int len)
-{
- return dbg_leb_read(desc, lnum, buf, offset, len, 0);
-}
-
-static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
- const void *buf, int offset, int len)
-{
- return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
-}
-
-static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
- const void *buf, int len)
-{
- return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
-}
-
-#else /* !CONFIG_UBIFS_FS_DEBUG */
-
-#define UBIFS_DBG(op)
-#define ubifs_assert(expr) ({})
-#define ubifs_assert_cmt_locked(c)
-#define dbg_dump_stack()
-#define dbg_err(fmt, ...) ({})
-#define dbg_msg(fmt, ...) ({})
-#define dbg_key(c, key, fmt, ...) ({})
-
-#define dbg_gen(fmt, ...) ({})
-#define dbg_jnl(fmt, ...) ({})
-#define dbg_tnc(fmt, ...) ({})
-#define dbg_lp(fmt, ...) ({})
-#define dbg_find(fmt, ...) ({})
-#define dbg_mnt(fmt, ...) ({})
-#define dbg_io(fmt, ...) ({})
-#define dbg_cmt(fmt, ...) ({})
-#define dbg_budg(fmt, ...) ({})
-#define dbg_log(fmt, ...) ({})
-#define dbg_gc(fmt, ...) ({})
-#define dbg_scan(fmt, ...) ({})
-#define dbg_rcvry(fmt, ...) ({})
-
-#define dbg_ntype(type) ""
-#define dbg_cstate(cmt_state) ""
-#define dbg_get_key_dump(c, key) ({})
-#define dbg_dump_inode(c, inode) ({})
-#define dbg_dump_node(c, node) ({})
-#define dbg_dump_budget_req(req) ({})
-#define dbg_dump_lstats(lst) ({})
-#define dbg_dump_budg(c) ({})
-#define dbg_dump_lprop(c, lp) ({})
-#define dbg_dump_lprops(c) ({})
-#define dbg_dump_leb(c, lnum) ({})
-#define dbg_dump_znode(c, znode) ({})
-#define dbg_dump_heap(c, heap, cat) ({})
-#define dbg_dump_pnode(c, pnode, parent, iip) ({})
-#define dbg_dump_tnc(c) ({})
-#define dbg_dump_index(c) ({})
-
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
-
-#define dbg_old_index_check_init(c, zroot) 0
-#define dbg_check_old_index(c, zroot) 0
-
-#define dbg_check_cats(c) 0
-
-#define dbg_check_ltab(c) 0
-
-#define dbg_check_synced_i_size(inode) 0
-
-#define dbg_check_dir_size(c, dir) 0
-
-#define dbg_check_tnc(c, x) 0
-
-#define dbg_check_idx_size(c, idx_size) 0
-
-#define dbg_check_filesystem(c) 0
-
-#define dbg_check_heap(c, heap, cat, add_pos) ({})
-
-#define dbg_check_lprops(c) 0
-#define dbg_check_lpt_nodes(c, cnode, row, col) 0
-
-#define dbg_force_in_the_gaps_enabled 0
-#define dbg_force_in_the_gaps() 0
-
-#define dbg_failure_mode 0
-#define dbg_failure_mode_registration(c) ({})
-#define dbg_failure_mode_deregistration(c) ({})
-
-#endif /* !CONFIG_UBIFS_FS_DEBUG */
+int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
+ loff_t size);
+int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
+int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
+
+int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len);
+int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len);
+int dbg_leb_unmap(struct ubifs_info *c, int lnum);
+int dbg_leb_map(struct ubifs_info *c, int lnum);
+
+/* Debugfs-related stuff */
+int dbg_debugfs_init(void);
+void dbg_debugfs_exit(void);
+int dbg_debugfs_init_fs(struct ubifs_info *c);
+void dbg_debugfs_exit_fs(struct ubifs_info *c);
#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e90374be7d3..ea41649e4ca 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -56,7 +56,7 @@
*
* This function returns the inherited flags.
*/
-static int inherit_flags(const struct inode *dir, int mode)
+static int inherit_flags(const struct inode *dir, umode_t mode)
{
int flags;
const struct ubifs_inode *ui = ubifs_inode(dir);
@@ -86,7 +86,7 @@ static int inherit_flags(const struct inode *dir, int mode)
* case of failure.
*/
struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
- int mode)
+ umode_t mode)
{
struct inode *inode;
struct ubifs_inode *ui;
@@ -102,16 +102,9 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
* UBIFS has to fully control "clean <-> dirty" transitions of inodes
* to make budgeting work.
*/
- inode->i_flags |= (S_NOCMTIME);
+ inode->i_flags |= S_NOCMTIME;
- inode->i_uid = current->fsuid;
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else
- inode->i_gid = current->fsgid;
- inode->i_mode = mode;
+ inode_init_owner(inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
ubifs_current_time(inode);
inode->i_mapping->nrpages = 0;
@@ -161,11 +154,10 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
return ERR_PTR(-EINVAL);
}
ubifs_warn("running out of inode numbers (current %lu, max %d)",
- c->highest_inum, INUM_WATERMARK);
+ (unsigned long)c->highest_inum, INUM_WATERMARK);
}
inode->i_ino = ++c->highest_inum;
- inode->i_generation = ++c->vfs_gen;
/*
* The creation sequence number remains with this inode for its
* lifetime. All nodes for this inode have a greater sequence number,
@@ -178,11 +170,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
return inode;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
-
-static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
+static int dbg_check_name(const struct ubifs_info *c,
+ const struct ubifs_dent_node *dent,
+ const struct qstr *nm)
{
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
if (le16_to_cpu(dent->nlen) != nm->len)
return -EINVAL;
@@ -191,14 +183,8 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
return 0;
}
-#else
-
-#define dbg_check_name(dent, nm) 0
-
-#endif
-
static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
+ unsigned int flags)
{
int err;
union ubifs_key key;
@@ -206,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
struct ubifs_dent_node *dent;
struct ubifs_info *c = dir->i_sb->s_fs_info;
- dbg_gen("'%.*s' in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+ dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
if (dentry->d_name.len > UBIFS_MAX_NLEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -220,22 +205,14 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
if (err) {
- /*
- * Do not hash the direntry if parent 'i_nlink' is zero, because
- * this has side-effects - '->delete_inode()' call will not be
- * called for the parent orphan inode, because 'd_count' of its
- * direntry will stay 1 (it'll be negative direntry I guess)
- * and prevent 'iput_final()' until the dentry is destroyed due
- * to unmount or memory pressure.
- */
- if (err == -ENOENT && dir->i_nlink != 0) {
+ if (err == -ENOENT) {
dbg_gen("not found");
goto done;
}
goto out;
}
- if (dbg_check_name(dent, &dentry->d_name)) {
+ if (dbg_check_name(c, dent, &dentry->d_name)) {
err = -EINVAL;
goto out;
}
@@ -247,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
* checking.
*/
err = PTR_ERR(inode);
- ubifs_err("dead directory entry '%.*s', error %d",
- dentry->d_name.len, dentry->d_name.name, err);
+ ubifs_err("dead directory entry '%pd', error %d",
+ dentry, err);
ubifs_ro_mode(c, err);
goto out;
}
@@ -267,8 +244,8 @@ out:
return ERR_PTR(err);
}
-static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
+static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ bool excl)
{
struct inode *inode;
struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -282,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
* parent directory inode.
*/
- dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+ dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+ dentry, mode, dir->i_ino);
err = ubifs_budget_space(c, &req);
if (err)
@@ -368,38 +345,46 @@ static unsigned int vfs_dent_type(uint8_t type)
* This means that UBIFS cannot support NFS which requires full
* 'seekdir()'/'telldir()' support.
*/
-static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
+static int ubifs_readdir(struct file *file, struct dir_context *ctx)
{
- int err, over = 0;
+ int err;
struct qstr nm;
union ubifs_key key;
struct ubifs_dent_node *dent;
- struct inode *dir = file->f_path.dentry->d_inode;
+ struct inode *dir = file_inode(file);
struct ubifs_info *c = dir->i_sb->s_fs_info;
- dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
+ dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos);
- if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
+ if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2)
/*
* The directory was seek'ed to a senseless position or there
* are no more entries.
*/
return 0;
- /* File positions 0 and 1 correspond to "." and ".." */
- if (file->f_pos == 0) {
- ubifs_assert(!file->private_data);
- over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
- if (over)
- return 0;
- file->f_pos = 1;
+ if (file->f_version == 0) {
+ /*
+ * The file was seek'ed, which means that @file->private_data
+ * is now invalid. This may also be just the first
+ * 'ubifs_readdir()' invocation, in which case
+ * @file->private_data is NULL, and the below code is
+ * basically a no-op.
+ */
+ kfree(file->private_data);
+ file->private_data = NULL;
}
- if (file->f_pos == 1) {
+ /*
+ * 'generic_file_llseek()' unconditionally sets @file->f_version to
+ * zero, and we use this for detecting whether the file was seek'ed.
+ */
+ file->f_version = 1;
+
+ /* File positions 0 and 1 correspond to "." and ".." */
+ if (ctx->pos < 2) {
ubifs_assert(!file->private_data);
- over = filldir(dirent, "..", 2, 1,
- parent_ino(file->f_path.dentry), DT_DIR);
- if (over)
+ if (!dir_emit_dots(file, ctx))
return 0;
/* Find the first entry in TNC and save it */
@@ -411,7 +396,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
goto out;
}
- file->f_pos = key_hash_flash(c, &dent->key);
+ ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
}
@@ -419,31 +404,30 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
if (!dent) {
/*
* The directory was seek'ed to and is now readdir'ed.
- * Find the entry corresponding to @file->f_pos or the
- * closest one.
+ * Find the entry corresponding to @ctx->pos or the closest one.
*/
- dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
+ dent_key_init_hash(c, &key, dir->i_ino, ctx->pos);
nm.name = NULL;
dent = ubifs_tnc_next_ent(c, &key, &nm);
if (IS_ERR(dent)) {
err = PTR_ERR(dent);
goto out;
}
- file->f_pos = key_hash_flash(c, &dent->key);
+ ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
}
while (1) {
dbg_gen("feed '%s', ino %llu, new f_pos %#x",
- dent->name, le64_to_cpu(dent->inum),
+ dent->name, (unsigned long long)le64_to_cpu(dent->inum),
key_hash_flash(c, &dent->key));
- ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
+ ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
+ ubifs_inode(dir)->creat_sqnum);
nm.len = le16_to_cpu(dent->nlen);
- over = filldir(dirent, dent->name, nm.len, file->f_pos,
+ if (!dir_emit(ctx, dent->name, nm.len,
le64_to_cpu(dent->inum),
- vfs_dent_type(dent->type));
- if (over)
+ vfs_dent_type(dent->type)))
return 0;
/* Switch to the next entry */
@@ -456,7 +440,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
}
kfree(file->private_data);
- file->f_pos = key_hash_flash(c, &dent->key);
+ ctx->pos = key_hash_flash(c, &dent->key);
file->private_data = dent;
cond_resched();
}
@@ -469,18 +453,11 @@ out:
kfree(file->private_data);
file->private_data = NULL;
- file->f_pos = 2;
+ /* 2 is a special value indicating that there are no more direntries */
+ ctx->pos = 2;
return 0;
}
-/* If a directory is seeked, we have to free saved readdir() state */
-static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
-{
- kfree(file->private_data);
- file->private_data = NULL;
- return generic_file_llseek(file, offset, origin);
-}
-
/* Free saved readdir() state when the directory is closed */
static int ubifs_dir_release(struct inode *dir, struct file *file)
{
@@ -490,30 +467,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file)
}
/**
- * lock_2_inodes - lock two UBIFS inodes.
+ * lock_2_inodes - a wrapper for locking two UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
+ *
+ * We do not implement any tricks to guarantee strict lock ordering, because
+ * VFS has already done it for us on the @i_mutex. So this is just a simple
+ * wrapper function.
*/
static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
{
- if (inode1->i_ino < inode2->i_ino) {
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2);
- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3);
- } else {
- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3);
- }
+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
}
/**
- * unlock_2_inodes - unlock two UBIFS inodes inodes.
+ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
*/
static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
{
- mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
@@ -525,17 +501,20 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
struct ubifs_inode *dir_ui = ubifs_inode(dir);
int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
/*
* Budget request settings: new direntry, changing the target inode,
* changing the parent inode.
*/
- dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+ dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
+ dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- err = dbg_check_synced_i_size(inode);
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
+
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -545,7 +524,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
lock_2_inodes(dir, inode);
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
@@ -577,6 +556,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
int err, budgeted = 1;
struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
+ unsigned int saved_nlink = inode->i_nlink;
/*
* Budget request settings: deletion direntry, deletion inode (+1 for
@@ -585,10 +565,12 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
* deletions.
*/
- dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, inode->i_ino,
+ dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
+ dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- err = dbg_check_synced_i_size(inode);
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -596,7 +578,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
if (err) {
if (err != -ENOSPC)
return err;
- err = 0;
budgeted = 0;
}
@@ -615,7 +596,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
@@ -623,7 +604,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
out_cancel:
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- inc_nlink(inode);
+ set_nlink(inode, saved_nlink);
unlock_2_inodes(dir, inode);
if (budgeted)
ubifs_release_budget(c, &req);
@@ -674,9 +655,10 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
* because we have extra space reserved for deletions.
*/
- dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
- dentry->d_name.name, inode->i_ino, dir->i_ino);
-
+ dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
+ inode->i_ino, dir->i_ino);
+ ubifs_assert(mutex_is_locked(&dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&inode->i_mutex));
err = check_dir_empty(c, dentry->d_inode);
if (err)
return err;
@@ -704,7 +686,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
ubifs_release_budget(c, &req);
else {
/* We've deleted something - clean the "no space" flags */
- c->nospace = c->nospace_rp = 0;
+ c->bi.nospace = c->bi.nospace_rp = 0;
smp_wmb();
}
return 0;
@@ -713,30 +695,28 @@ out_cancel:
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
inc_nlink(dir);
- inc_nlink(inode);
- inc_nlink(inode);
+ set_nlink(inode, 2);
unlock_2_inodes(dir, inode);
if (budgeted)
ubifs_release_budget(c, &req);
return err;
}
-static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct inode *inode;
struct ubifs_inode *dir_ui = ubifs_inode(dir);
struct ubifs_info *c = dir->i_sb->s_fs_info;
int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .dirtied_ino_d = 1 };
+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
* directory inode.
*/
- dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
+ dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+ dentry, mode, dir->i_ino);
err = ubifs_budget_space(c, &req);
if (err)
@@ -779,7 +759,7 @@ out_budg:
}
static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, dev_t rdev)
+ umode_t mode, dev_t rdev)
{
struct inode *inode;
struct ubifs_inode *ui;
@@ -789,15 +769,15 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
int err, devlen = 0;
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = devlen, .dirtied_ino = 1 };
+ .new_ino_d = ALIGN(devlen, 8),
+ .dirtied_ino = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
* directory inode.
*/
- dbg_gen("dent '%.*s' in dir ino %lu",
- dentry->d_name.len, dentry->d_name.name, dir->i_ino);
+ dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -863,15 +843,16 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
int err, len = strlen(symname);
int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = len, .dirtied_ino = 1 };
+ .new_ino_d = ALIGN(len, 8),
+ .dirtied_ino = 1 };
/*
* Budget request settings: new inode, new direntry and changing parent
* directory inode.
*/
- dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
- dentry->d_name.name, symname, dir->i_ino);
+ dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry,
+ symname, dir->i_ino);
if (len > UBIFS_MAX_INO_DATA)
return -ENAMETOOLONG;
@@ -930,59 +911,30 @@ out_budg:
}
/**
- * lock_3_inodes - lock three UBIFS inodes for rename.
+ * lock_3_inodes - a wrapper for locking three UBIFS inodes.
* @inode1: first inode
* @inode2: second inode
* @inode3: third inode
*
- * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may
- * be null.
+ * This function is used for 'ubifs_rename()' and @inode1 may be the same as
+ * @inode2 whereas @inode3 may be %NULL.
+ *
+ * We do not implement any tricks to guarantee strict lock ordering, because
+ * VFS has already done it for us on the @i_mutex. So this is just a simple
+ * wrapper function.
+ *
+ * The @ui_mutex locks are taken in argument order. @inode2 is skipped when it
+ * is the same inode as @inode1, and @inode3 is skipped when it is %NULL.
*/
static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
struct inode *inode3)
{
- struct inode *i1, *i2, *i3;
-
- if (!inode3) {
- if (inode1 != inode2) {
- lock_2_inodes(inode1, inode2);
- return;
- }
- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
- return;
- }
-
- if (inode1 == inode2) {
- lock_2_inodes(inode1, inode3);
- return;
- }
-
- /* 3 different inodes */
- if (inode1 < inode2) {
- i3 = inode2;
- if (inode1 < inode3) {
- i1 = inode1;
- i2 = inode3;
- } else {
- i1 = inode3;
- i2 = inode1;
- }
- } else {
- i3 = inode1;
- if (inode2 < inode3) {
- i1 = inode2;
- i2 = inode3;
- } else {
- i1 = inode3;
- i2 = inode2;
- }
- }
- mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
- lock_2_inodes(i2, i3);
+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+ if (inode2 != inode1)
+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
+ if (inode3)
+ mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3);
}
/**
- * unlock_3_inodes - unlock three UBIFS inodes for rename.
+ * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename.
* @inode1: first inode
* @inode2: second inode
* @inode3: third inode
@@ -990,11 +942,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
struct inode *inode3)
{
- mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
- if (inode1 != inode2)
- mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
if (inode3)
mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
+ if (inode1 != inode2)
+ mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
}
static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -1012,8 +964,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
.dirtied_ino = 3 };
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
- .dirtied_ino_d = old_inode_ui->data_len };
+ .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
struct timespec time;
+ unsigned int uninitialized_var(saved_nlink);
/*
* Budget request settings: deletion direntry, new direntry, removing
@@ -1024,10 +977,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
* separately.
*/
- dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
- "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
- old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
- new_dentry->d_name.name, new_dir->i_ino);
+ dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
+ old_dentry, old_inode->i_ino, old_dir->i_ino,
+ new_dentry, new_dir->i_ino);
+ ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
+ ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+ if (unlink)
+ ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+
if (unlink && is_dir) {
err = check_dir_empty(c, new_inode);
@@ -1091,13 +1048,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlink) {
/*
* Directories cannot have hard-links, so if this is a
- * directory, decrement its @i_nlink twice because an empty
- * directory has @i_nlink 2.
+ * directory, just clear @i_nlink.
*/
+ saved_nlink = new_inode->i_nlink;
if (is_dir)
+ clear_nlink(new_inode);
+ else
drop_nlink(new_inode);
new_inode->i_ctime = time;
- drop_nlink(new_inode);
} else {
new_dir->i_size += new_sz;
ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1129,14 +1087,12 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (release)
ubifs_release_budget(c, &ino_req);
if (IS_SYNC(old_inode))
- err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
+ err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
return err;
out_cancel:
if (unlink) {
- if (is_dir)
- inc_nlink(new_inode);
- inc_nlink(new_inode);
+ set_nlink(new_inode, saved_nlink);
} else {
new_dir->i_size -= new_sz;
ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1167,16 +1123,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct ubifs_inode *ui = ubifs_inode(inode);
mutex_lock(&ui->ui_mutex);
- stat->dev = inode->i_sb->s_dev;
- stat->ino = inode->i_ino;
- stat->mode = inode->i_mode;
- stat->nlink = inode->i_nlink;
- stat->uid = inode->i_uid;
- stat->gid = inode->i_gid;
- stat->rdev = inode->i_rdev;
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
+ generic_fillattr(inode, stat);
stat->blksize = UBIFS_BLOCK_SIZE;
stat->size = ui->ui_size;
@@ -1207,7 +1154,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
-struct inode_operations ubifs_dir_inode_operations = {
+const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
.link = ubifs_link,
@@ -1219,19 +1166,17 @@ struct inode_operations ubifs_dir_inode_operations = {
.rename = ubifs_rename,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
.setxattr = ubifs_setxattr,
.getxattr = ubifs_getxattr,
.listxattr = ubifs_listxattr,
.removexattr = ubifs_removexattr,
-#endif
};
-struct file_operations ubifs_dir_operations = {
- .llseek = ubifs_dir_llseek,
+const struct file_operations ubifs_dir_operations = {
+ .llseek = generic_file_llseek,
.release = ubifs_dir_release,
.read = generic_read_dir,
- .readdir = ubifs_readdir,
+ .iterate = ubifs_readdir,
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 8565e586e53..b5b593c4527 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -21,39 +21,39 @@
*/
/*
- * This file implements VFS file and inode operations of regular files, device
+ * This file implements VFS file and inode operations for regular files, device
* nodes and symlinks as well as address space operations.
*
- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
- * page is dirty and is used for budgeting purposes - dirty pages should not be
- * budgeted. The PG_checked flag is set if full budgeting is required for the
- * page e.g., when it corresponds to a file hole or it is just beyond the file
- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
- * fail in this function, and the budget is released in 'ubifs_write_end()'. So
- * the PG_private and PG_checked flags carry the information about how the page
- * was budgeted, to make it possible to release the budget properly.
+ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
+ * the page is dirty and is used for optimization purposes - dirty pages are
+ * not budgeted so the flag shows that 'ubifs_write_end()' should not release
+ * the budget for this page. The @PG_checked flag is set if full budgeting is
+ * required for the page e.g., when it corresponds to a file hole or it is
+ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
+ * it is OK to fail in this function, and the budget is released in
+ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
+ * information about how the page was budgeted, to make it possible to release
+ * the budget properly.
*
- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
- * we implement. However, this is not true for '->writepage()', which might be
- * called with 'i_mutex' unlocked. For example, when pdflush is performing
- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
- * path'. So, in '->writepage()' we are only guaranteed that the page is
- * locked.
+ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
+ * implement. However, this is not true for 'ubifs_writepage()', which may be
+ * called with @i_mutex unlocked. For example, when flusher thread is doing
+ * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
+ * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
+ * in the "sys_write -> alloc_pages -> direct reclaim path". So, in
+ * 'ubifs_writepage()' we are only guaranteed that the page is locked.
*
- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
- * readahead path does not have it locked ("sys_read -> generic_file_aio_read
- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
- * not set as well. However, UBIFS disables readahead.
- *
- * This, for example means that there might be 2 concurrent '->writepage()'
- * calls for the same inode, but different inode dirty pages.
+ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
+ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
+ * ondemand_readahead -> readpage"). In case of readahead, the @I_SYNC flag is
+ * not set either. However, UBIFS disables readahead.
*/
#include "ubifs.h"
+#include <linux/aio.h>
#include <linux/mount.h>
#include <linux/namei.h>
+#include <linux/slab.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
@@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
return err;
}
- ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
-
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+ ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
if (len <= 0 || len > UBIFS_BLOCK_SIZE)
goto dump;
@@ -98,7 +98,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
dump:
ubifs_err("bad data node (block %u, inode %lu)",
block, inode->i_ino);
- dbg_dump_node(c, dn);
+ ubifs_dump_node(c, dn);
return -EINVAL;
}
@@ -147,6 +147,12 @@ static int do_readpage(struct page *page)
err = ret;
if (err != -ENOENT)
break;
+ } else if (block + 1 == beyond) {
+ int dlen = le32_to_cpu(dn->size);
+ int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
+
+ if (ilen && ilen < dlen)
+ memset(addr + ilen, 0, dlen - ilen);
}
}
if (++i >= UBIFS_BLOCKS_PER_PAGE)
@@ -207,13 +213,14 @@ static void release_new_page_budget(struct ubifs_info *c)
*/
static void release_existing_page_budget(struct ubifs_info *c)
{
- struct ubifs_budget_req req = { .dd_growth = c->page_budget};
+ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
ubifs_release_budget(c, &req);
}
static int write_begin_slow(struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep)
+ loff_t pos, unsigned len, struct page **pagep,
+ unsigned flags)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -241,14 +248,14 @@ static int write_begin_slow(struct address_space *mapping,
if (unlikely(err))
return err;
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page)) {
ubifs_release_budget(c, &req);
return -ENOMEM;
}
if (!PageUptodate(page)) {
- if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
SetPageChecked(page);
else {
err = do_readpage(page);
@@ -423,31 +430,35 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+ int skipped_read = 0;
struct page *page;
-
ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (unlikely(c->ro_media))
+ if (unlikely(c->ro_error))
return -EROFS;
/* Try out the fast-path part first */
- page = __grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, flags);
if (unlikely(!page))
return -ENOMEM;
if (!PageUptodate(page)) {
/* The page is not loaded from the flash */
- if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
/*
* We change whole page so no need to load it. But we
- * have to set the @PG_checked flag to make the further
- * code the page is new. This might be not true, but it
- * is better to budget more that to read the page from
- * the media.
+ * do not know whether this page exists on the media or
+ * not, so we assume the latter because it requires
+ * larger budget. The assumption is that it is better
+ * to budget a bit more than to read the page from the
+ * media. Thus, we are setting the @PG_checked flag
+ * here.
*/
SetPageChecked(page);
- else {
+ skipped_read = 1;
+ } else {
err = do_readpage(page);
if (err) {
unlock_page(page);
@@ -464,6 +475,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(err)) {
ubifs_assert(err == -ENOSPC);
/*
+ * If we skipped reading the page because we were going to
+ * write all of it, then it is not up to date.
+ */
+ if (skipped_read) {
+ ClearPageChecked(page);
+ ClearPageUptodate(page);
+ }
+ /*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
* request. Write-back cannot be done now, while we have the
@@ -477,12 +496,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
- return write_begin_slow(mapping, pos, len, pagep);
+ return write_begin_slow(mapping, pos, len, pagep, flags);
}
/*
- * Whee, we aquired budgeting quickly - without involving
- * garbage-collection, committing or forceing write-back. We return
+ * Whee, we acquired budgeting quickly - without involving
+ * garbage-collection, committing or forcing write-back. We return
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
@@ -543,10 +562,11 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
dbg_gen("copied %d instead of %d, read page and repeat",
copied, len);
cancel_budget(c, page, ui, appending);
+ ClearPageChecked(page);
/*
* Return 0 to force VFS to repeat the whole operation, or the
- * error code if 'do_readpage()' failes.
+ * error code if 'do_readpage()' fails.
*/
copied = do_readpage(page);
goto out;
@@ -577,8 +597,297 @@ out:
return copied;
}
+/**
+ * populate_page - copy data nodes into a page for bulk-read.
+ * @c: UBIFS file-system description object
+ * @page: page to fill; it is kmapped here and unmapped again before returning
+ * @bu: bulk-read information (data nodes are taken from @bu->buf)
+ * @n: index of the next @bu->zbranch slot to use; updated only on success
+ *
+ * Each block of the page is either decompressed from the matching data node
+ * in @bu->buf or, when no data node matches it, zero-filled as a hole. A page
+ * that lies beyond @i_size (or any page when @i_size is zero) is zero-filled
+ * completely. If any hole was found, the @PG_checked flag is set on the page.
+ * For the page which contains the end of the file, data read past @i_size is
+ * zeroed.
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * On success the page is marked up to date. If a data node has an invalid
+ * size or does not decompress to the expected length, the page is marked with
+ * an error and %-EINVAL is returned.
+ */
+static int populate_page(struct ubifs_info *c, struct page *page,
+ struct bu_info *bu, int *n)
+{
+ int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
+ struct inode *inode = page->mapping->host;
+ loff_t i_size = i_size_read(inode);
+ unsigned int page_block;
+ void *addr, *zaddr;
+ pgoff_t end_index;
+
+ dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
+ inode->i_ino, page->index, i_size, page->flags);
+
+ addr = zaddr = kmap(page);
+
+ end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+ if (!i_size || page->index > end_index) {
+ hole = 1;
+ memset(addr, 0, PAGE_CACHE_SIZE);
+ goto out_hole;
+ }
+
+ page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ while (1) {
+ int err, len, out_len, dlen;
+
+ if (nn >= bu->cnt) {
+ hole = 1;
+ memset(addr, 0, UBIFS_BLOCK_SIZE);
+ } else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
+ struct ubifs_data_node *dn;
+
+ dn = bu->buf + (bu->zbranch[nn].offs - offs);
+
+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+ ubifs_inode(inode)->creat_sqnum);
+
+ len = le32_to_cpu(dn->size);
+ if (len <= 0 || len > UBIFS_BLOCK_SIZE)
+ goto out_err;
+
+ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
+ out_len = UBIFS_BLOCK_SIZE;
+ err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+ le16_to_cpu(dn->compr_type));
+ if (err || len != out_len)
+ goto out_err;
+
+ if (len < UBIFS_BLOCK_SIZE)
+ memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
+
+ nn += 1;
+ read = (i << UBIFS_BLOCK_SHIFT) + len;
+ } else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
+ nn += 1;
+ continue;
+ } else {
+ hole = 1;
+ memset(addr, 0, UBIFS_BLOCK_SIZE);
+ }
+ if (++i >= UBIFS_BLOCKS_PER_PAGE)
+ break;
+ addr += UBIFS_BLOCK_SIZE;
+ page_block += 1;
+ }
+
+ if (end_index == page->index) {
+ int len = i_size & (PAGE_CACHE_SIZE - 1);
+
+ if (len && len < read)
+ memset(zaddr + len, 0, read - len);
+ }
+
+out_hole:
+ if (hole) {
+ SetPageChecked(page);
+ dbg_gen("hole");
+ }
+
+ SetPageUptodate(page);
+ ClearPageError(page);
+ flush_dcache_page(page);
+ kunmap(page);
+ *n = nn;
+ return 0;
+
+out_err:
+ ClearPageUptodate(page);
+ SetPageError(page);
+ flush_dcache_page(page);
+ kunmap(page);
+ ubifs_err("bad data node (block %u, inode %lu)",
+ page_block, inode->i_ino);
+ return -EINVAL;
+}
+
+/**
+ * ubifs_do_bulk_read - do bulk-read.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read information
+ * @page1: first page to read
+ *
+ * If @bu->buf is %NULL on entry and there are data nodes to read, a buffer
+ * sized for those data nodes is allocated here; it is freed again before
+ * returning. Errors from looking up the keys, reading the data nodes or
+ * populating @page1 are not propagated to the caller: a warning is printed
+ * and %0 is returned. An error while populating one of the following pages
+ * only stops the loop over the remaining pages.
+ *
+ * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
+ * When %1 is returned, @page1 has been populated and unlocked. When %0 is
+ * returned, this function has not unlocked @page1.
+ */
+static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
+ struct page *page1)
+{
+ pgoff_t offset = page1->index, end_index;
+ struct address_space *mapping = page1->mapping;
+ struct inode *inode = mapping->host;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ int err, page_idx, page_cnt, ret = 0, n = 0;
+ int allocate = bu->buf ? 0 : 1;
+ loff_t isize;
+
+ err = ubifs_tnc_get_bu_keys(c, bu);
+ if (err)
+ goto out_warn;
+
+ if (bu->eof) {
+ /* Turn off bulk-read at the end of the file */
+ ui->read_in_a_row = 1;
+ ui->bulk_read = 0;
+ }
+
+ page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
+ if (!page_cnt) {
+ /*
+ * This happens when there are multiple blocks per page and the
+ * blocks for the first page we are looking for, are not
+ * together. If all the pages were like this, bulk-read would
+ * reduce performance, so we turn it off for a while.
+ */
+ goto out_bu_off;
+ }
+
+ if (bu->cnt) {
+ if (allocate) {
+ /*
+ * Allocate bulk-read buffer depending on how many data
+ * nodes we are going to read.
+ */
+ bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
+ bu->zbranch[bu->cnt - 1].len -
+ bu->zbranch[0].offs;
+ ubifs_assert(bu->buf_len > 0);
+ ubifs_assert(bu->buf_len <= c->leb_size);
+ bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
+ if (!bu->buf)
+ goto out_bu_off;
+ }
+
+ err = ubifs_tnc_bulk_read(c, bu);
+ if (err)
+ goto out_warn;
+ }
+
+ err = populate_page(c, page1, bu, &n);
+ if (err)
+ goto out_warn;
+
+ unlock_page(page1);
+ ret = 1;
+
+ isize = i_size_read(inode);
+ if (isize == 0)
+ goto out_free;
+ end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+
+ for (page_idx = 1; page_idx < page_cnt; page_idx++) {
+ pgoff_t page_offset = offset + page_idx;
+ struct page *page;
+
+ if (page_offset > end_index)
+ break;
+ page = find_or_create_page(mapping, page_offset,
+ GFP_NOFS | __GFP_COLD);
+ if (!page)
+ break;
+ if (!PageUptodate(page))
+ err = populate_page(c, page, bu, &n);
+ unlock_page(page);
+ page_cache_release(page);
+ if (err)
+ break;
+ }
+
+ ui->last_page_read = offset + page_idx - 1;
+
+out_free:
+ if (allocate)
+ kfree(bu->buf);
+ return ret;
+
+out_warn:
+ ubifs_warn("ignoring error %d and skipping bulk-read", err);
+ goto out_free;
+
+out_bu_off:
+ ui->read_in_a_row = ui->bulk_read = 0;
+ goto out_free;
+}
+
+/**
+ * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
+ * @page: page from which to start bulk-read.
+ *
+ * Some flash media are capable of reading sequentially at faster rates. UBIFS
+ * bulk-read facility is designed to take advantage of that, by reading in one
+ * go consecutive data nodes that are also located consecutively in the same
+ * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
+ *
+ * @ui->last_page_read is set to @page->index at the start of every call, even
+ * when bulk-read is disabled for the file-system. Bulk-read is switched on for
+ * an inode once three pages in a row have been read sequentially, and it is
+ * switched off again as soon as a non-sequential read is seen. Nothing is done
+ * and %0 is returned if @ui->ui_mutex cannot be taken without blocking, or if
+ * the fall-back allocation of the bulk-read information fails.
+ */
+static int ubifs_bulk_read(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ pgoff_t index = page->index, last_page_read = ui->last_page_read;
+ struct bu_info *bu;
+ int err = 0, allocated = 0;
+
+ ui->last_page_read = index;
+ if (!c->bulk_read)
+ return 0;
+
+ /*
+ * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
+ * so don't bother if we cannot lock the mutex.
+ */
+ if (!mutex_trylock(&ui->ui_mutex))
+ return 0;
+
+ if (index != last_page_read + 1) {
+ /* Turn off bulk-read if we stop reading sequentially */
+ ui->read_in_a_row = 1;
+ if (ui->bulk_read)
+ ui->bulk_read = 0;
+ goto out_unlock;
+ }
+
+ if (!ui->bulk_read) {
+ ui->read_in_a_row += 1;
+ if (ui->read_in_a_row < 3)
+ goto out_unlock;
+ /* Three reads in a row, so switch on bulk-read */
+ ui->bulk_read = 1;
+ }
+
+ /*
+ * If possible, try to use pre-allocated bulk-read information, which
+ * is protected by @c->bu_mutex.
+ */
+ if (mutex_trylock(&c->bu_mutex))
+ bu = &c->bu;
+ else {
+ bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
+ if (!bu)
+ goto out_unlock;
+
+ bu->buf = NULL;
+ allocated = 1;
+ }
+
+ bu->buf_len = c->max_bu_buf_len;
+ data_key_init(c, &bu->key, inode->i_ino,
+ page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
+ err = ubifs_do_bulk_read(c, bu, page);
+
+ if (!allocated)
+ mutex_unlock(&c->bu_mutex);
+ else
+ kfree(bu);
+
+out_unlock:
+ mutex_unlock(&ui->ui_mutex);
+ return err;
+}
+
static int ubifs_readpage(struct file *file, struct page *page)
{
+ if (ubifs_bulk_read(page))
+ return 0;
do_readpage(page);
unlock_page(page);
return 0;
@@ -594,8 +903,9 @@ static int do_writepage(struct page *page, int len)
struct ubifs_info *c = inode->i_sb->s_fs_info;
#ifdef UBIFS_DEBUG
+ struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
- ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
+ ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
@@ -654,7 +964,7 @@ static int do_writepage(struct page *page, int len)
* whole index and correct all inode sizes, which is long and unacceptable.
*
* To prevent situations like this, UBIFS writes pages back only if they are
- * within last synchronized inode size, i.e. the the size which has been
+ * within the last synchronized inode size, i.e. the size which has been
* written to the flash media last time. Otherwise, UBIFS forces inode
* write-back, thus making sure the on-flash inode contains current inode size,
* and then keeps writing pages back.
@@ -663,11 +973,15 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'vmtruncate()', which first changes @inode->i_size, then
- * drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
- * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
- * means that @inode->i_size is changed while @ui_mutex is unlocked.
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size,
+ * then drops the truncated pages. And while dropping the pages, it takes the
+ * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
+ * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
+ * This means that @inode->i_size is changed while @ui_mutex is unlocked.
+ *
+ * XXX(truncate): with the new truncate sequence this is not true anymore,
+ * and the calls to truncate_setsize can be moved around freely. They should
+ * be moved to the very end of the truncate sequence.
*
* But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
* inode size. How do we do this if @inode->i_size may become smaller while we
@@ -708,7 +1022,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
/* Is the page fully inside @i_size? */
if (page->index < end_index) {
if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_unlock;
/*
@@ -730,13 +1044,13 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
if (i_size > synced_i_size) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_unlock;
}
@@ -793,7 +1107,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
int err;
struct ubifs_budget_req req;
loff_t old_size = inode->i_size, new_size = attr->ia_size;
- int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
+ int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
struct ubifs_inode *ui = ubifs_inode(inode);
dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,12 +1125,17 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
/* A funny way to budget for truncation node */
req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
err = ubifs_budget_space(c, &req);
- if (err)
- return err;
+ if (err) {
+ /*
+ * Treat truncations to zero as deletion and always allow them,
+ * just like we do for '->unlink()'.
+ */
+ if (new_size || err != -ENOSPC)
+ return err;
+ budgeted = 0;
+ }
- err = vmtruncate(inode, new_size);
- if (err)
- goto out_budg;
+ truncate_setsize(inode, new_size);
if (offset) {
pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -863,13 +1182,18 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
ui->ui_size = inode->i_size;
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* The other attributes may be changed at the same time as well */
+ /* Other attributes may be changed at the same time as well */
do_attr_changes(inode, attr);
-
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
mutex_unlock(&ui->ui_mutex);
+
out_budg:
- ubifs_release_budget(c, &req);
+ if (budgeted)
+ ubifs_release_budget(c, &req);
+ else {
+ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
return err;
}
@@ -890,7 +1214,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
loff_t new_size = attr->ia_size;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_budget_req req = { .dirtied_ino = 1,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
err = ubifs_budget_space(c, &req);
if (err)
@@ -898,16 +1222,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
dbg_gen("size %lld -> %lld", inode->i_size, new_size);
- err = vmtruncate(inode, new_size);
- if (err)
- goto out;
+ truncate_setsize(inode, new_size);
}
mutex_lock(&ui->ui_mutex);
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* 'vmtruncate()' changed @i_size, update @ui_size */
+ /* 'truncate_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -927,11 +1249,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (release)
ubifs_release_budget(c, &req);
if (IS_SYNC(inode))
- err = inode->i_sb->s_op->write_inode(inode, 1);
- return err;
-
-out:
- ubifs_release_budget(c, &req);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
return err;
}
@@ -941,12 +1259,13 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
struct inode *inode = dentry->d_inode;
struct ubifs_info *c = inode->i_sb->s_fs_info;
- dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
+ dbg_gen("ino %lu, mode %#x, ia_valid %#x",
+ inode->i_ino, inode->i_mode, attr->ia_valid);
err = inode_change_ok(inode, attr);
if (err)
return err;
- err = dbg_check_synced_i_size(inode);
+ err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -959,13 +1278,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
}
-static void ubifs_invalidatepage(struct page *page, unsigned long offset)
+static void ubifs_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct inode *inode = page->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
ubifs_assert(PagePrivate(page));
- if (offset)
+ if (offset || length < PAGE_CACHE_SIZE)
/* Partial page remains dirty */
return;
@@ -987,22 +1307,31 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
int err;
dbg_gen("syncing inode %lu", inode->i_ino);
- /*
- * VFS has already synchronized dirty pages for this inode. Synchronize
- * the inode unless this is a 'datasync()' call.
- */
+ if (c->ro_mount)
+ /*
+ * For some really strange reasons VFS does not filter out
+ * 'fsync()' for R/O mounted file-systems as per 2.6.39.
+ */
+ return 0;
+
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&inode->i_mutex);
+
+ /* Synchronize the inode unless this is a 'datasync()' call. */
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
- return err;
+ goto out;
}
/*
@@ -1010,10 +1339,9 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
* them.
*/
err = ubifs_sync_wbufs_by_inode(c, inode);
- if (err)
- return err;
-
- return 0;
+out:
+ mutex_unlock(&inode->i_mutex);
+ return err;
}
/**
@@ -1036,22 +1364,22 @@ static inline int mctime_update_needed(const struct inode *inode,
/**
* update_ctime - update mtime and ctime of an inode.
- * @c: UBIFS file-system description object
* @inode: inode to update
*
* This function updates mtime and ctime of the inode if it is not equivalent to
* current time. Returns zero in case of success and a negative error code in
* case of failure.
*/
-static int update_mctime(struct ubifs_info *c, struct inode *inode)
+static int update_mctime(struct inode *inode)
{
struct timespec now = ubifs_current_time(inode);
struct ubifs_inode *ui = ubifs_inode(inode);
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
if (mctime_update_needed(inode, &now)) {
int err, release;
struct ubifs_budget_req req = { .dirtied_ino = 1,
- .dirtied_ino_d = ui->data_len };
+ .dirtied_ino_d = ALIGN(ui->data_len, 8) };
err = ubifs_budget_space(c, &req);
if (err)
@@ -1069,29 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
return 0;
}
-static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- int err;
- ssize_t ret;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct ubifs_info *c = inode->i_sb->s_fs_info;
-
- err = update_mctime(c, inode);
+ int err = update_mctime(file_inode(iocb->ki_filp));
if (err)
return err;
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
- if (ret < 0)
- return ret;
-
- if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
- err = ubifs_sync_wbufs_by_inode(c, inode);
- if (err)
- return err;
- }
-
- return ret;
+ return generic_file_write_iter(iocb, from);
}
static int ubifs_set_page_dirty(struct page *page)
@@ -1123,12 +1435,14 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
}
/*
- * mmap()d file has taken write protection fault and is being made
- * writable. UBIFS must ensure page is budgeted for.
+ * mmap()d file has taken write protection fault and is being made writable.
+ * UBIFS must ensure page is budgeted for.
*/
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
{
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct page *page = vmf->page;
+ struct inode *inode = file_inode(vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct timespec now = ubifs_current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
@@ -1136,10 +1450,10 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
i_size_read(inode));
- ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (unlikely(c->ro_media))
- return -EROFS;
+ if (unlikely(c->ro_error))
+ return VM_FAULT_SIGBUS; /* -EROFS */
/*
* We have not locked @page so far so we may budget for changing the
@@ -1170,9 +1484,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
err = ubifs_budget_space(c, &req);
if (unlikely(err)) {
if (err == -ENOSPC)
- ubifs_warn("out of space for mmapped file "
- "(inode number %lu)", inode->i_ino);
- return err;
+ ubifs_warn("out of space for mmapped file (inode number %lu)",
+ inode->i_ino);
+ return VM_FAULT_SIGBUS;
}
lock_page(page);
@@ -1206,25 +1520,28 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
ubifs_release_dirty_inode_budget(c, ui);
}
- unlock_page(page);
- return 0;
+ wait_for_stable_page(page);
+ return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
ubifs_release_budget(c, &req);
+ if (err)
+ err = VM_FAULT_SIGBUS;
return err;
}
-static struct vm_operations_struct ubifs_file_vm_ops = {
+static const struct vm_operations_struct ubifs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = ubifs_vm_page_mkwrite,
+ .remap_pages = generic_file_remap_pages,
};
static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
int err;
- /* 'generic_file_mmap()' takes care of NOMMU case */
err = generic_file_mmap(file, vma);
if (err)
return err;
@@ -1232,7 +1549,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-struct address_space_operations ubifs_file_address_operations = {
+const struct address_space_operations ubifs_file_address_operations = {
.readpage = ubifs_readpage,
.writepage = ubifs_writepage,
.write_begin = ubifs_write_begin,
@@ -1242,34 +1559,33 @@ struct address_space_operations ubifs_file_address_operations = {
.releasepage = ubifs_releasepage,
};
-struct inode_operations ubifs_file_inode_operations = {
+const struct inode_operations ubifs_file_inode_operations = {
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
.setxattr = ubifs_setxattr,
.getxattr = ubifs_getxattr,
.listxattr = ubifs_listxattr,
.removexattr = ubifs_removexattr,
-#endif
};
-struct inode_operations ubifs_symlink_inode_operations = {
+const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ubifs_follow_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
};
-struct file_operations ubifs_file_operations = {
+const struct file_operations ubifs_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = ubifs_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = ubifs_write_iter,
.mmap = ubifs_file_mmap,
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
#ifdef CONFIG_COMPAT
.compat_ioctl = ubifs_compat_ioctl,
#endif
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 10394c54836..2dcf3d473fe 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
* dirty index heap, and it falls-back to LPT scanning if the heaps are empty
* or do not have an LEB which satisfies the @min_space criteria.
*
- * Note:
- * o LEBs which have less than dead watermark of dirty space are never picked
- * by this function;
- *
- * Returns zero and the LEB properties of
- * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
- * negative error code in case of other failures. The returned LEB is marked as
- * "taken".
+ * Note, LEBs which have less than dead watermark of free + dirty space are
+ * never picked by this function.
*
* The additional @pick_free argument controls if this function has to return a
* free or freeable LEB if one is present. For example, GC must to set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
*
* In addition @pick_free is set to %2 by the recovery process in order to
* recover gc_lnum in which case an index LEB must not be returned.
+ *
+ * This function returns zero and the LEB properties of found dirty LEB in case
+ * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
+ * case of other failures. The returned LEB is marked as "taken".
*/
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int lebs, rsvd_idx_lebs = 0;
spin_lock(&c->space_lock);
- lebs = c->lst.empty_lebs;
+ lebs = c->lst.empty_lebs + c->idx_gc_cnt;
lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
/*
@@ -254,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
* But if the index takes fewer LEBs than it is reserved for it,
* this function must avoid picking those reserved LEBs.
*/
- if (c->min_idx_lebs >= c->lst.idx_lebs) {
- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
exclude_index = 1;
}
spin_unlock(&c->space_lock);
@@ -278,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
pick_free = 0;
} else {
spin_lock(&c->space_lock);
- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
+ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
spin_unlock(&c->space_lock);
}
@@ -290,9 +288,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
idx_lp = idx_heap->arr[0];
sum = idx_lp->free + idx_lp->dirty;
/*
- * Since we reserve twice as more space for the index than it
+ * Since we reserve thrice as much space for the index as it
* actually takes, it does not make sense to pick indexing LEBs
- * with less than half LEB of dirty space.
+ * with less than, say, half LEB of dirty space. Maybe half is
+ * not the optimal boundary - this should be tested and
+ * checked. This boundary should determine how much we use
+ * in-the-gaps to consolidate the index compared to how much
+ * we use garbage collector to consolidate it. The "half"
+ * criterion just feels fine.
*/
if (sum < min_space || sum < c->half_leb_size)
idx_lp = NULL;
@@ -312,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
lp = idx_lp;
if (lp) {
- ubifs_assert(lp->dirty >= c->dead_wm);
+ ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
goto found;
}
@@ -475,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* ubifs_find_free_space - find a data LEB with free space.
* @c: the UBIFS file-system description object
* @min_space: minimum amount of required free space
- * @free: contains amount of free space in the LEB on exit
+ * @offs: contains offset of where free space starts on exit
* @squeeze: whether to try to find space in a non-empty LEB first
*
* This function looks for an LEB with at least @min_space bytes of free space.
@@ -487,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
* failed to find a LEB with @min_space bytes of free space and other a negative
* error codes in case of failure.
*/
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze)
{
const struct ubifs_lprops *lprops;
@@ -498,13 +501,12 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
/* Check if there are enough empty LEBs for commit */
spin_lock(&c->space_lock);
- if (c->min_idx_lebs > c->lst.idx_lebs)
- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+ if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
else
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
- ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
if (rsvd_idx_lebs < lebs)
/*
* OK to allocate an empty LEB, but we still don't want to go
@@ -556,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
spin_unlock(&c->space_lock);
}
- *free = lprops->free;
+ *offs = c->leb_size - lprops->free;
ubifs_release_lprops(c);
- if (*free == c->leb_size) {
+ if (*offs == 0) {
/*
* Ensure that empty LEBs have been unmapped. They may not have
* been, for example, because of an unclean unmount. Also
@@ -571,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
return err;
}
- dbg_find("found LEB %d, free %d", lnum, *free);
- ubifs_assert(*free >= min_space);
+ dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
+ ubifs_assert(*offs <= c->leb_size - min_space);
return lnum;
out:
@@ -679,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
if (!lprops) {
lprops = ubifs_fast_find_freeable(c);
if (!lprops) {
- ubifs_assert(c->freeable_cnt == 0);
- if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ /*
+ * The first condition means the following: go scan the
+ * LPT if there are uncategorized lprops, which means
+ * there may be freeable LEBs there (UBIFS does not
+ * store the information about freeable LEBs in the
+ * master node).
+ */
+ if (c->in_a_category_cnt != c->main_lebs ||
+ c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ ubifs_assert(c->freeable_cnt == 0);
lprops = scan_for_leb_for_idx(c);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
@@ -899,11 +909,11 @@ static int get_idx_gc_leb(struct ubifs_info *c)
* it is needed now for this commit.
*/
lp = ubifs_lpt_lookup_dirty(c, lnum);
- if (unlikely(IS_ERR(lp)))
+ if (IS_ERR(lp))
return PTR_ERR(lp);
lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
lp->flags | LPROPS_INDEX, -1);
- if (unlikely(IS_ERR(lp)))
+ if (IS_ERR(lp))
return PTR_ERR(lp);
dbg_find("LEB %d, dirty %d and free %d flags %#x",
lp->lnum, lp->dirty, lp->free, lp->flags);
@@ -937,8 +947,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c)
}
dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
lp->free, lp->flags);
- ubifs_assert(lp->flags | LPROPS_TAKEN);
- ubifs_assert(lp->flags | LPROPS_INDEX);
+ ubifs_assert(lp->flags & LPROPS_TAKEN);
+ ubifs_assert(lp->flags & LPROPS_INDEX);
return lnum;
}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac2908..9718da86ad0 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -31,21 +31,35 @@
* to be reused. Garbage collection will cause the number of dirty index nodes
* to grow, however sufficient space is reserved for the index to ensure the
* commit will never run out of space.
+ *
+ * Notes about dead watermark. In the current UBIFS implementation we assume
+ * that LEBs which have less than @c->dead_wm bytes of free + dirty space are
+ * full and not worth garbage-collecting. The dead watermark is one min. I/O
+ * unit size, or min. UBIFS node size, whichever is greater. Indeed, UBIFS
+ * Garbage Collector has to synchronize the GC head's write buffer before
+ * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can
+ * actually reclaim even very small pieces of dirty space by garbage collecting
+ * enough dirty LEBs, but we do not bother doing this in this implementation.
+ *
+ * Notes about dark watermark. The results of GC work depend on how big the
+ * UBIFS nodes GC deals with are. Large nodes make GC waste more space. Indeed,
+ * if GC moves data from LEB A to LEB B and nodes in LEB A are large, GC would
+ * have to waste large pieces of free space at the end of LEB B, because nodes
+ * from LEB A would not fit. And the worst situation is when all nodes are of
+ * maximum size. So dark watermark is the amount of free + dirty space in LEB
+ * which is guaranteed to be reclaimable. If LEB has less space, the GC might
+ * be unable to reclaim it. So, LEBs with free + dirty greater than dark
+ * watermark are "good" LEBs from GC's point of view. The other LEBs are not so
+ * good, and GC takes extra care when moving them.
*/
+#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/list_sort.h>
#include "ubifs.h"
/*
- * GC tries to optimize the way it fit nodes to available space, and it sorts
- * nodes a little. The below constants are watermarks which define "large",
- * "medium", and "small" nodes.
- */
-#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
-#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
-
-/*
- * GC may need to move more then one LEB to make progress. The below constants
+ * GC may need to move more than one LEB to make progress. The below constants
* define "soft" and "hard" limits on the number of LEBs the garbage collector
* may move.
*/
@@ -86,86 +100,257 @@ static int switch_gc_head(struct ubifs_info *c)
if (err)
return err;
+ err = ubifs_wbuf_sync_nolock(wbuf);
+ if (err)
+ return err;
+
err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
if (err)
return err;
c->gc_lnum = -1;
- err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
+ err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0);
return err;
}
/**
- * move_nodes - move nodes.
+ * data_nodes_cmp - compare 2 data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first data node
+ * @b: second data node
+ *
+ * This function compares data nodes @a and @b. Returns %1 if @a has greater
+ * inode or block number, and %-1 otherwise.
+ */
+static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ ino_t inuma, inumb;
+ struct ubifs_info *c = priv;
+ struct ubifs_scan_node *sa, *sb;
+
+ cond_resched();
+ if (a == b)
+ return 0;
+
+ sa = list_entry(a, struct ubifs_scan_node, list);
+ sb = list_entry(b, struct ubifs_scan_node, list);
+
+ ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
+ ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
+ ubifs_assert(sa->type == UBIFS_DATA_NODE);
+ ubifs_assert(sb->type == UBIFS_DATA_NODE);
+
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma == inumb) {
+ unsigned int blka = key_block(c, &sa->key);
+ unsigned int blkb = key_block(c, &sb->key);
+
+ if (blka <= blkb)
+ return -1;
+ } else if (inuma <= inumb)
+ return -1;
+
+ return 1;
+}
+
+/*
+ * nondata_nodes_cmp - compare 2 non-data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first node
+ * @b: second node
+ *
+ * This function compares nodes @a and @b. It makes sure that inode nodes go
+ * first and are sorted by length in descending order. Directory entry nodes go
+ * after inode nodes and are sorted in ascending hash value order.
+ */
+static int nondata_nodes_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
+{
+ ino_t inuma, inumb;
+ struct ubifs_info *c = priv;
+ struct ubifs_scan_node *sa, *sb;
+
+ cond_resched();
+ if (a == b)
+ return 0;
+
+ sa = list_entry(a, struct ubifs_scan_node, list);
+ sb = list_entry(b, struct ubifs_scan_node, list);
+
+ ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
+ key_type(c, &sb->key) != UBIFS_DATA_KEY);
+ ubifs_assert(sa->type != UBIFS_DATA_NODE &&
+ sb->type != UBIFS_DATA_NODE);
+
+ /* Inodes go before directory entries */
+ if (sa->type == UBIFS_INO_NODE) {
+ if (sb->type == UBIFS_INO_NODE)
+ return sb->len - sa->len;
+ return -1;
+ }
+ if (sb->type == UBIFS_INO_NODE)
+ return 1;
+
+ ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
+ key_type(c, &sa->key) == UBIFS_XENT_KEY);
+ ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
+ key_type(c, &sb->key) == UBIFS_XENT_KEY);
+ ubifs_assert(sa->type == UBIFS_DENT_NODE ||
+ sa->type == UBIFS_XENT_NODE);
+ ubifs_assert(sb->type == UBIFS_DENT_NODE ||
+ sb->type == UBIFS_XENT_NODE);
+
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma == inumb) {
+ uint32_t hasha = key_hash(c, &sa->key);
+ uint32_t hashb = key_hash(c, &sb->key);
+
+ if (hasha <= hashb)
+ return -1;
+ } else if (inuma <= inumb)
+ return -1;
+
+ return 1;
+}
+
+/**
+ * sort_nodes - sort nodes for GC.
* @c: UBIFS file-system description object
- * @sleb: describes nodes to move
+ * @sleb: describes nodes to sort and contains the result on exit
+ * @nondata: contains non-data nodes on exit
+ * @min: minimum node size is returned here
*
- * This function moves valid nodes from data LEB described by @sleb to the GC
- * journal head. The obsolete nodes are dropped.
+ * This function sorts the list of nodes to garbage collect. First of all, it
+ * kills obsolete nodes and separates data and non-data nodes to the
+ * @sleb->nodes and @nondata lists respectively.
+ *
+ * Data nodes are then sorted in block number order - this is important for
+ * bulk-read; data nodes with lower inode number go before data nodes with
+ * higher inode number, and data nodes with lower block number go before data
+ * nodes with higher block number;
*
- * When moving nodes we have to deal with classical bin-packing problem: the
- * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
- * where the nodes in the @sleb->nodes list are the elements which should be
- * fit optimally to the bins. This function uses the "first fit decreasing"
- * strategy, although it does not really sort the nodes but just split them on
- * 3 classes - large, medium, and small, so they are roughly sorted.
+ * Non-data nodes are sorted as follows.
+ * o First go inode nodes - they are sorted in descending length order.
+ * o Then go directory entry nodes - they are sorted in hash order, which
+ * should supposedly optimize 'readdir()'. Direntry nodes with lower parent
+ * inode number go before direntry nodes with higher parent inode number,
+ * and direntry nodes with lower name hash values go before direntry nodes
+ * with higher name hash values.
*
- * This function returns zero in case of success, %-EAGAIN if commit is
- * required, and other negative error codes in case of other failures.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
*/
-static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+ struct list_head *nondata, int *min)
{
+ int err;
struct ubifs_scan_node *snod, *tmp;
- struct list_head large, medium, small;
- struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
- int avail, err, min = INT_MAX;
- INIT_LIST_HEAD(&large);
- INIT_LIST_HEAD(&medium);
- INIT_LIST_HEAD(&small);
+ *min = INT_MAX;
+ /* Separate data nodes and non-data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
- struct list_head *lst;
+ ubifs_assert(snod->type == UBIFS_INO_NODE ||
+ snod->type == UBIFS_DATA_NODE ||
+ snod->type == UBIFS_DENT_NODE ||
+ snod->type == UBIFS_XENT_NODE ||
+ snod->type == UBIFS_TRUN_NODE);
+
+ if (snod->type != UBIFS_INO_NODE &&
+ snod->type != UBIFS_DATA_NODE &&
+ snod->type != UBIFS_DENT_NODE &&
+ snod->type != UBIFS_XENT_NODE) {
+ /* Probably truncation node, zap it */
+ list_del(&snod->list);
+ kfree(snod);
+ continue;
+ }
- ubifs_assert(snod->type != UBIFS_IDX_NODE);
- ubifs_assert(snod->type != UBIFS_REF_NODE);
- ubifs_assert(snod->type != UBIFS_CS_NODE);
+ ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
+ key_type(c, &snod->key) == UBIFS_INO_KEY ||
+ key_type(c, &snod->key) == UBIFS_DENT_KEY ||
+ key_type(c, &snod->key) == UBIFS_XENT_KEY);
err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
snod->offs, 0);
if (err < 0)
- goto out;
+ return err;
- lst = &snod->list;
- list_del(lst);
if (!err) {
/* The node is obsolete, remove it from the list */
+ list_del(&snod->list);
kfree(snod);
continue;
}
- /*
- * Sort the list of nodes so that large nodes go first, and
- * small nodes go last.
- */
- if (snod->len > MEDIUM_NODE_WM)
- list_add(lst, &large);
- else if (snod->len > SMALL_NODE_WM)
- list_add(lst, &medium);
- else
- list_add(lst, &small);
-
- /* And find the smallest node */
- if (snod->len < min)
- min = snod->len;
+ if (snod->len < *min)
+ *min = snod->len;
+
+ if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
+ list_move_tail(&snod->list, nondata);
}
- /*
- * Join the tree lists so that we'd have one roughly sorted list
- * ('large' will be the head of the joined list).
- */
- list_splice(&medium, large.prev);
- list_splice(&small, large.prev);
+ /* Sort data and non-data nodes */
+ list_sort(c, &sleb->nodes, &data_nodes_cmp);
+ list_sort(c, nondata, &nondata_nodes_cmp);
+
+ err = dbg_check_data_nodes_order(c, &sleb->nodes);
+ if (err)
+ return err;
+ err = dbg_check_nondata_nodes_order(c, nondata);
+ if (err)
+ return err;
+ return 0;
+}
+
+/**
+ * move_node - move a node.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ * @snod: the node to move
+ * @wbuf: write-buffer to move node to
+ *
+ * This function moves node @snod to @wbuf, changes TNC correspondingly, and
+ * destroys @snod. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+ struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
+{
+ int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
+
+ cond_resched();
+ err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
+ if (err)
+ return err;
+
+ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
+ snod->offs, new_lnum, new_offs,
+ snod->len);
+ list_del(&snod->list);
+ kfree(snod);
+ return err;
+}
+
+/**
+ * move_nodes - move nodes.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ *
+ * This function moves valid nodes from data LEB described by @sleb to the GC
+ * journal head. This function returns zero in case of success, %-EAGAIN if
+ * commit is required, and other negative error codes in case of other
+ * failures.
+ */
+static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+{
+ int err, min;
+ LIST_HEAD(nondata);
+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
if (wbuf->lnum == -1) {
/*
@@ -174,42 +359,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
*/
err = switch_gc_head(c);
if (err)
- goto out;
+ return err;
}
+ err = sort_nodes(c, sleb, &nondata, &min);
+ if (err)
+ goto out;
+
/* Write nodes to their new location. Use the first-fit strategy */
while (1) {
- avail = c->leb_size - wbuf->offs - wbuf->used;
- list_for_each_entry_safe(snod, tmp, &large, list) {
- int new_lnum, new_offs;
+ int avail;
+ struct ubifs_scan_node *snod, *tmp;
+
+ /* Move data nodes */
+ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+ avail = c->leb_size - wbuf->offs - wbuf->used;
+ if (snod->len > avail)
+ /*
+ * Do not skip data nodes in order to optimize
+ * bulk-read.
+ */
+ break;
+ err = move_node(c, sleb, snod, wbuf);
+ if (err)
+ goto out;
+ }
+
+ /* Move non-data nodes */
+ list_for_each_entry_safe(snod, tmp, &nondata, list) {
+ avail = c->leb_size - wbuf->offs - wbuf->used;
if (avail < min)
break;
- if (snod->len > avail)
- /* This node does not fit */
+ if (snod->len > avail) {
+ /*
+ * Keep going only if this is an inode with
+ * some data. Otherwise stop and switch the GC
+ * head. IOW, we assume that data-less inode
+ * nodes and direntry nodes are roughly of the
+ * same size.
+ */
+ if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
+ snod->len == UBIFS_INO_NODE_SZ)
+ break;
continue;
+ }
- cond_resched();
-
- new_lnum = wbuf->lnum;
- new_offs = wbuf->offs + wbuf->used;
- err = ubifs_wbuf_write_nolock(wbuf, snod->node,
- snod->len);
+ err = move_node(c, sleb, snod, wbuf);
if (err)
goto out;
- err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
- snod->offs, new_lnum, new_offs,
- snod->len);
- if (err)
- goto out;
-
- avail = c->leb_size - wbuf->offs - wbuf->used;
- list_del(&snod->list);
- kfree(snod);
}
- if (list_empty(&large))
+ if (list_empty(&sleb->nodes) && list_empty(&nondata))
break;
/*
@@ -224,10 +426,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
return 0;
out:
- list_for_each_entry_safe(snod, tmp, &large, list) {
- list_del(&snod->list);
- kfree(snod);
- }
+ list_splice_tail(&nondata, &sleb->nodes);
return err;
}
@@ -279,11 +478,42 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
ubifs_assert(c->gc_lnum != lnum);
ubifs_assert(wbuf->lnum != lnum);
+ if (lp->free + lp->dirty == c->leb_size) {
+ /* Special case - a free LEB */
+ dbg_gc("LEB %d is free, return it", lp->lnum);
+ ubifs_assert(!(lp->flags & LPROPS_INDEX));
+
+ if (lp->free != c->leb_size) {
+ /*
+ * Write buffers must be sync'd before unmapping
+ * freeable LEBs, because one of them may contain data
+ * which obsoletes something in 'lp->lnum'.
+ */
+ err = gc_sync_wbufs(c);
+ if (err)
+ return err;
+ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
+ 0, 0, 0, 0);
+ if (err)
+ return err;
+ }
+ err = ubifs_leb_unmap(c, lp->lnum);
+ if (err)
+ return err;
+
+ if (c->gc_lnum == -1) {
+ c->gc_lnum = lnum;
+ return LEB_RETAINED;
+ }
+
+ return LEB_FREED;
+ }
+
/*
* We scan the entire LEB even though we only really need to scan up to
* (c->leb_size - lp->free).
*/
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
@@ -319,7 +549,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
/*
* Don't release the LEB until after the next commit, because
- * it may contain date which is needed for recovery. So
+ * it may contain data which is needed for recovery. So
* although we freed this LEB, it will become usable only after
* the commit.
*/
@@ -334,15 +564,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
err = move_nodes(c, sleb);
if (err)
- goto out;
+ goto out_inc_seq;
err = gc_sync_wbufs(c);
if (err)
- goto out;
+ goto out_inc_seq;
err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
if (err)
- goto out;
+ goto out_inc_seq;
+
+ /* Allow for races with TNC */
+ c->gced_lnum = lnum;
+ smp_wmb();
+ c->gc_seq += 1;
+ smp_wmb();
if (c->gc_lnum == -1) {
c->gc_lnum = lnum;
@@ -363,6 +599,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
out:
ubifs_scan_destroy(sleb);
return err;
+
+out_inc_seq:
+ /* We may have moved at least some nodes so allow for races with TNC */
+ c->gced_lnum = lnum;
+ smp_wmb();
+ c->gc_seq += 1;
+ smp_wmb();
+ goto out;
}
/**
@@ -408,13 +652,14 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
ubifs_assert_cmt_locked(c);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
if (ubifs_gc_should_commit(c))
return -EAGAIN;
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- if (c->ro_media) {
+ if (c->ro_error) {
ret = -EROFS;
goto out_unlock;
}
@@ -423,8 +668,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
ubifs_assert(!wbuf->used);
for (i = 0; ; i++) {
- int space_before = c->leb_size - wbuf->offs - wbuf->used;
- int space_after;
+ int space_before, space_after;
cond_resched();
@@ -469,40 +713,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
break;
}
- dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
- "(min. space %d)", lp.lnum, lp.free, lp.dirty,
- lp.free + lp.dirty, min_space);
-
- if (lp.free + lp.dirty == c->leb_size) {
- /* An empty LEB was returned */
- dbg_gc("LEB %d is free, return it", lp.lnum);
- /*
- * ubifs_find_dirty_leb() doesn't return freeable index
- * LEBs.
- */
- ubifs_assert(!(lp.flags & LPROPS_INDEX));
- if (lp.free != c->leb_size) {
- /*
- * Write buffers must be sync'd before
- * unmapping freeable LEBs, because one of them
- * may contain data which obsoletes something
- * in 'lp.pnum'.
- */
- ret = gc_sync_wbufs(c);
- if (ret)
- goto out;
- ret = ubifs_change_one_lp(c, lp.lnum,
- c->leb_size, 0, 0, 0,
- 0);
- if (ret)
- goto out;
- }
- ret = ubifs_leb_unmap(c, lp.lnum);
- if (ret)
- goto out;
- ret = lp.lnum;
- break;
- }
+ dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)",
+ lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty,
+ min_space);
space_before = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum == -1)
@@ -510,14 +723,12 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
ret = ubifs_garbage_collect_leb(c, &lp);
if (ret < 0) {
- if (ret == -EAGAIN || ret == -ENOSPC) {
+ if (ret == -EAGAIN) {
/*
- * These codes are not errors, so we have to
- * return the LEB to lprops. But if the
- * 'ubifs_return_leb()' function fails, its
- * failure code is propagated to the caller
- * instead of the original '-EAGAIN' or
- * '-ENOSPC'.
+ * This is not an error, so we have to return the
+ * LEB to lprops. But if 'ubifs_return_leb()'
+ * fails, its failure code is propagated to the
+ * caller instead of the original '-EAGAIN'.
*/
err = ubifs_return_leb(c, lp.lnum);
if (err)
@@ -607,8 +818,8 @@ out_unlock:
out:
ubifs_assert(ret < 0);
ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
- ubifs_ro_mode(c, ret);
ubifs_wbuf_sync_nolock(wbuf);
+ ubifs_ro_mode(c, ret);
mutex_unlock(&wbuf->io_mutex);
ubifs_return_leb(c, lp.lnum);
return ret;
@@ -639,7 +850,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
*/
while (1) {
lp = ubifs_fast_find_freeable(c);
- if (unlikely(IS_ERR(lp))) {
+ if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
@@ -651,7 +862,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
if (err)
goto out;
lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
- if (unlikely(IS_ERR(lp))) {
+ if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
@@ -666,7 +877,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
/* Record index freeable LEBs for unmapping after commit */
while (1) {
lp = ubifs_fast_find_frdi_idx(c);
- if (unlikely(IS_ERR(lp))) {
+ if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
@@ -682,7 +893,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
/* Don't release the LEB until after the next commit */
flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
- if (unlikely(IS_ERR(lp))) {
+ if (IS_ERR(lp)) {
err = PTR_ERR(lp);
kfree(idx_gc);
goto out;
@@ -734,8 +945,9 @@ out:
* ubifs_destroy_idx_gc - destroy idx_gc list.
* @c: UBIFS file-system description object
*
- * This function destroys the idx_gc list. It is called when unmounting or
- * remounting read-only so locks are not needed.
+ * This function destroys the @c->idx_gc list. It is called when unmounting
+ * so locks are not needed. The function is declared 'void' and returns no
+ * value.
*/
void ubifs_destroy_idx_gc(struct ubifs_info *c)
{
@@ -748,7 +960,6 @@ void ubifs_destroy_idx_gc(struct ubifs_info *c)
list_del(&idx_gc->list);
kfree(idx_gc);
}
-
}
/**
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3374f91b670..2290d586672 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -29,7 +29,27 @@
* would have been wasted for padding to the nearest minimal I/O unit boundary.
* Instead, data first goes to the write-buffer and is flushed when the
* buffer is full or when it is not used for some time (by timer). This is
- * similarto the mechanism is used by JFFS2.
+ * similar to the mechanism used by JFFS2.
+ *
+ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
+ * write size (@c->max_write_size). The latter is the maximum amount of bytes
+ * the underlying flash is able to program at a time, and writing in
+ * @c->max_write_size units should presumably be faster. Obviously,
+ * @c->min_io_size <= @c->max_write_size. Write-buffers are of
+ * @c->max_write_size bytes in size for maximum performance. However, when a
+ * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
+ * boundary) which contains data is written, not the whole write-buffer,
+ * because this is more space-efficient.
+ *
+ * This optimization adds a few complications to the code. Indeed, on the one
+ * hand, we want to write in optimal @c->max_write_size bytes chunks, which
+ * also means aligning writes at the @c->max_write_size bytes offsets. On the
+ * other hand, we do not want to waste space when synchronizing the write
+ * buffer, so during synchronization we write in smaller chunks. And this makes
+ * the next write offset to be not aligned to @c->max_write_size bytes. So we
+ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
+ * to @c->max_write_size bytes again. We do this by temporarily shrinking
+ * write-buffer size (@wbuf->size).
*
* Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
* mutexes defined inside these objects. Since sometimes upper-level code
@@ -46,20 +66,154 @@
* UBIFS uses padding when it pads to the next min. I/O unit. In this case it
* uses padding nodes or padding bytes, if the padding node does not fit.
*
- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
- * every time they are read from the flash media.
+ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
+ * they are read from the flash media.
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
+ * ubifs_ro_mode - switch UBIFS to read-only mode.
+ * @c: UBIFS file-system description object
+ * @err: error code which is the reason of switching to R/O mode
+ */
+void ubifs_ro_mode(struct ubifs_info *c, int err)
+{
+ if (!c->ro_error) {
+ c->ro_error = 1;
+ c->no_chk_data_crc = 0;
+ c->vfs_sb->s_flags |= MS_RDONLY;
+ ubifs_warn("switched to read-only mode, error %d", err);
+ dump_stack();
+ }
+}
+
+/*
+ * Below are simple wrappers over UBI I/O functions which include some
+ * additional checks and UBIFS debugging stuff. See corresponding UBI function
+ * for more information.
+ */
+
+int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
+ int len, int even_ebadmsg)
+{
+ int err;
+
+ err = ubi_read(c->ubi, lnum, buf, offs, len);
+ /*
+ * In case of %-EBADMSG print the error message only if the
+ * @even_ebadmsg is true.
+ */
+ if (err && (err != -EBADMSG || even_ebadmsg)) {
+ ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
+ len, lnum, offs, err);
+ dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
+ else
+ err = dbg_leb_write(c, lnum, buf, offs, len);
+ if (err) {
+ ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
+ len, lnum, offs, err);
+ ubifs_ro_mode(c, err);
+ dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_change(c->ubi, lnum, buf, len);
+ else
+ err = dbg_leb_change(c, lnum, buf, len);
+ if (err) {
+ ubifs_err("changing %d bytes in LEB %d failed, error %d",
+ len, lnum, err);
+ ubifs_ro_mode(c, err);
+ dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_unmap(c->ubi, lnum);
+ else
+ err = dbg_leb_unmap(c, lnum);
+ if (err) {
+ ubifs_err("unmap LEB %d failed, error %d", lnum, err);
+ ubifs_ro_mode(c, err);
+ dump_stack();
+ }
+ return err;
+}
+
+int ubifs_leb_map(struct ubifs_info *c, int lnum)
+{
+ int err;
+
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
+ if (!dbg_is_tst_rcvry(c))
+ err = ubi_leb_map(c->ubi, lnum);
+ else
+ err = dbg_leb_map(c, lnum);
+ if (err) {
+ ubifs_err("mapping LEB %d failed, error %d", lnum, err);
+ ubifs_ro_mode(c, err);
+ dump_stack();
+ }
+ return err;
+}
+
+int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
+{
+ int err;
+
+ err = ubi_is_mapped(c->ubi, lnum);
+ if (err < 0) {
+ ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
+ lnum, err);
+ dump_stack();
+ }
+ return err;
+}
+
+/**
* ubifs_check_node - check node.
* @c: UBIFS file-system description object
* @buf: node to check
* @lnum: logical eraseblock number
* @offs: offset within the logical eraseblock
* @quiet: print no messages
+ * @must_chk_crc: indicates whether to always check the CRC
*
* This function checks node magic number and CRC checksum. This function also
* validates node length to prevent UBIFS from becoming crazy when an attacker
@@ -67,11 +221,21 @@
* node length in the common header could cause UBIFS to read memory outside of
* allocated buffer when checking the CRC checksum.
*
- * This function returns zero in case of success %-EUCLEAN in case of bad CRC
- * or magic.
+ * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
+ * true, which is controlled by corresponding UBIFS mount option. However, if
+ * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
+ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
+ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
+ * is checked. This is because during mounting or re-mounting from R/O mode to
+ * R/W mode we may read journal nodes (when replaying the journal or doing the
+ * recovery) and the journal nodes may potentially be corrupted, so checking is
+ * required.
+ *
+ * This function returns zero in case of success and %-EUCLEAN in case of bad
+ * CRC or magic.
*/
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
- int offs, int quiet)
+ int offs, int quiet, int must_chk_crc)
{
int err = -EINVAL, type, node_len;
uint32_t crc, node_crc, magic;
@@ -107,6 +271,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
node_len > c->ranges[type].max_len)
goto out_len;
+ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
+ !c->remounting_rw && c->no_chk_data_crc)
+ return 0;
+
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
node_crc = le32_to_cpu(ch->crc);
if (crc != node_crc) {
@@ -125,8 +293,8 @@ out_len:
out:
if (!quiet) {
ubifs_err("bad node at LEB %d:%d", lnum, offs);
- dbg_dump_node(c, buf);
- dbg_dump_stack();
+ ubifs_dump_node(c, buf);
+ dump_stack();
}
return err;
}
@@ -267,13 +435,15 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
*
* This function is called when the write-buffer timer expires.
*/
-static void wbuf_timer_callback_nolock(unsigned long data)
+static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
{
- struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data;
+ struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
+ dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
wbuf->need_sync = 1;
wbuf->c->need_wbuf_sync = 1;
ubifs_wake_up_bgt(wbuf->c);
+ return HRTIMER_NORESTART;
}
/**
@@ -282,13 +452,17 @@ static void wbuf_timer_callback_nolock(unsigned long data)
*/
static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
- ubifs_assert(!timer_pending(&wbuf->timer));
+ ubifs_assert(!hrtimer_active(&wbuf->timer));
- if (!wbuf->timeout)
+ if (wbuf->no_timer)
return;
-
- wbuf->timer.expires = jiffies + wbuf->timeout;
- add_timer(&wbuf->timer);
+ dbg_io("set timer for jhead %s, %llu-%llu millisecs",
+ dbg_jhead(wbuf->jhead),
+ div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
+ div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
+ USEC_PER_SEC));
+ hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
+ HRTIMER_MODE_REL);
}
/**
@@ -297,13 +471,10 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
*/
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
- /*
- * If the syncer is waiting for the lock (from the background thread's
- * context) and another task is changing write-buffer then the syncing
- * should be canceled.
- */
+ if (wbuf->no_timer)
+ return;
wbuf->need_sync = 0;
- del_timer(&wbuf->timer);
+ hrtimer_cancel(&wbuf->timer);
}
/**
@@ -312,41 +483,68 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
*
* This function synchronizes write-buffer @buf and returns zero in case of
* success or a negative error code in case of failure.
+ *
+ * Note, although write-buffers are of @c->max_write_size, this function does
+ * not necessarily write all @c->max_write_size bytes to the flash. Instead,
+ * if the write-buffer is only partially filled with data, only the used part
+ * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
+ * This way we waste less space.
*/
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{
struct ubifs_info *c = wbuf->c;
- int err, dirt;
+ int err, dirt, sync_len;
cancel_wbuf_timer_nolock(wbuf);
if (!wbuf->used || wbuf->lnum == -1)
/* Write-buffer is empty or not seeked */
return 0;
- dbg_io("LEB %d:%d, %d bytes",
- wbuf->lnum, wbuf->offs, wbuf->used);
- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
+ dbg_io("LEB %d:%d, %d bytes, jhead %s",
+ wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
ubifs_assert(!(wbuf->avail & 7));
- ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
-
- if (c->ro_media)
+ ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
+ ubifs_assert(wbuf->size >= c->min_io_size);
+ ubifs_assert(wbuf->size <= c->max_write_size);
+ ubifs_assert(wbuf->size % c->min_io_size == 0);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->leb_size - wbuf->offs >= c->max_write_size)
+ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
+
+ if (c->ro_error)
return -EROFS;
- ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
- c->min_io_size, wbuf->dtype);
- if (err) {
- ubifs_err("cannot write %d bytes to LEB %d:%d",
- c->min_io_size, wbuf->lnum, wbuf->offs);
- dbg_dump_stack();
+ /*
+ * Do not write whole write buffer but write only the minimum necessary
+ * amount of min. I/O units.
+ */
+ sync_len = ALIGN(wbuf->used, c->min_io_size);
+ dirt = sync_len - wbuf->used;
+ if (dirt)
+ ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len);
+ if (err)
return err;
- }
-
- dirt = wbuf->avail;
spin_lock(&wbuf->lock);
- wbuf->offs += c->min_io_size;
- wbuf->avail = c->min_io_size;
+ wbuf->offs += sync_len;
+ /*
+ * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
+ * But our goal is to optimize writes and make sure we write in
+ * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
+ * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
+ * sure that @wbuf->offs + @wbuf->size is aligned to
+ * @c->max_write_size. This way we make sure that after next
+ * write-buffer flush we are again at the optimal offset (aligned to
+ * @c->max_write_size).
+ */
+ if (c->leb_size - wbuf->offs < c->max_write_size)
+ wbuf->size = c->leb_size - wbuf->offs;
+ else if (wbuf->offs & (c->max_write_size - 1))
+ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+ else
+ wbuf->size = c->max_write_size;
+ wbuf->avail = wbuf->size;
wbuf->used = 0;
wbuf->next_ino = 0;
spin_unlock(&wbuf->lock);
@@ -362,37 +560,34 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
* @wbuf: write-buffer
* @lnum: logical eraseblock number to seek to
* @offs: logical eraseblock offset to seek to
- * @dtype: data type
*
- * This function targets the write buffer to logical eraseblock @lnum:@offs.
- * The write-buffer is synchronized if it is not empty. Returns zero in case of
- * success and a negative error code in case of failure.
+ * This function targets the write-buffer to logical eraseblock @lnum:@offs.
+ * The write-buffer has to be empty. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
-int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
- int dtype)
+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs)
{
const struct ubifs_info *c = wbuf->c;
- dbg_io("LEB %d:%d", lnum, offs);
+ dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
ubifs_assert(offs >= 0 && offs <= c->leb_size);
ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
ubifs_assert(lnum != wbuf->lnum);
-
- if (wbuf->used > 0) {
- int err = ubifs_wbuf_sync_nolock(wbuf);
-
- if (err)
- return err;
- }
+ ubifs_assert(wbuf->used == 0);
spin_lock(&wbuf->lock);
wbuf->lnum = lnum;
wbuf->offs = offs;
- wbuf->avail = c->min_io_size;
+ if (c->leb_size - wbuf->offs < c->max_write_size)
+ wbuf->size = c->leb_size - wbuf->offs;
+ else if (wbuf->offs & (c->max_write_size - 1))
+ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+ else
+ wbuf->size = c->max_write_size;
+ wbuf->avail = wbuf->size;
wbuf->used = 0;
spin_unlock(&wbuf->lock);
- wbuf->dtype = dtype;
return 0;
}
@@ -409,11 +604,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
int err, i;
+ ubifs_assert(!c->ro_media && !c->ro_mount);
if (!c->need_wbuf_sync)
return 0;
c->need_wbuf_sync = 0;
- if (c->ro_media) {
+ if (c->ro_error) {
err = -EROFS;
goto out_timers;
}
@@ -468,8 +664,9 @@ out_timers:
*
* This function writes data to flash via write-buffer @wbuf. This means that
* the last piece of the node won't reach the flash media immediately if it
- * does not take whole minimal I/O unit. Instead, the node will sit in RAM
- * until the write-buffer is synchronized (e.g., by timer).
+ * does not take whole max. write unit (@c->max_write_size). Instead, the node
+ * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
+ * because more data are appended to the write-buffer).
*
* This function returns zero in case of success and a negative error code in
* case of failure. If the node cannot be written because there is no more
@@ -478,16 +675,23 @@ out_timers:
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
struct ubifs_info *c = wbuf->c;
- int err, written, n, aligned_len = ALIGN(len, 8), offs;
+ int err, written, n, aligned_len = ALIGN(len, 8);
- dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
- dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
- wbuf->offs + wbuf->used);
+ dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
+ dbg_ntype(((struct ubifs_ch *)buf)->node_type),
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
+ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
+ ubifs_assert(wbuf->size >= c->min_io_size);
+ ubifs_assert(wbuf->size <= c->max_write_size);
+ ubifs_assert(wbuf->size % c->min_io_size == 0);
ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(!c->space_fixup);
+ if (c->leb_size - wbuf->offs >= c->max_write_size)
+ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC;
@@ -496,7 +700,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
cancel_wbuf_timer_nolock(wbuf);
- if (c->ro_media)
+ if (c->ro_error)
return -EROFS;
if (aligned_len <= wbuf->avail) {
@@ -507,17 +711,20 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
memcpy(wbuf->buf + wbuf->used, buf, len);
if (aligned_len == wbuf->avail) {
- dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
- wbuf->offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
- wbuf->offs, c->min_io_size,
- wbuf->dtype);
+ dbg_io("flush jhead %s wbuf to LEB %d:%d",
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
+ wbuf->offs, wbuf->size);
if (err)
goto out;
spin_lock(&wbuf->lock);
- wbuf->offs += c->min_io_size;
- wbuf->avail = c->min_io_size;
+ wbuf->offs += wbuf->size;
+ if (c->leb_size - wbuf->offs >= c->max_write_size)
+ wbuf->size = c->max_write_size;
+ else
+ wbuf->size = c->leb_size - wbuf->offs;
+ wbuf->avail = wbuf->size;
wbuf->used = 0;
wbuf->next_ino = 0;
spin_unlock(&wbuf->lock);
@@ -531,38 +738,63 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
goto exit;
}
- /*
- * The node is large enough and does not fit entirely within current
- * minimal I/O unit. We have to fill and flush write-buffer and switch
- * to the next min. I/O unit.
- */
- dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
- memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
- c->min_io_size, wbuf->dtype);
- if (err)
- goto out;
+ written = 0;
- offs = wbuf->offs + c->min_io_size;
- len -= wbuf->avail;
- aligned_len -= wbuf->avail;
- written = wbuf->avail;
+ if (wbuf->used) {
+ /*
+ * The node is large enough and does not fit entirely within
+ * current available space. We have to fill and flush
+ * write-buffer and switch to the next max. write unit.
+ */
+ dbg_io("flush jhead %s wbuf to LEB %d:%d",
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
+ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
+ wbuf->size);
+ if (err)
+ goto out;
+
+ wbuf->offs += wbuf->size;
+ len -= wbuf->avail;
+ aligned_len -= wbuf->avail;
+ written += wbuf->avail;
+ } else if (wbuf->offs & (c->max_write_size - 1)) {
+ /*
+ * The write-buffer offset is not aligned to
+ * @c->max_write_size and @wbuf->size is less than
+ * @c->max_write_size. Write @wbuf->size bytes to make sure the
+ * following writes are done in optimal @c->max_write_size
+ * chunks.
+ */
+ dbg_io("write %d bytes to LEB %d:%d",
+ wbuf->size, wbuf->lnum, wbuf->offs);
+ err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
+ wbuf->size);
+ if (err)
+ goto out;
+
+ wbuf->offs += wbuf->size;
+ len -= wbuf->size;
+ aligned_len -= wbuf->size;
+ written += wbuf->size;
+ }
/*
- * The remaining data may take more whole min. I/O units, so write the
- * remains multiple to min. I/O unit size directly to the flash media.
+ * The remaining data may take more whole max. write units, so write the
+ * part that is a multiple of max. write unit size directly to the flash media.
* We align node length to 8-byte boundary because we anyway flash wbuf
* if the remaining space is less than 8 bytes.
*/
- n = aligned_len >> c->min_io_shift;
+ n = aligned_len >> c->max_write_shift;
if (n) {
- n <<= c->min_io_shift;
- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
- wbuf->dtype);
+ n <<= c->max_write_shift;
+ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
+ wbuf->offs);
+ err = ubifs_leb_write(c, wbuf->lnum, buf + written,
+ wbuf->offs, n);
if (err)
goto out;
- offs += n;
+ wbuf->offs += n;
aligned_len -= n;
len -= n;
written += n;
@@ -572,14 +804,17 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
if (aligned_len)
/*
* And now we have what's left and what does not take whole
- * min. I/O unit, so write it to the write-buffer and we are
+ * max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
- wbuf->offs = offs;
+ if (c->leb_size - wbuf->offs >= c->max_write_size)
+ wbuf->size = c->max_write_size;
+ else
+ wbuf->size = c->leb_size - wbuf->offs;
+ wbuf->avail = wbuf->size - aligned_len;
wbuf->used = aligned_len;
- wbuf->avail = c->min_io_size - aligned_len;
wbuf->next_ino = 0;
spin_unlock(&wbuf->lock);
@@ -600,9 +835,9 @@ exit:
out:
ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
len, wbuf->lnum, wbuf->offs, err);
- dbg_dump_node(c, buf);
- dbg_dump_stack();
- dbg_dump_leb(c, wbuf->lnum);
+ ubifs_dump_node(c, buf);
+ dump_stack();
+ ubifs_dump_leb(c, wbuf->lnum);
return err;
}
@@ -613,7 +848,6 @@ out:
* @len: node length
* @lnum: logical eraseblock number
* @offs: offset within the logical eraseblock
- * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
*
* This function automatically fills node magic number, assigns sequence
* number, and calculates node CRC checksum. The length of the @buf buffer has
@@ -622,7 +856,7 @@ out:
* success and a negative error code in case of failure.
*/
int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
- int offs, int dtype)
+ int offs)
{
int err, buf_len = ALIGN(len, c->min_io_size);
@@ -631,18 +865,16 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
buf_len);
ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(!c->space_fixup);
- if (c->ro_media)
+ if (c->ro_error)
return -EROFS;
ubifs_prepare_node(c, buf, len, 1);
- err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
- if (err) {
- ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
- buf_len, lnum, offs, err);
- dbg_dump_node(c, buf);
- dbg_dump_stack();
- }
+ err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
+ if (err)
+ ubifs_dump_node(c, buf);
return err;
}
@@ -669,7 +901,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
int err, rlen, overlap;
struct ubifs_ch *ch = buf;
- dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+ dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
+ dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
ubifs_assert(!(offs & 7) && offs < c->leb_size);
ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
@@ -693,13 +926,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
if (rlen > 0) {
/* Read everything that goes before write-buffer */
- err = ubi_read(c->ubi, lnum, buf, offs, rlen);
- if (err && err != -EBADMSG) {
- ubifs_err("failed to read node %d from LEB %d:%d, "
- "error %d", type, lnum, offs, err);
- dbg_dump_stack();
+ err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
+ if (err && err != -EBADMSG)
return err;
- }
}
if (type != ch->node_type) {
@@ -708,7 +937,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
goto out;
}
- err = ubifs_check_node(c, buf, lnum, offs, 0);
+ err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
if (err) {
ubifs_err("expected node type %d", type);
return err;
@@ -724,8 +953,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
out:
ubifs_err("bad node at LEB %d:%d", lnum, offs);
- dbg_dump_node(c, buf);
- dbg_dump_stack();
+ ubifs_dump_node(c, buf);
+ dump_stack();
return -EINVAL;
}
@@ -754,37 +983,37 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
ubifs_assert(!(offs & 7) && offs < c->leb_size);
ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
- err = ubi_read(c->ubi, lnum, buf, offs, len);
- if (err && err != -EBADMSG) {
- ubifs_err("cannot read node %d from LEB %d:%d, error %d",
- type, lnum, offs, err);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
+ if (err && err != -EBADMSG)
return err;
- }
if (type != ch->node_type) {
- ubifs_err("bad node type (%d but expected %d)",
- ch->node_type, type);
+ ubifs_errc(c, "bad node type (%d but expected %d)",
+ ch->node_type, type);
goto out;
}
- err = ubifs_check_node(c, buf, lnum, offs, 0);
+ err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
if (err) {
- ubifs_err("expected node type %d", type);
+ ubifs_errc(c, "expected node type %d", type);
return err;
}
l = le32_to_cpu(ch->len);
if (l != len) {
- ubifs_err("bad node length %d, expected %d", l, len);
+ ubifs_errc(c, "bad node length %d, expected %d", l, len);
goto out;
}
return 0;
out:
- ubifs_err("bad node at LEB %d:%d", lnum, offs);
- dbg_dump_node(c, buf);
- dbg_dump_stack();
+ ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum,
+ offs, ubi_is_mapped(c->ubi, lnum));
+ if (!c->probing) {
+ ubifs_dump_node(c, buf);
+ dump_stack();
+ }
return -EINVAL;
}
@@ -793,18 +1022,18 @@ out:
* @c: UBIFS file-system description object
* @wbuf: write-buffer to initialize
*
- * This function initializes write buffer. Returns zero in case of success
+ * This function initializes write-buffer. Returns zero in case of success
* %-ENOMEM in case of failure.
*/
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
size_t size;
- wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
+ wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
if (!wbuf->buf)
return -ENOMEM;
- size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
+ size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
wbuf->inodes = kmalloc(size, GFP_KERNEL);
if (!wbuf->inodes) {
kfree(wbuf->buf);
@@ -814,25 +1043,32 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
wbuf->used = 0;
wbuf->lnum = wbuf->offs = -1;
- wbuf->avail = c->min_io_size;
- wbuf->dtype = UBI_UNKNOWN;
+ /*
+ * If the LEB starts at the max. write size aligned address, then
+ * write-buffer size has to be set to @c->max_write_size. Otherwise,
+ * set it to something smaller so that it ends at the closest max.
+ * write size boundary.
+ */
+ size = c->max_write_size - (c->leb_start % c->max_write_size);
+ wbuf->avail = wbuf->size = size;
wbuf->sync_callback = NULL;
mutex_init(&wbuf->io_mutex);
spin_lock_init(&wbuf->lock);
-
wbuf->c = c;
- init_timer(&wbuf->timer);
- wbuf->timer.function = wbuf_timer_callback_nolock;
- wbuf->timer.data = (unsigned long)wbuf;
- wbuf->timeout = DEFAULT_WBUF_TIMEOUT;
wbuf->next_ino = 0;
+ hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ wbuf->timer.function = wbuf_timer_callback_nolock;
+ wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
+ wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
+ wbuf->delta *= 1000000000ULL;
+ ubifs_assert(wbuf->delta <= ULONG_MAX);
return 0;
}
/**
* ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
- * @wbuf: the write-buffer whereto add
+ * @wbuf: the write-buffer where to add
* @inum: the inode number
*
* This function adds an inode number to the inode array of the write-buffer.
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 5e82cffe969..648b143606c 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -25,7 +25,6 @@
/* This file implements EXT2-compatible extended attribute ioctl() calls */
#include <linux/compat.h>
-#include <linux/smp_lock.h>
#include <linux/mount.h>
#include "ubifs.h"
@@ -148,19 +147,20 @@ out_unlock:
long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
int flags, err;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
switch (cmd) {
case FS_IOC_GETFLAGS:
flags = ubifs2ioctl(ubifs_inode(inode)->flags);
+ dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags);
return put_user(flags, (int __user *) arg);
case FS_IOC_SETFLAGS: {
if (IS_RDONLY(inode))
return -EROFS;
- if (!is_owner_or_cap(inode))
+ if (!inode_owner_or_capable(inode))
return -EACCES;
if (get_user(flags, (int __user *) arg))
@@ -173,11 +173,12 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
* Make sure the file-system is read-write and make sure it
* will not become read-only while we are changing the flags.
*/
- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write_file(file);
if (err)
return err;
+ dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
err = setflags(inode, flags);
- mnt_drop_write(file->f_path.mnt);
+ mnt_drop_write_file(file);
return err;
}
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f..0e045e75abd 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
*/
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{
- int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
+ int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
/*
@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* better to try to allocate space at the ends of eraseblocks. This is
* what the squeeze parameter does.
*/
+ ubifs_assert(!c->ro_media && !c->ro_mount);
squeeze = (jhead == BASEHD);
again:
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- if (c->ro_media) {
+ if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
@@ -139,16 +140,9 @@ again:
* Write buffer wasn't seek'ed or there is no enough space - look for an
* LEB with some empty space.
*/
- lnum = ubifs_find_free_space(c, len, &free, squeeze);
- if (lnum >= 0) {
- /* Found an LEB, add it to the journal head */
- offs = c->leb_size - free;
- err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
- if (err)
- goto out_return;
- /* A new bud was successfully allocated and added to the log */
+ lnum = ubifs_find_free_space(c, len, &offs, squeeze);
+ if (lnum >= 0)
goto out;
- }
err = lnum;
if (err != -ENOSPC)
@@ -159,7 +153,7 @@ again:
* some. But the write-buffer mutex has to be unlocked because
* GC also takes it.
*/
- dbg_jnl("no free space jhead %d, run GC", jhead);
+ dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
mutex_unlock(&wbuf->io_mutex);
lnum = ubifs_garbage_collect(c, 0);
@@ -174,7 +168,8 @@ again:
* because we dropped @wbuf->io_mutex, so try once
* again.
*/
- dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
+ dbg_jnl("GC couldn't make a free LEB for jhead %s",
+ dbg_jhead(jhead));
if (retries++ < 2) {
dbg_jnl("retry (%d)", retries);
goto again;
@@ -185,13 +180,13 @@ again:
}
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
+ dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
avail = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum != -1 && avail >= len) {
/*
* Someone else has switched the journal head and we have
- * enough space now. This happens when more then one process is
+ * enough space now. This happens when more than one process is
* trying to write to the same journal head at the same time.
*/
dbg_jnl("return LEB %d back, already have LEB %d:%d",
@@ -202,13 +197,24 @@ again:
return 0;
}
- err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
- if (err)
- goto out_return;
offs = 0;
out:
- err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
+ /*
+ * Make sure we synchronize the write-buffer before we add the new bud
+ * to the log. Otherwise we may have a power cut after the log
+ * reference node for the last bud (@lnum) is written but before the
+ * write-buffer data are written to the next-to-last bud
+ * (@wbuf->lnum). And the effect would be that the recovery would see
+ * that there is corruption in the next-to-last bud.
+ */
+ err = ubifs_wbuf_sync_nolock(wbuf);
+ if (err)
+ goto out_return;
+ err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+ if (err)
+ goto out_return;
+ err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs);
if (err)
goto out_unlock;
@@ -256,7 +262,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
ubifs_prepare_node(c, node, len, 0);
return ubifs_wbuf_write_nolock(wbuf, node, len);
@@ -286,7 +293,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
err = ubifs_wbuf_write_nolock(wbuf, buf, len);
if (err)
@@ -377,11 +385,9 @@ out:
if (err == -ENOSPC) {
/* This are some budgeting problems, print useful information */
down_write(&c->commit_sem);
- spin_lock(&c->space_lock);
- dbg_dump_stack();
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
- dbg_dump_lprops(c);
+ dump_stack();
+ ubifs_dump_budg(c, &c->bi);
+ ubifs_dump_lprops(c);
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
}
@@ -447,13 +453,11 @@ static int get_dent_type(int mode)
* @ino: buffer in which to pack inode node
* @inode: inode to pack
* @last: indicates the last node of the group
- * @last_reference: non-zero if this is a deletion inode
*/
static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
- const struct inode *inode, int last,
- int last_reference)
+ const struct inode *inode, int last)
{
- int data_len = 0;
+ int data_len = 0, last_reference = !inode->i_nlink;
struct ubifs_inode *ui = ubifs_inode(inode);
ino->ch.node_type = UBIFS_INO_NODE;
@@ -465,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ino->uid = cpu_to_le32(inode->i_uid);
- ino->gid = cpu_to_le32(inode->i_gid);
+ ino->uid = cpu_to_le32(i_uid_read(inode));
+ ino->gid = cpu_to_le32(i_gid_read(inode));
ino->mode = cpu_to_le32(inode->i_mode);
ino->flags = cpu_to_le32(ui->flags);
ino->size = cpu_to_le64(ui->ui_size);
@@ -596,9 +600,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
ubifs_prep_grp_node(c, dent, dlen, 0);
ino = (void *)dent + aligned_dlen;
- pack_inode(c, ino, inode, 0, last_reference);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + aligned_ilen;
- pack_inode(c, ino, dir, 1, 0);
+ pack_inode(c, ino, dir, 1);
if (last_reference) {
err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +610,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
release_head(c, BASEHD);
goto out_finish;
}
+ ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -664,6 +669,7 @@ out_free:
out_release:
release_head(c, BASEHD);
+ kfree(dent);
out_ro:
ubifs_ro_mode(c, err);
if (last_reference)
@@ -688,23 +694,33 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
{
struct ubifs_data_node *data;
int err, lnum, offs, compr_type, out_len;
- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
+ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
struct ubifs_inode *ui = ubifs_inode(inode);
- dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
- key_block(c, key), len, DBGKEY(key));
+ dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
+ (unsigned long)key_inum(c, key), key_block(c, key), len);
ubifs_assert(len <= UBIFS_BLOCK_SIZE);
- data = kmalloc(dlen, GFP_NOFS);
- if (!data)
- return -ENOMEM;
+ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
+ if (!data) {
+ /*
+ * Fall-back to the write reserve buffer. Note, we might be
+ * currently on the memory reclaim path, when the kernel is
+ * trying to free some memory by writing out dirty pages. The
+ * write reserve buffer helps us to guarantee that we are
+ * always able to write the data.
+ */
+ allocated = 0;
+ mutex_lock(&c->write_reserve_mutex);
+ data = c->write_reserve_buf;
+ }
data->ch.node_type = UBIFS_DATA_NODE;
key_write(c, key, &data->key);
data->size = cpu_to_le32(len);
zero_data_node_unused(data);
- if (!(ui->flags && UBIFS_COMPR_FL))
+ if (!(ui->flags & UBIFS_COMPR_FL))
/* Compression is disabled for this inode */
compr_type = UBIFS_COMPR_NONE;
else
@@ -733,7 +749,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
goto out_ro;
finish_reservation(c);
- kfree(data);
+ if (!allocated)
+ mutex_unlock(&c->write_reserve_mutex);
+ else
+ kfree(data);
return 0;
out_release:
@@ -742,7 +761,10 @@ out_ro:
ubifs_ro_mode(c, err);
finish_reservation(c);
out_free:
- kfree(data);
+ if (!allocated)
+ mutex_unlock(&c->write_reserve_mutex);
+ else
+ kfree(data);
return err;
}
@@ -750,30 +772,25 @@ out_free:
* ubifs_jnl_write_inode - flush inode to the journal.
* @c: UBIFS file-system description object
* @inode: inode to flush
- * @deletion: inode has been deleted
*
* This function writes inode @inode to the journal. If the inode is
* synchronous, it also synchronizes the write-buffer. Returns zero in case of
* success and a negative error code in case of failure.
*/
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
- int deletion)
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
- int err, len, lnum, offs, sync = 0;
+ int err, lnum, offs;
struct ubifs_ino_node *ino;
struct ubifs_inode *ui = ubifs_inode(inode);
+ int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
- dbg_jnl("ino %lu%s", inode->i_ino,
- deletion ? " (last reference)" : "");
- if (deletion)
- ubifs_assert(inode->i_nlink == 0);
+ dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
- len = UBIFS_INO_NODE_SZ;
/*
* If the inode is being deleted, do not write the attached data. No
* need to synchronize the write-buffer either.
*/
- if (!deletion) {
+ if (!last_reference) {
len += ui->data_len;
sync = IS_SYNC(inode);
}
@@ -786,7 +803,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 1, deletion);
+ pack_inode(c, ino, inode, 1);
err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
if (err)
goto out_release;
@@ -795,7 +812,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
inode->i_ino);
release_head(c, BASEHD);
- if (deletion) {
+ if (last_reference) {
err = ubifs_tnc_remove_ino(c, inode->i_ino);
if (err)
goto out_ro;
@@ -828,6 +845,65 @@ out_free:
}
/**
+ * ubifs_jnl_delete_inode - delete an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to delete
+ *
+ * This function deletes inode @inode which includes removing it from orphans,
+ * deleting it from TNC and, in some cases, writing a deletion inode to the
+ * journal.
+ *
+ * When regular file inodes are unlinked or a directory inode is removed, the
+ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
+ * direntry to the media, and adds the inode to orphans. After this, when the
+ * last reference to this inode has been dropped, this function is called. In
+ * general, it has to write one more deletion inode to the media, because if
+ * a commit happened between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
+ * anymore, and in fact it might not be on the flash anymore, because it might
+ * have been garbage-collected already. And for optimization reasons UBIFS does
+ * not read the orphan area if it has been unmounted cleanly, so it would have
+ * no indication in the journal that there is a deleted inode which has to be
+ * removed from TNC.
+ *
+ * However, if there was no commit between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
+ * inode to the media for the second time. And this is quite a typical case.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
+{
+ int err;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+
+ ubifs_assert(inode->i_nlink == 0);
+
+ if (ui->del_cmtno != c->cmt_no)
+ /* A commit happened for sure */
+ return ubifs_jnl_write_inode(c, inode);
+
+ down_read(&c->commit_sem);
+ /*
+ * Check commit number again, because the first test has been done
+ * without @c->commit_sem, so a commit might have happened.
+ */
+ if (ui->del_cmtno != c->cmt_no) {
+ up_read(&c->commit_sem);
+ return ubifs_jnl_write_inode(c, inode);
+ }
+
+ err = ubifs_tnc_remove_ino(c, inode->i_ino);
+ if (err)
+ ubifs_ro_mode(c, err);
+ else
+ ubifs_delete_orphan(c, inode->i_ino);
+ up_read(&c->commit_sem);
+ return err;
+}
+
+/**
* ubifs_jnl_rename - rename a directory entry.
* @c: UBIFS file-system description object
* @old_dir: parent inode of directory entry to rename
@@ -857,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
int move = (old_dir != new_dir);
struct ubifs_inode *uninitialized_var(new_ui);
- dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
- old_dentry->d_name.len, old_dentry->d_name.name,
- old_dir->i_ino, new_dentry->d_name.len,
- new_dentry->d_name.name, new_dir->i_ino);
+ dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
+ old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
@@ -917,16 +991,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
p = (void *)dent2 + aligned_dlen2;
if (new_inode) {
- pack_inode(c, p, new_inode, 0, last_reference);
+ pack_inode(c, p, new_inode, 0);
p += ALIGN(ilen, 8);
}
if (!move)
- pack_inode(c, p, old_dir, 1, 0);
+ pack_inode(c, p, old_dir, 1);
else {
- pack_inode(c, p, old_dir, 0, 0);
+ pack_inode(c, p, old_dir, 0);
p += ALIGN(plen, 8);
- pack_inode(c, p, new_dir, 1, 0);
+ pack_inode(c, p, new_dir, 1);
}
if (last_reference) {
@@ -935,6 +1009,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
release_head(c, BASEHD);
goto out_finish;
}
+ new_ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1074,7 +1149,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
ino_t inum = inode->i_ino;
unsigned int blk;
- dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
+ dbg_jnl("ino %lu, size %lld -> %lld",
+ (unsigned long)inum, old_size, new_size);
ubifs_assert(!ui->data_len);
ubifs_assert(S_ISREG(inode->i_mode));
ubifs_assert(mutex_is_locked(&ui->ui_mutex));
@@ -1098,7 +1174,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
blk = new_size >> UBIFS_BLOCK_SHIFT;
data_key_init(c, &key, inum, blk);
- dbg_jnl("last block key %s", DBGKEY(&key));
+ dbg_jnlk(&key, "last block key ");
err = ubifs_tnc_lookup(c, &key, dn);
if (err == -ENOENT)
dlen = 0; /* Not found (so it is a hole) */
@@ -1131,7 +1207,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 0, 0);
+ pack_inode(c, ino, inode, 0);
ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
if (dlen)
ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1164,7 +1240,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
data_key_init(c, &key, inum, blk);
bit = old_size & (UBIFS_BLOCK_SIZE - 1);
- blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
+ blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1);
data_key_init(c, &to_key, inum, blk);
err = ubifs_tnc_remove_range(c, &key, &to_key);
@@ -1189,7 +1265,6 @@ out_free:
return err;
}
-#ifdef CONFIG_UBIFS_FS_XATTR
/**
* ubifs_jnl_delete_xattr - delete an extended attribute.
@@ -1251,9 +1326,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
ubifs_prep_grp_node(c, xent, xlen, 0);
ino = (void *)xent + aligned_xlen;
- pack_inode(c, ino, inode, 0, 1);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + UBIFS_INO_NODE_SZ;
- pack_inode(c, ino, host, 1, 0);
+ pack_inode(c, ino, host, 1);
err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
if (!sync && !err)
@@ -1310,7 +1385,7 @@ out_ro:
* @host: host inode
*
* This function writes the updated version of an extended attribute inode and
- * the host inode tho the journal (to the base head). The host inode is written
+ * the host inode to the journal (to the base head). The host inode is written
* after the extended attribute inode in order to guarantee that the extended
* attribute will be flushed when the inode is synchronized by 'fsync()' and
* consequently, the write-buffer is synchronized. This function returns zero
@@ -1320,7 +1395,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
const struct inode *host)
{
int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
- struct ubifs_inode *host_ui = ubifs_inode(inode);
+ struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_ino_node *ino;
union ubifs_key key;
int sync = IS_DIRSYNC(host);
@@ -1344,8 +1419,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, host, 0, 0);
- pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
+ pack_inode(c, ino, host, 0);
+ pack_inode(c, (void *)ino + aligned_len1, inode, 1);
err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
if (!sync && !err) {
@@ -1384,4 +1459,3 @@ out_free:
return err;
}
-#endif /* CONFIG_UBIFS_FS_XATTR */
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 8f747600754..92a8491a8f8 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -38,6 +38,22 @@
#define __UBIFS_KEY_H__
/**
+ * key_mask_hash - mask a valid hash value.
+ * @hash: hash value to be masked
+ *
+ * We use hash values as offset in directories, so values %0 and %1 are
+ * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This
+ * function makes sure the reserved values are not used.
+ */
+static inline uint32_t key_mask_hash(uint32_t hash)
+{
+ hash &= UBIFS_S_KEY_HASH_MASK;
+ if (unlikely(hash <= 2))
+ hash += 3;
+ return hash;
+}
+
+/**
* key_r5_hash - R5 hash function (borrowed from reiserfs).
* @s: direntry name
* @len: name length
@@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len)
str++;
}
- a &= UBIFS_S_KEY_HASH_MASK;
-
- /*
- * We use hash values as offset in directories, so values %0 and %1 are
- * reserved for "." and "..". %2 is reserved for "end of readdir"
- * marker.
- */
- if (unlikely(a >= 0 && a <= 2))
- a += 3;
- return a;
+ return key_mask_hash(a);
}
/**
@@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len)
len = min_t(uint32_t, len, 4);
memcpy(&a, str, len);
- a &= UBIFS_S_KEY_HASH_MASK;
- if (unlikely(a >= 0 && a <= 2))
- a += 3;
- return a;
+ return key_mask_hash(a);
}
/**
@@ -225,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c,
}
/**
- * xent_key_init_hash - initialize extended attribute entry key without
- * re-calculating hash function.
- * @c: UBIFS file-system description object
- * @key: key to initialize
- * @inum: host inode number
- * @hash: extended attribute entry name hash
- */
-static inline void xent_key_init_hash(const struct ubifs_info *c,
- union ubifs_key *key, ino_t inum,
- uint32_t hash)
-{
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
- key->u32[0] = inum;
- key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
-}
-
-/**
* xent_key_init_flash - initialize on-flash extended attribute entry key.
* @c: UBIFS file-system description object
* @k: key to initialize
@@ -291,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c,
}
/**
- * data_key_init_flash - initialize on-flash data key.
+ * highest_data_key - get the highest possible data key for an inode.
* @c: UBIFS file-system description object
- * @k: key to initialize
+ * @key: key to initialize
* @inum: inode number
- * @block: block number
*/
-static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
- ino_t inum, unsigned int block)
+static inline void highest_data_key(const struct ubifs_info *c,
+ union ubifs_key *key, ino_t inum)
{
- union ubifs_key *key = k;
-
- ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
- key->j32[0] = cpu_to_le32(inum);
- key->j32[1] = cpu_to_le32(block |
- (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
- memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+ data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK);
}
/**
@@ -326,6 +306,20 @@ static inline void trun_key_init(const struct ubifs_info *c,
}
/**
+ * invalid_key_init - initialize invalid node key.
+ * @c: UBIFS file-system description object
+ * @key: key to initialize
+ *
+ * This is a helper function which marks a @key object as invalid.
+ */
+static inline void invalid_key_init(const struct ubifs_info *c,
+ union ubifs_key *key)
+{
+ key->u32[0] = 0xDEADBEAF;
+ key->u32[1] = UBIFS_INVALID_KEY;
+}
+
+/**
* key_type - get key type.
* @c: UBIFS file-system description object
* @key: key to get type of
@@ -345,7 +339,7 @@ static inline int key_type_flash(const struct ubifs_info *c, const void *k)
{
const union ubifs_key *key = k;
- return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
+ return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
}
/**
@@ -377,8 +371,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
* @c: UBIFS file-system description object
* @key: the key to get hash from
*/
-static inline int key_hash(const struct ubifs_info *c,
- const union ubifs_key *key)
+static inline uint32_t key_hash(const struct ubifs_info *c,
+ const union ubifs_key *key)
{
return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
}
@@ -388,7 +382,7 @@ static inline int key_hash(const struct ubifs_info *c,
* @c: UBIFS file-system description object
* @k: the key to get hash from
*/
-static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
+static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
{
const union ubifs_key *key = k;
@@ -416,7 +410,7 @@ static inline unsigned int key_block_flash(const struct ubifs_info *c,
{
const union ubifs_key *key = k;
- return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK;
+ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK;
}
/**
@@ -484,7 +478,7 @@ static inline void key_copy(const struct ubifs_info *c,
* @key2: the second key to compare
*
* This function compares 2 keys and returns %-1 if @key1 is less than
- * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2.
+ * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2.
*/
static inline int keys_cmp(const struct ubifs_info *c,
const union ubifs_key *key1,
@@ -503,6 +497,26 @@ static inline int keys_cmp(const struct ubifs_info *c,
}
/**
+ * keys_eq - determine if keys are equivalent.
+ * @c: UBIFS file-system description object
+ * @key1: the first key to compare
+ * @key2: the second key to compare
+ *
+ * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and
+ * %0 if not.
+ */
+static inline int keys_eq(const struct ubifs_info *c,
+ const union ubifs_key *key1,
+ const union ubifs_key *key2)
+{
+ if (key1->u32[0] != key2->u32[0])
+ return 0;
+ if (key1->u32[1] != key2->u32[1])
+ return 0;
+ return 1;
+}
+
+/**
* is_hash_key - is a key vulnerable to hash collisions.
* @c: UBIFS file-system description object
* @key: key
@@ -530,4 +544,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
return 0;
}
}
+
#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36857b9ed59..a902c5919e4 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -29,11 +29,7 @@
#include "ubifs.h"
-#ifdef CONFIG_UBIFS_FS_DEBUG
static int dbg_check_bud_bytes(struct ubifs_info *c);
-#else
-#define dbg_check_bud_bytes(c) 0
-#endif
/**
* ubifs_search_bud - search bud LEB.
@@ -100,20 +96,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
}
/**
- * next_log_lnum - switch to the next log LEB.
- * @c: UBIFS file-system description object
- * @lnum: current log LEB
- */
-static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
-{
- lnum += 1;
- if (lnum > c->log_last)
- lnum = UBIFS_LOG_LNUM;
-
- return lnum;
-}
-
-/**
* empty_log_bytes - calculate amount of empty space in the log.
* @c: UBIFS file-system description object
*/
@@ -159,7 +141,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
jhead = &c->jheads[bud->jhead];
list_add_tail(&bud->list, &jhead->buds_list);
} else
- ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
+ ubifs_assert(c->replaying && c->ro_mount);
/*
* Note, although this is a new bud, we anyway account this space now,
@@ -169,28 +151,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
*/
c->bud_bytes += c->leb_size - bud->start;
- dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
- bud->start, bud->jhead, c->bud_bytes);
- spin_unlock(&c->buds_lock);
-}
-
-/**
- * ubifs_create_buds_lists - create journal head buds lists for remount rw.
- * @c: UBIFS file-system description object
- */
-void ubifs_create_buds_lists(struct ubifs_info *c)
-{
- struct rb_node *p;
-
- spin_lock(&c->buds_lock);
- p = rb_first(&c->buds);
- while (p) {
- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
- struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
-
- list_add_tail(&bud->list, &jhead->buds_list);
- p = rb_next(p);
- }
+ dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
+ bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
spin_unlock(&c->buds_lock);
}
@@ -223,8 +185,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
}
mutex_lock(&c->log_mutex);
-
- if (c->ro_media) {
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
@@ -239,7 +201,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
}
/*
- * Make sure the the amount of space in buds will not exceed
+ * Make sure the amount of space in buds will not exceed the
* 'c->max_bud_bytes' limit, because we want to guarantee mount time
* limits.
*
@@ -277,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
ref->jhead = cpu_to_le32(jhead);
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -296,7 +258,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
* an unclean reboot, because the target LEB might have been
* unmapped, but not yet physically erased.
*/
- err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
+ err = ubifs_leb_map(c, bud->lnum);
if (err)
goto out_unlock;
}
@@ -304,7 +266,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
dbg_log("write ref LEB %d:%d",
c->lhead_lnum, c->lhead_offs);
err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
- c->lhead_offs, UBI_SHORTTERM);
+ c->lhead_offs);
if (err)
goto out_unlock;
@@ -353,19 +315,16 @@ static void remove_buds(struct ubifs_info *c)
* heads (non-closed buds).
*/
c->cmt_bud_bytes += wbuf->offs - bud->start;
- dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
- "cmt_bud_bytes %lld", bud->lnum, bud->start,
- bud->jhead, wbuf->offs - bud->start,
- c->cmt_bud_bytes);
+ dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
+ bud->lnum, bud->start, dbg_jhead(bud->jhead),
+ wbuf->offs - bud->start, c->cmt_bud_bytes);
bud->start = wbuf->offs;
} else {
c->cmt_bud_bytes += c->leb_size - bud->start;
- dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
- "cmt_bud_bytes %lld", bud->lnum, bud->start,
- bud->jhead, c->leb_size - bud->start,
- c->cmt_bud_bytes);
+ dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
+ bud->lnum, bud->start, dbg_jhead(bud->jhead),
+ c->leb_size - bud->start, c->cmt_bud_bytes);
rb_erase(p1, &c->buds);
- list_del(&bud->list);
/*
* If the commit does not finish, the recovery will need
* to replay the journal, in which case the old buds
@@ -373,7 +332,7 @@ static void remove_buds(struct ubifs_info *c)
* commit i.e. do not allow them to be garbage
* collected.
*/
- list_add(&bud->list, &c->old_buds);
+ list_move(&bud->list, &c->old_buds);
}
}
spin_unlock(&c->buds_lock);
@@ -410,7 +369,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
return -ENOMEM;
cs->ch.node_type = UBIFS_CS_NODE;
- cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
+ cs->cmt_no = cpu_to_le64(c->cmt_no);
ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
/*
@@ -428,7 +387,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
if (lnum == -1 || offs == c->leb_size)
continue;
- dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
+ dbg_log("add ref to LEB %d:%d for jhead %s",
+ lnum, offs, dbg_jhead(i));
ref = buf + len;
ref->ch.node_type = UBIFS_REF_NODE;
ref->lnum = cpu_to_le32(lnum);
@@ -443,7 +403,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
/* Switch to the next log LEB */
if (c->lhead_offs) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -456,7 +416,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
len = ALIGN(len, c->min_io_size);
dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
- err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
+ err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len);
if (err)
goto out;
@@ -464,7 +424,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
c->lhead_offs += len;
if (c->lhead_offs == c->leb_size) {
- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
+ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
c->lhead_offs = 0;
}
@@ -551,7 +511,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
}
mutex_lock(&c->log_mutex);
for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
- lnum = next_log_lnum(c, lnum)) {
+ lnum = ubifs_next_log_lnum(c, lnum)) {
dbg_log("unmap log LEB %d", lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
@@ -614,27 +574,10 @@ static int done_already(struct rb_root *done_tree, int lnum)
*/
static void destroy_done_tree(struct rb_root *done_tree)
{
- struct rb_node *this = done_tree->rb_node;
- struct done_ref *dr;
+ struct done_ref *dr, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- dr = rb_entry(this, struct done_ref, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &dr->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb)
kfree(dr);
- }
}
/**
@@ -657,10 +600,10 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
int sz = ALIGN(*offs, c->min_io_size), err;
ubifs_pad(c, buf + *offs, sz - *offs);
- err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, *lnum, buf, sz);
if (err)
return err;
- *lnum = next_log_lnum(c, *lnum);
+ *lnum = ubifs_next_log_lnum(c, *lnum);
*offs = 0;
}
memcpy(buf + *offs, node, len);
@@ -694,7 +637,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
lnum = c->ltail_lnum;
write_lnum = lnum;
while (1) {
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
goto out_free;
@@ -730,13 +673,13 @@ int ubifs_consolidate_log(struct ubifs_info *c)
ubifs_scan_destroy(sleb);
if (lnum == c->lhead_lnum)
break;
- lnum = next_log_lnum(c, lnum);
+ lnum = ubifs_next_log_lnum(c, lnum);
}
if (offs) {
int sz = ALIGN(offs, c->min_io_size);
ubifs_pad(c, buf + offs, sz - offs);
- err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, write_lnum, buf, sz);
if (err)
goto out_free;
offs = ALIGN(offs, c->min_io_size);
@@ -750,7 +693,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
/* Unmap remaining LEBs */
lnum = write_lnum;
do {
- lnum = next_log_lnum(c, lnum);
+ lnum = ubifs_next_log_lnum(c, lnum);
err = ubifs_leb_unmap(c, lnum);
if (err)
return err;
@@ -768,8 +711,6 @@ out_free:
return err;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
-
/**
* dbg_check_bud_bytes - make sure bud bytes calculations are all right.
* @c: UBIFS file-system description object
@@ -784,7 +725,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
struct ubifs_bud *bud;
long long bud_bytes = 0;
- if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ if (!dbg_is_chk_gen(c))
return 0;
spin_lock(&c->buds_lock);
@@ -801,5 +742,3 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
return err;
}
-
-#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 2ba93da71b6..46190a7c42a 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
}
}
}
+
/* Not greater than parent, so compare to children */
while (1) {
/* Compare to left child */
@@ -280,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
case LPROPS_FREE:
if (add_to_lpt_heap(c, lprops, cat))
break;
- /* No more room on heap so make it uncategorized */
+ /* No more room on heap so make it un-categorized */
cat = LPROPS_UNCAT;
/* Fall through */
case LPROPS_UNCAT:
@@ -299,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
default:
ubifs_assert(0);
}
+
lprops->flags &= ~LPROPS_CAT_MASK;
lprops->flags |= cat;
+ c->in_a_category_cnt += 1;
+ ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
}
/**
@@ -333,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
default:
ubifs_assert(0);
}
+
+ c->in_a_category_cnt -= 1;
+ ubifs_assert(c->in_a_category_cnt >= 0);
}
/**
@@ -374,8 +381,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
* @lprops: LEB properties
*
* A LEB may have fallen off of the bottom of a heap, and ended up as
- * uncategorized even though it has enough space for us now. If that is the case
- * this function will put the LEB back onto a heap.
+ * un-categorized even though it has enough space for us now. If that is the
+ * case this function will put the LEB back onto a heap.
*/
void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
{
@@ -435,10 +442,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c,
/**
* change_category - change LEB properties category.
* @c: UBIFS file-system description object
- * @lprops: LEB properties to recategorize
+ * @lprops: LEB properties to re-categorize
*
* LEB properties are categorized to enable fast find operations. When the LEB
- * properties change they must be recategorized.
+ * properties change they must be re-categorized.
*/
static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
{
@@ -446,7 +453,7 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
int new_cat = ubifs_categorize_lprops(c, lprops);
if (old_cat == new_cat) {
- struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1];
+ struct ubifs_lpt_heap *heap;
/* lprops on a heap now must be moved up or down */
if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT)
@@ -460,33 +467,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
}
/**
- * ubifs_get_lprops - get reference to LEB properties.
- * @c: the UBIFS file-system description object
- *
- * This function locks lprops. Lprops have to be unlocked by
- * 'ubifs_release_lprops()'.
- */
-void ubifs_get_lprops(struct ubifs_info *c)
-{
- mutex_lock(&c->lp_mutex);
-}
-
-/**
- * calc_dark - calculate LEB dark space size.
+ * ubifs_calc_dark - calculate LEB dark space size.
* @c: the UBIFS file-system description object
* @spc: amount of free and dirty space in the LEB
*
- * This function calculates amount of dark space in an LEB which has @spc bytes
- * of free and dirty space. Returns the calculations result.
+ * This function calculates and returns amount of dark space in an LEB which
+ * has @spc bytes of free and dirty space.
*
- * Dark space is the space which is not always usable - it depends on which
- * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
- * it is dark space, because it cannot fit a large data node. So UBIFS cannot
- * count on this LEB and treat these 512 bytes as usable because it is not true
- * if, for example, only big chunks of uncompressible data will be written to
- * the FS.
+ * UBIFS is trying to account the space which might not be usable, and this
+ * space is called "dark space". For example, if an LEB has only %512 free
+ * bytes, it is dark space, because it cannot fit a large data node.
*/
-static int calc_dark(struct ubifs_info *c, int spc)
+int ubifs_calc_dark(const struct ubifs_info *c, int spc)
{
ubifs_assert(!(spc & 7));
@@ -518,7 +510,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
pnode = (struct ubifs_pnode *)container_of(lprops - pos,
struct ubifs_pnode,
lprops[0]);
- return !test_bit(COW_ZNODE, &pnode->flags) &&
+ return !test_bit(COW_CNODE, &pnode->flags) &&
test_bit(DIRTY_CNODE, &pnode->flags);
}
@@ -529,15 +521,15 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
* @free: new free space amount
* @dirty: new dirty space amount
* @flags: new flags
- * @idx_gc_cnt: change to the count of idx_gc list
+ * @idx_gc_cnt: change to the count of @idx_gc list
*
- * This function changes LEB properties. This function does not change a LEB
- * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC.
+ * This function changes LEB properties (@free, @dirty or @flags). However, the
+ * property which has the %LPROPS_NC value is not changed. Returns a pointer to
+ * the updated LEB properties on success and a negative error code on failure.
*
- * This function returns a pointer to the updated LEB properties on success
- * and a negative error code on failure. N.B. the LEB properties may have had to
- * be copied (due to COW) and consequently the pointer returned may not be the
- * same as the pointer passed.
+ * Note, the LEB properties may have had to be copied (due to COW) and
+ * consequently the pointer returned may not be the same as the pointer
+ * passed.
*/
const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
const struct ubifs_lprops *lp,
@@ -546,7 +538,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
{
/*
* This is the only function that is allowed to change lprops, so we
- * discard the const qualifier.
+ * discard the "const" qualifier.
*/
struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
@@ -576,7 +568,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
spin_lock(&c->space_lock);
-
if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
c->lst.taken_empty_lebs -= 1;
@@ -587,7 +578,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
if (old_spc < c->dead_wm)
c->lst.total_dead -= old_spc;
else
- c->lst.total_dark -= calc_dark(c, old_spc);
+ c->lst.total_dark -= ubifs_calc_dark(c, old_spc);
c->lst.total_used -= c->leb_size - old_spc;
}
@@ -628,7 +619,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
if (new_spc < c->dead_wm)
c->lst.total_dead += new_spc;
else
- c->lst.total_dark += calc_dark(c, new_spc);
+ c->lst.total_dark += ubifs_calc_dark(c, new_spc);
c->lst.total_used += c->leb_size - new_spc;
}
@@ -637,39 +628,20 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
c->lst.taken_empty_lebs += 1;
change_category(c, lprops);
-
c->idx_gc_cnt += idx_gc_cnt;
-
spin_unlock(&c->space_lock);
-
return lprops;
}
/**
- * ubifs_release_lprops - release lprops lock.
- * @c: the UBIFS file-system description object
- *
- * This function has to be called after each 'ubifs_get_lprops()' call to
- * unlock lprops.
- */
-void ubifs_release_lprops(struct ubifs_info *c)
-{
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
- ubifs_assert(c->lst.empty_lebs >= 0 &&
- c->lst.empty_lebs <= c->main_lebs);
-
- mutex_unlock(&c->lp_mutex);
-}
-
-/**
* ubifs_get_lp_stats - get lprops statistics.
* @c: UBIFS file-system description object
* @st: return statistics
*/
-void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st)
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst)
{
spin_lock(&c->space_lock);
- memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats));
+ memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats));
spin_unlock(&c->space_lock);
}
@@ -709,6 +681,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
+ if (err)
+ ubifs_err("cannot change properties of LEB %d, error %d",
+ lnum, err);
return err;
}
@@ -745,6 +720,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
+ if (err)
+ ubifs_err("cannot update properties of LEB %d, error %d",
+ lnum, err);
return err;
}
@@ -768,6 +746,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
lpp = ubifs_lpt_lookup(c, lnum);
if (IS_ERR(lpp)) {
err = PTR_ERR(lpp);
+ ubifs_err("cannot read properties of LEB %d, error %d",
+ lnum, err);
goto out;
}
@@ -872,7 +852,9 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c)
return lprops;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
+/*
+ * Everything below is related to debugging.
+ */
/**
* dbg_check_cats - check category heaps and lists.
@@ -886,20 +868,20 @@ int dbg_check_cats(struct ubifs_info *c)
struct list_head *pos;
int i, cat;
- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
return 0;
list_for_each_entry(lprops, &c->empty_list, list) {
if (lprops->free != c->leb_size) {
- ubifs_err("non-empty LEB %d on empty list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on empty list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
}
@@ -907,15 +889,15 @@ int dbg_check_cats(struct ubifs_info *c)
i = 0;
list_for_each_entry(lprops, &c->freeable_list, list) {
if (lprops->free + lprops->dirty != c->leb_size) {
- ubifs_err("non-freeable LEB %d on freeable list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on freeable list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
i += 1;
@@ -937,21 +919,21 @@ int dbg_check_cats(struct ubifs_info *c)
list_for_each_entry(lprops, &c->frdi_idx_list, list) {
if (lprops->free + lprops->dirty != c->leb_size) {
- ubifs_err("non-freeable LEB %d on frdi_idx list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on frdi_idx list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
if (!(lprops->flags & LPROPS_INDEX)) {
- ubifs_err("non-index LEB %d on frdi_idx list "
- "(free %d dirty %d flags %d)", lprops->lnum,
- lprops->free, lprops->dirty, lprops->flags);
+ ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ lprops->lnum, lprops->free, lprops->dirty,
+ lprops->flags);
return -EINVAL;
}
}
@@ -984,7 +966,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
{
int i = 0, j, err = 0;
- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
+ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
return;
for (i = 0; i < heap->cnt; i++) {
@@ -1006,9 +988,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
goto out;
}
if (lprops != lp) {
- dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
- (size_t)lprops, (size_t)lp, lprops->lnum,
- lp->lnum);
+ ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
+ (size_t)lprops, (size_t)lp, lprops->lnum,
+ lp->lnum);
err = 4;
goto out;
}
@@ -1026,28 +1008,18 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
}
out:
if (err) {
- dbg_msg("failed cat %d hpos %d err %d", cat, i, err);
- dbg_dump_stack();
- dbg_dump_heap(c, heap, cat);
+ ubifs_err("failed cat %d hpos %d err %d", cat, i, err);
+ dump_stack();
+ ubifs_dump_heap(c, heap, cat);
}
}
/**
- * struct scan_check_data - data provided to scan callback function.
- * @lst: LEB properties statistics
- * @err: error code
- */
-struct scan_check_data {
- struct ubifs_lp_stats lst;
- int err;
-};
-
-/**
* scan_check_cb - scan callback.
* @c: the UBIFS file-system description object
* @lp: LEB properties to scan
* @in_tree: whether the LEB properties are in main memory
- * @data: information passed to and from the caller of the scan
+ * @lst: lprops statistics to update
*
* This function returns a code that indicates whether the scan should continue
* (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1056,12 +1028,12 @@ struct scan_check_data {
*/
static int scan_check_cb(struct ubifs_info *c,
const struct ubifs_lprops *lp, int in_tree,
- struct scan_check_data *data)
+ struct ubifs_lp_stats *lst)
{
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
- struct ubifs_lp_stats *lst = &data->lst;
- int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
+ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
+ void *buf = NULL;
cat = lp->flags & LPROPS_CAT_MASK;
if (cat != LPROPS_UNCAT) {
@@ -1069,7 +1041,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (cat != (lp->flags & LPROPS_CAT_MASK)) {
ubifs_err("bad LEB category %d expected %d",
(lp->flags & LPROPS_CAT_MASK), cat);
- goto out;
+ return -EINVAL;
}
}
@@ -1103,7 +1075,7 @@ static int scan_check_cb(struct ubifs_info *c,
}
if (!found) {
ubifs_err("bad LPT list (category %d)", cat);
- goto out;
+ return -EINVAL;
}
}
}
@@ -1115,36 +1087,40 @@ static int scan_check_cb(struct ubifs_info *c,
if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
lp != heap->arr[lp->hpos]) {
ubifs_err("bad LPT heap (category %d)", cat);
- goto out;
+ return -EINVAL;
}
}
- sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
- if (IS_ERR(sleb)) {
- /*
- * After an unclean unmount, empty and freeable LEBs
- * may contain garbage.
- */
- if (lp->free == c->leb_size) {
- ubifs_err("scan errors were in empty LEB "
- "- continuing checking");
- lst->empty_lebs += 1;
- lst->total_free += c->leb_size;
- lst->total_dark += calc_dark(c, c->leb_size);
- return LPT_SCAN_CONTINUE;
- }
+ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * After an unclean unmount, empty and freeable LEBs
+ * may contain garbage - do not scan them.
+ */
+ if (lp->free == c->leb_size) {
+ lst->empty_lebs += 1;
+ lst->total_free += c->leb_size;
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
+ return LPT_SCAN_CONTINUE;
+ }
+ if (lp->free + lp->dirty == c->leb_size &&
+ !(lp->flags & LPROPS_INDEX)) {
+ lst->total_free += lp->free;
+ lst->total_dirty += lp->dirty;
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
+ return LPT_SCAN_CONTINUE;
+ }
- if (lp->free + lp->dirty == c->leb_size &&
- !(lp->flags & LPROPS_INDEX)) {
- ubifs_err("scan errors were in freeable LEB "
- "- continuing checking");
- lst->total_free += lp->free;
- lst->total_dirty += lp->dirty;
- lst->total_dark += calc_dark(c, c->leb_size);
- return LPT_SCAN_CONTINUE;
+ sleb = ubifs_scan(c, lnum, 0, buf, 0);
+ if (IS_ERR(sleb)) {
+ ret = PTR_ERR(sleb);
+ if (ret == -EUCLEAN) {
+ ubifs_dump_lprops(c);
+ ubifs_dump_budg(c, &c->bi);
}
- data->err = PTR_ERR(sleb);
- return LPT_SCAN_STOP;
+ goto out;
}
is_idx = -1;
@@ -1183,8 +1159,8 @@ static int scan_check_cb(struct ubifs_info *c,
if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
dirty < 0) {
- ubifs_err("bad calculated accounting for LEB %d: "
- "free %d, dirty %d", lnum, free, dirty);
+ ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d",
+ lnum, free, dirty);
goto out_destroy;
}
@@ -1230,8 +1206,7 @@ static int scan_check_cb(struct ubifs_info *c,
/* Free but not unmapped LEB, it's fine */
is_idx = 0;
else {
- ubifs_err("indexing node without indexing "
- "flag");
+ ubifs_err("indexing node without indexing flag");
goto out_print;
}
}
@@ -1258,23 +1233,23 @@ static int scan_check_cb(struct ubifs_info *c,
if (spc < c->dead_wm)
lst->total_dead += spc;
else
- lst->total_dark += calc_dark(c, spc);
+ lst->total_dark += ubifs_calc_dark(c, spc);
}
ubifs_scan_destroy(sleb);
-
+ vfree(buf);
return LPT_SCAN_CONTINUE;
out_print:
- ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
- "should be free %d, dirty %d",
+ ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
lnum, lp->free, lp->dirty, lp->flags, free, dirty);
- dbg_dump_leb(c, lnum);
+ ubifs_dump_leb(c, lnum);
out_destroy:
ubifs_scan_destroy(sleb);
+ ret = -EINVAL;
out:
- data->err = -EINVAL;
- return LPT_SCAN_STOP;
+ vfree(buf);
+ return ret;
}
/**
@@ -1291,10 +1266,9 @@ out:
int dbg_check_lprops(struct ubifs_info *c)
{
int i, err;
- struct scan_check_data data;
- struct ubifs_lp_stats *lst = &data.lst;
+ struct ubifs_lp_stats lst;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
/*
@@ -1307,42 +1281,34 @@ int dbg_check_lprops(struct ubifs_info *c)
return err;
}
- memset(lst, 0, sizeof(struct ubifs_lp_stats));
-
- data.err = 0;
+ memset(&lst, 0, sizeof(struct ubifs_lp_stats));
err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
(ubifs_lpt_scan_callback)scan_check_cb,
- &data);
+ &lst);
if (err && err != -ENOSPC)
goto out;
- if (data.err) {
- err = data.err;
- goto out;
- }
- if (lst->empty_lebs != c->lst.empty_lebs ||
- lst->idx_lebs != c->lst.idx_lebs ||
- lst->total_free != c->lst.total_free ||
- lst->total_dirty != c->lst.total_dirty ||
- lst->total_used != c->lst.total_used) {
+ if (lst.empty_lebs != c->lst.empty_lebs ||
+ lst.idx_lebs != c->lst.idx_lebs ||
+ lst.total_free != c->lst.total_free ||
+ lst.total_dirty != c->lst.total_dirty ||
+ lst.total_used != c->lst.total_used) {
ubifs_err("bad overall accounting");
- ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
- "total_free %lld, total_dirty %lld, total_used %lld",
- lst->empty_lebs, lst->idx_lebs, lst->total_free,
- lst->total_dirty, lst->total_used);
- ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
- "total_free %lld, total_dirty %lld, total_used %lld",
+ ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
+ lst.empty_lebs, lst.idx_lebs, lst.total_free,
+ lst.total_dirty, lst.total_used);
+ ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
c->lst.total_dirty, c->lst.total_used);
err = -EINVAL;
goto out;
}
- if (lst->total_dead != c->lst.total_dead ||
- lst->total_dark != c->lst.total_dark) {
+ if (lst.total_dead != c->lst.total_dead ||
+ lst.total_dark != c->lst.total_dark) {
ubifs_err("bad dead/dark space accounting");
ubifs_err("calculated: total_dead %lld, total_dark %lld",
- lst->total_dead, lst->total_dark);
+ lst.total_dead, lst.total_dark);
ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
c->lst.total_dead, c->lst.total_dark);
err = -EINVAL;
@@ -1353,5 +1319,3 @@ int dbg_check_lprops(struct ubifs_info *c)
out:
return err;
}
-
-#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 9ff2463177e..d46b19ec181 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -36,15 +36,17 @@
* can be written into a single eraseblock. In that case, garbage collection
* consists of just writing the whole table, which therefore makes all other
* eraseblocks reusable. In the case of the big model, dirty eraseblocks are
- * selected for garbage collection, which consists are marking the nodes in
+ * selected for garbage collection, which consists of marking the clean nodes in
* that LEB as dirty, and then only the dirty nodes are written out. Also, in
* the case of the big model, a table of LEB numbers is saved so that the entire
* LPT does not need to be scanned looking for empty eraseblocks when UBIFS is
* first mounted.
*/
-#include <linux/crc16.h>
#include "ubifs.h"
+#include <linux/crc16.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
/**
* do_calc_lpt_geom - calculate sizes for the LPT area.
@@ -109,7 +111,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
c->lpt_sz += c->ltab_sz;
- c->lpt_sz += c->lsave_sz;
+ if (c->big_lpt)
+ c->lpt_sz += c->lsave_sz;
/* Add wastage */
sz = c->lpt_sz;
@@ -134,15 +137,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c)
int ubifs_calc_lpt_geom(struct ubifs_info *c)
{
int lebs_needed;
- uint64_t sz;
+ long long sz;
do_calc_lpt_geom(c);
/* Verify that lpt_lebs is big enough */
sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
- sz += c->leb_size - 1;
- do_div(sz, c->leb_size);
- lebs_needed = sz;
+ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
if (lebs_needed > c->lpt_lebs) {
ubifs_err("too few LPT LEBs");
return -EINVAL;
@@ -155,7 +156,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
}
c->check_lpt_free = c->big_lpt;
-
return 0;
}
@@ -175,7 +175,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
int *big_lpt)
{
int i, lebs_needed;
- uint64_t sz;
+ long long sz;
/* Start by assuming the minimum number of LPT LEBs */
c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
@@ -202,9 +202,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
/* Now check there are enough LPT LEBs */
for (i = 0; i < 64 ; i++) {
sz = c->lpt_sz * 4; /* Allow 4 times the size */
- sz += c->leb_size - 1;
- do_div(sz, c->leb_size);
- lebs_needed = sz;
+ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
if (lebs_needed > c->lpt_lebs) {
/* Not enough LPT LEBs so try again with more */
c->lpt_lebs = lebs_needed;
@@ -287,25 +285,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
const int k = 32 - nrbits;
uint8_t *p = *addr;
int b = *pos;
- uint32_t val;
+ uint32_t uninitialized_var(val);
+ const int bytes = (nrbits + b + 7) >> 3;
ubifs_assert(nrbits > 0);
ubifs_assert(nrbits <= 32);
ubifs_assert(*pos >= 0);
ubifs_assert(*pos < 8);
if (b) {
- val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) |
- ((uint32_t)p[4] << 24);
+ switch (bytes) {
+ case 2:
+ val = p[1];
+ break;
+ case 3:
+ val = p[1] | ((uint32_t)p[2] << 8);
+ break;
+ case 4:
+ val = p[1] | ((uint32_t)p[2] << 8) |
+ ((uint32_t)p[3] << 16);
+ break;
+ case 5:
+ val = p[1] | ((uint32_t)p[2] << 8) |
+ ((uint32_t)p[3] << 16) |
+ ((uint32_t)p[4] << 24);
+ }
val <<= (8 - b);
val |= *p >> b;
nrbits += b;
- } else
- val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
- ((uint32_t)p[3] << 24);
+ } else {
+ switch (bytes) {
+ case 1:
+ val = p[0];
+ break;
+ case 2:
+ val = p[0] | ((uint32_t)p[1] << 8);
+ break;
+ case 3:
+ val = p[0] | ((uint32_t)p[1] << 8) |
+ ((uint32_t)p[2] << 16);
+ break;
+ case 4:
+ val = p[0] | ((uint32_t)p[1] << 8) |
+ ((uint32_t)p[2] << 16) |
+ ((uint32_t)p[3] << 24);
+ break;
+ }
+ }
val <<= k;
val >>= k;
b = nrbits & 7;
- p += nrbits / 8;
+ p += nrbits >> 3;
*addr = p;
*pos = b;
ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
@@ -526,7 +555,7 @@ static int calc_nnode_num(int row, int col)
* This function calculates and returns the nnode number based on the parent's
* nnode number and the index in parent.
*/
-static int calc_nnode_num_from_parent(struct ubifs_info *c,
+static int calc_nnode_num_from_parent(const struct ubifs_info *c,
struct ubifs_nnode *parent, int iip)
{
int num, shft;
@@ -551,7 +580,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c,
* This function calculates and returns the pnode number based on the parent's
* nnode number and the index in parent.
*/
-static int calc_pnode_num_from_parent(struct ubifs_info *c,
+static int calc_pnode_num_from_parent(const struct ubifs_info *c,
struct ubifs_nnode *parent, int iip)
{
int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
@@ -672,8 +701,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen);
if (err)
goto out;
p = buf;
@@ -703,8 +731,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
set_ltab(c, lnum, c->leb_size - alen,
alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen);
if (err)
goto out;
p = buf;
@@ -751,8 +778,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen);
if (err)
goto out;
p = buf;
@@ -777,7 +803,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
alen = ALIGN(len, c->min_io_size);
set_ltab(c, lnum, c->leb_size - alen, alen - len);
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum++, buf, alen);
if (err)
goto out;
p = buf;
@@ -797,7 +823,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
/* Write remaining buffer */
memset(p, 0xff, alen - len);
- err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum, buf, alen);
if (err)
goto out;
@@ -897,7 +923,7 @@ static int check_lpt_crc(void *buf, int len)
if (crc != calc_crc) {
ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc,
calc_crc);
- dbg_dump_stack();
+ dump_stack();
return -EINVAL;
}
return 0;
@@ -920,7 +946,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type)
if (node_type != type) {
ubifs_err("invalid type (%d) in LPT node type %d", node_type,
type);
- dbg_dump_stack();
+ dump_stack();
return -EINVAL;
}
return 0;
@@ -934,7 +960,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type)
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int unpack_pnode(struct ubifs_info *c, void *buf,
+static int unpack_pnode(const struct ubifs_info *c, void *buf,
struct ubifs_pnode *pnode)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
@@ -964,15 +990,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf,
}
/**
- * unpack_nnode - unpack a nnode.
+ * ubifs_unpack_nnode - unpack a nnode.
* @c: UBIFS file-system description object
* @buf: buffer containing packed nnode to unpack
* @nnode: nnode structure to fill
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int unpack_nnode(struct ubifs_info *c, void *buf,
- struct ubifs_nnode *nnode)
+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
+ struct ubifs_nnode *nnode)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
@@ -1004,7 +1030,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf,
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int unpack_ltab(struct ubifs_info *c, void *buf)
+static int unpack_ltab(const struct ubifs_info *c, void *buf)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
@@ -1036,7 +1062,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf)
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int unpack_lsave(struct ubifs_info *c, void *buf)
+static int unpack_lsave(const struct ubifs_info *c, void *buf)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
@@ -1064,7 +1090,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf)
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
+static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode,
struct ubifs_nnode *parent, int iip)
{
int i, lvl, max_offs;
@@ -1108,7 +1134,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
+static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode,
struct ubifs_nnode *parent, int iip)
{
int i;
@@ -1142,7 +1168,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
* This function calculates the LEB numbers for the LEB properties it contains
* based on the pnode number.
*/
-static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode)
+static void set_pnode_lnum(const struct ubifs_info *c,
+ struct ubifs_pnode *pnode)
{
int i, lnum;
@@ -1192,10 +1219,10 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
if (c->big_lpt)
nnode->num = calc_nnode_num_from_parent(c, parent, iip);
} else {
- err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
+ err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1);
if (err)
goto out;
- err = unpack_nnode(c, buf, nnode);
+ err = ubifs_unpack_nnode(c, buf, nnode);
if (err)
goto out;
}
@@ -1217,6 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
out:
ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
+ dump_stack();
kfree(nnode);
return err;
}
@@ -1240,10 +1268,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
lnum = branch->lnum;
offs = branch->offs;
pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
- if (!pnode) {
- err = -ENOMEM;
- goto out;
- }
+ if (!pnode)
+ return -ENOMEM;
+
if (lnum == 0) {
/*
* This pnode was not written which just means that the LEB
@@ -1261,7 +1288,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
lprops->flags = ubifs_categorize_lprops(c, lprops);
}
} else {
- err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
+ err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1);
if (err)
goto out;
err = unpack_pnode(c, buf, pnode);
@@ -1282,8 +1309,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
out:
ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
- dbg_dump_pnode(c, pnode, parent, iip);
- dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
+ ubifs_dump_pnode(c, pnode, parent, iip);
+ dump_stack();
+ ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
kfree(pnode);
return err;
}
@@ -1302,7 +1330,7 @@ static int read_ltab(struct ubifs_info *c)
buf = vmalloc(c->ltab_sz);
if (!buf)
return -ENOMEM;
- err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz);
+ err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
if (err)
goto out;
err = unpack_ltab(c, buf);
@@ -1325,7 +1353,8 @@ static int read_lsave(struct ubifs_info *c)
buf = vmalloc(c->lsave_sz);
if (!buf)
return -ENOMEM;
- err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
+ err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs,
+ c->lsave_sz, 1);
if (err)
goto out;
err = unpack_lsave(c, buf);
@@ -1333,6 +1362,7 @@ static int read_lsave(struct ubifs_info *c)
goto out;
for (i = 0; i < c->lsave_cnt; i++) {
int lnum = c->lsave[i];
+ struct ubifs_lprops *lprops;
/*
* Due to automatic resizing, the values in the lsave table
@@ -1340,7 +1370,11 @@ static int read_lsave(struct ubifs_info *c)
*/
if (lnum >= c->leb_cnt)
continue;
- ubifs_lpt_lookup(c, lnum);
+ lprops = ubifs_lpt_lookup(c, lnum);
+ if (IS_ERR(lprops)) {
+ err = PTR_ERR(lprops);
+ goto out;
+ }
}
out:
vfree(buf);
@@ -1427,13 +1461,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
@@ -1556,7 +1590,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
nnode = c->nroot;
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
i = lnum - c->main_first;
shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
for (h = 1; h < c->lpt_hght; h++) {
@@ -1564,19 +1598,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
pnode = dirty_cow_pnode(c, pnode);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
@@ -1703,16 +1737,23 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
if (rd) {
err = lpt_init_rd(c);
if (err)
- return err;
+ goto out_err;
}
if (wr) {
err = lpt_init_wr(c);
if (err)
- return err;
+ goto out_err;
}
return 0;
+
+out_err:
+ if (wr)
+ ubifs_lpt_free(c, 1);
+ if (rd)
+ ubifs_lpt_free(c, 0);
+ return err;
}
/**
@@ -1780,11 +1821,11 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
if (c->big_lpt)
nnode->num = calc_nnode_num_from_parent(c, parent, iip);
} else {
- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
- c->nnode_sz);
+ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
+ c->nnode_sz, 1);
if (err)
return ERR_PTR(err);
- err = unpack_nnode(c, buf, nnode);
+ err = ubifs_unpack_nnode(c, buf, nnode);
if (err)
return ERR_PTR(err);
}
@@ -1849,8 +1890,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
ubifs_assert(branch->lnum >= c->lpt_first &&
branch->lnum <= c->lpt_last);
ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
- c->pnode_sz);
+ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
+ c->pnode_sz, 1);
if (err)
return ERR_PTR(err);
err = unpack_pnode(c, buf, pnode);
@@ -1949,12 +1990,11 @@ again:
if (path[h].in_tree)
continue;
- nnode = kmalloc(sz, GFP_NOFS);
+ nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS);
if (!nnode) {
err = -ENOMEM;
goto out;
}
- memcpy(nnode, &path[h].nnode, sz);
parent = nnode->parent;
parent->nbranch[nnode->iip].nnode = nnode;
path[h].ptr.nnode = nnode;
@@ -1967,12 +2007,11 @@ again:
const size_t sz = sizeof(struct ubifs_pnode);
struct ubifs_nnode *parent;
- pnode = kmalloc(sz, GFP_NOFS);
+ pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS);
if (!pnode) {
err = -ENOMEM;
goto out;
}
- memcpy(pnode, &path[h].pnode, sz);
parent = pnode->parent;
parent->nbranch[pnode->iip].pnode = pnode;
path[h].ptr.pnode = pnode;
@@ -2045,8 +2084,6 @@ out:
return err;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
-
/**
* dbg_chk_pnode - check a pnode.
* @c: the UBIFS file-system description object
@@ -2061,8 +2098,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
int i;
if (pnode->num != col) {
- dbg_err("pnode num %d expected %d parent num %d iip %d",
- pnode->num, col, pnode->parent->num, pnode->iip);
+ ubifs_err("pnode num %d expected %d parent num %d iip %d",
+ pnode->num, col, pnode->parent->num, pnode->iip);
return -EINVAL;
}
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
@@ -2076,14 +2113,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
if (lnum >= c->leb_cnt)
continue;
if (lprops->lnum != lnum) {
- dbg_err("bad LEB number %d expected %d",
- lprops->lnum, lnum);
+ ubifs_err("bad LEB number %d expected %d",
+ lprops->lnum, lnum);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
if (cat != LPROPS_UNCAT) {
- dbg_err("LEB %d taken but not uncat %d",
- lprops->lnum, cat);
+ ubifs_err("LEB %d taken but not uncat %d",
+ lprops->lnum, cat);
return -EINVAL;
}
continue;
@@ -2095,8 +2132,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
case LPROPS_FRDI_IDX:
break;
default:
- dbg_err("LEB %d index but cat %d",
- lprops->lnum, cat);
+ ubifs_err("LEB %d index but cat %d",
+ lprops->lnum, cat);
return -EINVAL;
}
} else {
@@ -2108,8 +2145,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
case LPROPS_FREEABLE:
break;
default:
- dbg_err("LEB %d not index but cat %d",
- lprops->lnum, cat);
+ ubifs_err("LEB %d not index but cat %d",
+ lprops->lnum, cat);
return -EINVAL;
}
}
@@ -2149,24 +2186,24 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
break;
}
if (!found) {
- dbg_err("LEB %d cat %d not found in cat heap/list",
- lprops->lnum, cat);
+ ubifs_err("LEB %d cat %d not found in cat heap/list",
+ lprops->lnum, cat);
return -EINVAL;
}
switch (cat) {
case LPROPS_EMPTY:
if (lprops->free != c->leb_size) {
- dbg_err("LEB %d cat %d free %d dirty %d",
- lprops->lnum, cat, lprops->free,
- lprops->dirty);
+ ubifs_err("LEB %d cat %d free %d dirty %d",
+ lprops->lnum, cat, lprops->free,
+ lprops->dirty);
return -EINVAL;
}
case LPROPS_FREEABLE:
case LPROPS_FRDI_IDX:
if (lprops->free + lprops->dirty != c->leb_size) {
- dbg_err("LEB %d cat %d free %d dirty %d",
- lprops->lnum, cat, lprops->free,
- lprops->dirty);
+ ubifs_err("LEB %d cat %d free %d dirty %d",
+ lprops->lnum, cat, lprops->free,
+ lprops->dirty);
return -EINVAL;
}
}
@@ -2190,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
struct ubifs_cnode *cn;
int num, iip = 0, err;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
while (cnode) {
@@ -2200,9 +2237,9 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
/* cnode is a nnode */
num = calc_nnode_num(row, col);
if (cnode->num != num) {
- dbg_err("nnode num %d expected %d "
- "parent num %d iip %d", cnode->num, num,
- (nnode ? nnode->num : 0), cnode->iip);
+ ubifs_err("nnode num %d expected %d parent num %d iip %d",
+ cnode->num, num,
+ (nnode ? nnode->num : 0), cnode->iip);
return -EINVAL;
}
nn = (struct ubifs_nnode *)cnode;
@@ -2239,5 +2276,3 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
}
return 0;
}
-
-#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5f0b83e20af..45d4e96a6ba 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -26,8 +26,12 @@
*/
#include <linux/crc16.h>
+#include <linux/slab.h>
+#include <linux/random.h>
#include "ubifs.h"
+static int dbg_populate_lsave(struct ubifs_info *c);
+
/**
* first_dirty_cnode - find first dirty cnode.
* @c: UBIFS file-system description object
@@ -109,8 +113,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
return 0;
cnt += 1;
while (1) {
- ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
- __set_bit(COW_ZNODE, &cnode->flags);
+ ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
+ __set_bit(COW_CNODE, &cnode->flags);
cnext = next_dirty_cnode(cnode);
if (!cnext) {
cnode->cnext = c->lpt_cnext;
@@ -177,8 +181,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
return 0;
}
}
- dbg_err("last LEB %d", *lnum);
- dump_stack();
return -ENOSPC;
}
@@ -193,6 +195,9 @@ static int layout_cnodes(struct ubifs_info *c)
int lnum, offs, len, alen, done_lsave, done_ltab, err;
struct ubifs_cnode *cnode;
+ err = dbg_chk_lpt_sz(c, 0, 0);
+ if (err)
+ return err;
cnode = c->lpt_cnext;
if (!cnode)
return 0;
@@ -206,6 +211,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->lsave_lnum = lnum;
c->lsave_offs = offs;
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
}
if (offs + c->ltab_sz <= c->leb_size) {
@@ -213,6 +219,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->ltab_lnum = lnum;
c->ltab_offs = offs;
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
}
do {
@@ -226,9 +233,10 @@ static int layout_cnodes(struct ubifs_info *c)
while (offs + len > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
- return err;
+ goto no_space;
offs = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
@@ -238,6 +246,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->lsave_lnum = lnum;
c->lsave_offs = offs;
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
continue;
}
if (!done_ltab) {
@@ -245,6 +254,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->ltab_lnum = lnum;
c->ltab_offs = offs;
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
continue;
}
break;
@@ -257,6 +267,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->lpt_offs = offs;
}
offs += len;
+ dbg_chk_lpt_sz(c, 1, len);
cnode = cnode->cnext;
} while (cnode && cnode != c->lpt_cnext);
@@ -265,9 +276,10 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->lsave_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
- return err;
+ goto no_space;
offs = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
@@ -276,6 +288,7 @@ static int layout_cnodes(struct ubifs_info *c)
c->lsave_lnum = lnum;
c->lsave_offs = offs;
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
}
/* Make sure to place LPT's own lprops table */
@@ -283,9 +296,10 @@ static int layout_cnodes(struct ubifs_info *c)
if (offs + c->ltab_sz > c->leb_size) {
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = alloc_lpt_leb(c, &lnum);
if (err)
- return err;
+ goto no_space;
offs = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
@@ -294,11 +308,24 @@ static int layout_cnodes(struct ubifs_info *c)
c->ltab_lnum = lnum;
c->ltab_offs = offs;
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
}
alen = ALIGN(offs, c->min_io_size);
upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
+ dbg_chk_lpt_sz(c, 4, alen - offs);
+ err = dbg_chk_lpt_sz(c, 3, alen);
+ if (err)
+ return err;
return 0;
+
+no_space:
+ ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+ lnum, offs, len, done_ltab, done_lsave);
+ ubifs_dump_lpt_info(c);
+ ubifs_dump_lpt_lebs(c);
+ dump_stack();
+ return err;
}
/**
@@ -333,8 +360,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
*lnum = i + c->lpt_first;
return 0;
}
- dbg_err("last LEB %d", *lnum);
- dump_stack();
return -ENOSPC;
}
@@ -369,12 +394,14 @@ static int write_cnodes(struct ubifs_info *c)
done_lsave = 1;
ubifs_pack_lsave(c, buf + offs, c->lsave);
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
}
if (offs + c->ltab_sz <= c->leb_size) {
done_ltab = 1;
ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
}
/* Loop for each cnode */
@@ -389,15 +416,15 @@ static int write_cnodes(struct ubifs_info *c)
alen = ALIGN(wlen, c->min_io_size);
memset(buf + offs, 0xff, alen - wlen);
err = ubifs_leb_write(c, lnum, buf + from, from,
- alen, UBI_SHORTTERM);
+ alen);
if (err)
return err;
}
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
- return err;
- offs = 0;
- from = 0;
+ goto no_space;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -408,12 +435,14 @@ static int write_cnodes(struct ubifs_info *c)
done_lsave = 1;
ubifs_pack_lsave(c, buf + offs, c->lsave);
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
continue;
}
if (!done_ltab) {
done_ltab = 1;
ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
continue;
}
break;
@@ -431,10 +460,11 @@ static int write_cnodes(struct ubifs_info *c)
* important.
*/
clear_bit(DIRTY_CNODE, &cnode->flags);
- smp_mb__before_clear_bit();
- clear_bit(COW_ZNODE, &cnode->flags);
- smp_mb__after_clear_bit();
+ smp_mb__before_atomic();
+ clear_bit(COW_CNODE, &cnode->flags);
+ smp_mb__after_atomic();
offs += len;
+ dbg_chk_lpt_sz(c, 1, len);
cnode = cnode->cnext;
} while (cnode && cnode != c->lpt_cnext);
@@ -444,14 +474,14 @@ static int write_cnodes(struct ubifs_info *c)
wlen = offs - from;
alen = ALIGN(wlen, c->min_io_size);
memset(buf + offs, 0xff, alen - wlen);
- err = ubifs_leb_write(c, lnum, buf + from, from, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_write(c, lnum, buf + from, from, alen);
if (err)
return err;
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
- return err;
- offs = 0;
+ goto no_space;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -461,6 +491,7 @@ static int write_cnodes(struct ubifs_info *c)
done_lsave = 1;
ubifs_pack_lsave(c, buf + offs, c->lsave);
offs += c->lsave_sz;
+ dbg_chk_lpt_sz(c, 1, c->lsave_sz);
}
/* Make sure to place LPT's own lprops table */
@@ -469,14 +500,14 @@ static int write_cnodes(struct ubifs_info *c)
wlen = offs - from;
alen = ALIGN(wlen, c->min_io_size);
memset(buf + offs, 0xff, alen - wlen);
- err = ubifs_leb_write(c, lnum, buf + from, from, alen,
- UBI_SHORTTERM);
+ err = ubifs_leb_write(c, lnum, buf + from, from, alen);
if (err)
return err;
+ dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
err = realloc_lpt_leb(c, &lnum);
if (err)
- return err;
- offs = 0;
+ goto no_space;
+ offs = from = 0;
ubifs_assert(lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
@@ -486,15 +517,22 @@ static int write_cnodes(struct ubifs_info *c)
done_ltab = 1;
ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
offs += c->ltab_sz;
+ dbg_chk_lpt_sz(c, 1, c->ltab_sz);
}
/* Write remaining data in buffer */
wlen = offs - from;
alen = ALIGN(wlen, c->min_io_size);
memset(buf + offs, 0xff, alen - wlen);
- err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
+ err = ubifs_leb_write(c, lnum, buf + from, from, alen);
if (err)
return err;
+
+ dbg_chk_lpt_sz(c, 4, alen - wlen);
+ err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size));
+ if (err)
+ return err;
+
c->nhead_lnum = lnum;
c->nhead_offs = ALIGN(offs, c->min_io_size);
@@ -503,31 +541,38 @@ static int write_cnodes(struct ubifs_info *c)
dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
if (c->big_lpt)
dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
+
return 0;
+
+no_space:
+ ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+ lnum, offs, len, done_ltab, done_lsave);
+ ubifs_dump_lpt_info(c);
+ ubifs_dump_lpt_lebs(c);
+ dump_stack();
+ return err;
}
/**
- * next_pnode - find next pnode.
+ * next_pnode_to_dirty - find next pnode to dirty.
* @c: UBIFS file-system description object
* @pnode: pnode
*
- * This function returns the next pnode or %NULL if there are no more pnodes.
+ * This function returns the next pnode to dirty or %NULL if there are no more
+ * pnodes. Note that pnodes that have never been written (lnum == 0) are
+ * skipped.
*/
-static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
- struct ubifs_pnode *pnode)
+static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
+ struct ubifs_pnode *pnode)
{
struct ubifs_nnode *nnode;
int iip;
/* Try to go right */
nnode = pnode->parent;
- iip = pnode->iip + 1;
- if (iip < UBIFS_LPT_FANOUT) {
- /* We assume here that LEB zero is never an LPT LEB */
+ for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
if (nnode->nbranch[iip].lnum)
return ubifs_get_pnode(c, nnode, iip);
- else
- return NULL;
}
/* Go up while can't go right */
@@ -536,8 +581,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
nnode = nnode->parent;
if (!nnode)
return NULL;
- /* We assume here that LEB zero is never an LPT LEB */
- } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum);
+ for (; iip < UBIFS_LPT_FANOUT; iip++) {
+ if (nnode->nbranch[iip].lnum)
+ break;
+ }
+ } while (iip >= UBIFS_LPT_FANOUT);
/* Go right */
nnode = ubifs_get_nnode(c, nnode, iip);
@@ -546,12 +594,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
/* Go down to level 1 */
while (nnode->level > 1) {
- nnode = ubifs_get_nnode(c, nnode, 0);
+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) {
+ if (nnode->nbranch[iip].lnum)
+ break;
+ }
+ if (iip >= UBIFS_LPT_FANOUT) {
+ /*
+ * Should not happen, but we need to keep going
+ * if it does.
+ */
+ iip = 0;
+ }
+ nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
return (void *)nnode;
}
- return ubifs_get_pnode(c, nnode, 0);
+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++)
+ if (nnode->nbranch[iip].lnum)
+ break;
+ if (iip >= UBIFS_LPT_FANOUT)
+ /* Should not happen, but we need to keep going if it does */
+ iip = 0;
+ return ubifs_get_pnode(c, nnode, iip);
}
/**
@@ -580,7 +645,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
return ubifs_get_pnode(c, nnode, iip);
@@ -639,9 +704,12 @@ static int make_tree_dirty(struct ubifs_info *c)
struct ubifs_pnode *pnode;
pnode = pnode_lookup(c, 0);
+ if (IS_ERR(pnode))
+ return PTR_ERR(pnode);
+
while (pnode) {
do_make_pnode_dirty(c, pnode);
- pnode = next_pnode(c, pnode);
+ pnode = next_pnode_to_dirty(c, pnode);
if (IS_ERR(pnode))
return PTR_ERR(pnode);
}
@@ -706,7 +774,7 @@ static void lpt_tgc_start(struct ubifs_info *c)
* LPT trivial garbage collection is where a LPT LEB contains only dirty and
* free space and so may be reused as soon as the next commit is completed.
* This function is called after the commit is completed (master node has been
- * written) and unmaps LPT LEBs that were marked for trivial GC.
+ * written) and un-maps LPT LEBs that were marked for trivial GC.
*/
static int lpt_tgc_end(struct ubifs_info *c)
{
@@ -746,6 +814,10 @@ static void populate_lsave(struct ubifs_info *c)
c->lpt_drty_flgs |= LSAVE_DIRTY;
ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
}
+
+ if (dbg_populate_lsave(c))
+ return;
+
list_for_each_entry(lprops, &c->empty_list, list) {
c->lsave[cnt++] = lprops->lnum;
if (cnt >= c->lsave_cnt)
@@ -982,7 +1054,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
* @c: UBIFS file-system description object
* @node_type: LPT node type
*/
-static int get_lpt_node_len(struct ubifs_info *c, int node_type)
+static int get_lpt_node_len(const struct ubifs_info *c, int node_type)
{
switch (node_type) {
case UBIFS_LPT_NNODE:
@@ -1003,7 +1075,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type)
* @buf: buffer
* @len: length of buffer
*/
-static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
+static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len)
{
int offs, pad_len;
@@ -1020,7 +1092,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
* @buf: buffer
* @node_num: node number is returned here
*/
-static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
+static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf,
+ int *node_num)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int pos = 0, node_type;
@@ -1038,12 +1111,14 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
*
* This function returns %1 if the buffer contains a node or %0 if it does not.
*/
-static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
+static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
{
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int pos = 0, node_type, node_len;
uint16_t crc, calc_crc;
+ if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
+ return 0;
node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
if (node_type == UBIFS_LPT_NOT_A_NODE)
return 0;
@@ -1060,7 +1135,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
return 1;
}
-
/**
* lpt_gc_lnum - garbage collect a LPT LEB.
* @c: UBIFS file-system description object
@@ -1079,11 +1153,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
void *buf = c->lpt_buf;
dbg_lp("LEB %d", lnum);
- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
- if (err) {
- ubifs_err("cannot read LEB %d, error %d", lnum, err);
+
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
return err;
- }
+
while (1) {
if (!is_a_node(c, buf, len)) {
int pad_len;
@@ -1156,6 +1230,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
dbg_lp("");
mutex_lock(&c->lp_mutex);
+ err = dbg_chk_lpt_free_spc(c);
+ if (err)
+ goto out;
err = dbg_check_ltab(c);
if (err)
goto out;
@@ -1412,10 +1489,12 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
kfree(c->lpt_nod_buf);
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
+/*
+ * Everything below is related to debugging.
+ */
/**
- * dbg_is_all_ff - determine if a buffer contains only 0xff bytes.
+ * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes.
* @buf: buffer
* @len: buffer length
*/
@@ -1440,7 +1519,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
struct ubifs_nnode *nnode;
int hght;
- /* Entire tree is in memory so first_nnode / next_nnode are ok */
+ /* Entire tree is in memory so first_nnode / next_nnode are OK */
nnode = first_nnode(c, &hght);
for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
struct ubifs_nbranch *branch;
@@ -1554,53 +1633,65 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
{
int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
int ret;
- void *buf = c->dbg_buf;
+ void *buf, *p;
- dbg_lp("LEB %d", lnum);
- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
- if (err) {
- dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
- return err;
+ if (!dbg_is_chk_lprops(c))
+ return 0;
+
+ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf) {
+ ubifs_err("cannot allocate memory for ltab checking");
+ return 0;
}
+
+ dbg_lp("LEB %d", lnum);
+
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
+ goto out;
+
while (1) {
- if (!is_a_node(c, buf, len)) {
+ if (!is_a_node(c, p, len)) {
int i, pad_len;
- pad_len = get_pad_len(c, buf, len);
+ pad_len = get_pad_len(c, p, len);
if (pad_len) {
- buf += pad_len;
+ p += pad_len;
len -= pad_len;
dirty += pad_len;
continue;
}
- if (!dbg_is_all_ff(buf, len)) {
- dbg_msg("invalid empty space in LEB %d at %d",
- lnum, c->leb_size - len);
+ if (!dbg_is_all_ff(p, len)) {
+ ubifs_err("invalid empty space in LEB %d at %d",
+ lnum, c->leb_size - len);
err = -EINVAL;
}
i = lnum - c->lpt_first;
if (len != c->ltab[i].free) {
- dbg_msg("invalid free space in LEB %d "
- "(free %d, expected %d)",
- lnum, len, c->ltab[i].free);
+ ubifs_err("invalid free space in LEB %d (free %d, expected %d)",
+ lnum, len, c->ltab[i].free);
err = -EINVAL;
}
if (dirty != c->ltab[i].dirty) {
- dbg_msg("invalid dirty space in LEB %d "
- "(dirty %d, expected %d)",
- lnum, dirty, c->ltab[i].dirty);
+ ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)",
+ lnum, dirty, c->ltab[i].dirty);
err = -EINVAL;
}
- return err;
+ goto out;
}
- node_type = get_lpt_node_type(c, buf, &node_num);
+ node_type = get_lpt_node_type(c, p, &node_num);
node_len = get_lpt_node_len(c, node_type);
ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
if (ret == 1)
dirty += node_len;
- buf += node_len;
+ p += node_len;
len -= node_len;
}
+
+ err = 0;
+out:
+ vfree(buf);
+ return err;
}
/**
@@ -1613,7 +1704,7 @@ int dbg_check_ltab(struct ubifs_info *c)
{
int lnum, err, i, cnt;
- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
+ if (!dbg_is_chk_lprops(c))
return 0;
/* Bring the entire tree into memory */
@@ -1636,7 +1727,7 @@ int dbg_check_ltab(struct ubifs_info *c)
for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
err = dbg_check_ltab_lnum(c, lnum);
if (err) {
- dbg_err("failed at LEB %d", lnum);
+ ubifs_err("failed at LEB %d", lnum);
return err;
}
}
@@ -1645,4 +1736,299 @@ int dbg_check_ltab(struct ubifs_info *c)
return 0;
}
-#endif /* CONFIG_UBIFS_FS_DEBUG */
+/**
+ * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT.
+ * @c: the UBIFS file-system description object
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ */
+int dbg_chk_lpt_free_spc(struct ubifs_info *c)
+{
+ long long free = 0;
+ int i;
+
+ if (!dbg_is_chk_lprops(c))
+ return 0;
+
+ for (i = 0; i < c->lpt_lebs; i++) {
+ if (c->ltab[i].tgc || c->ltab[i].cmt)
+ continue;
+ if (i + c->lpt_first == c->nhead_lnum)
+ free += c->leb_size - c->nhead_offs;
+ else if (c->ltab[i].free == c->leb_size)
+ free += c->leb_size;
+ }
+ if (free < c->lpt_sz) {
+ ubifs_err("LPT space error: free %lld lpt_sz %lld",
+ free, c->lpt_sz);
+ ubifs_dump_lpt_info(c);
+ ubifs_dump_lpt_lebs(c);
+ dump_stack();
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
+ * @c: the UBIFS file-system description object
+ * @action: what to do
+ * @len: length written
+ *
+ * This function returns %0 on success and a negative error code on failure.
+ * The @action argument may be one of:
+ * o %0 - LPT debugging checking starts, initialize debugging variables;
+ * o %1 - wrote an LPT node, increase LPT size by @len bytes;
+ * o %2 - switched to a different LEB and wasted @len bytes;
+ * o %3 - check that we've written the right number of bytes;
+ * o %4 - wasted @len bytes.
+ */
+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
+{
+ struct ubifs_debug_info *d = c->dbg;
+ long long chk_lpt_sz, lpt_sz;
+ int err = 0;
+
+ if (!dbg_is_chk_lprops(c))
+ return 0;
+
+ switch (action) {
+ case 0:
+ d->chk_lpt_sz = 0;
+ d->chk_lpt_sz2 = 0;
+ d->chk_lpt_lebs = 0;
+ d->chk_lpt_wastage = 0;
+ if (c->dirty_pn_cnt > c->pnode_cnt) {
+ ubifs_err("dirty pnodes %d exceed max %d",
+ c->dirty_pn_cnt, c->pnode_cnt);
+ err = -EINVAL;
+ }
+ if (c->dirty_nn_cnt > c->nnode_cnt) {
+ ubifs_err("dirty nnodes %d exceed max %d",
+ c->dirty_nn_cnt, c->nnode_cnt);
+ err = -EINVAL;
+ }
+ return err;
+ case 1:
+ d->chk_lpt_sz += len;
+ return 0;
+ case 2:
+ d->chk_lpt_sz += len;
+ d->chk_lpt_wastage += len;
+ d->chk_lpt_lebs += 1;
+ return 0;
+ case 3:
+ chk_lpt_sz = c->leb_size;
+ chk_lpt_sz *= d->chk_lpt_lebs;
+ chk_lpt_sz += len - c->nhead_offs;
+ if (d->chk_lpt_sz != chk_lpt_sz) {
+ ubifs_err("LPT wrote %lld but space used was %lld",
+ d->chk_lpt_sz, chk_lpt_sz);
+ err = -EINVAL;
+ }
+ if (d->chk_lpt_sz > c->lpt_sz) {
+ ubifs_err("LPT wrote %lld but lpt_sz is %lld",
+ d->chk_lpt_sz, c->lpt_sz);
+ err = -EINVAL;
+ }
+ if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
+ ubifs_err("LPT layout size %lld but wrote %lld",
+ d->chk_lpt_sz, d->chk_lpt_sz2);
+ err = -EINVAL;
+ }
+ if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
+ ubifs_err("LPT new nhead offs: expected %d was %d",
+ d->new_nhead_offs, len);
+ err = -EINVAL;
+ }
+ lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
+ lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
+ lpt_sz += c->ltab_sz;
+ if (c->big_lpt)
+ lpt_sz += c->lsave_sz;
+ if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
+ ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
+ d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
+ err = -EINVAL;
+ }
+ if (err) {
+ ubifs_dump_lpt_info(c);
+ ubifs_dump_lpt_lebs(c);
+ dump_stack();
+ }
+ d->chk_lpt_sz2 = d->chk_lpt_sz;
+ d->chk_lpt_sz = 0;
+ d->chk_lpt_wastage = 0;
+ d->chk_lpt_lebs = 0;
+ d->new_nhead_offs = len;
+ return err;
+ case 4:
+ d->chk_lpt_sz += len;
+ d->chk_lpt_wastage += len;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * dump_lpt_leb - dump an LPT LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number to dump
+ *
+ * This function dumps an LEB from LPT area. Nodes in this area are very
+ * different to nodes in the main area (e.g., they do not have common headers,
+ * they do not have 8-byte alignments, etc), so we have a separate function to
+ * dump LPT area LEBs. Note, LPT has to be locked by the caller.
+ */
+static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
+{
+ int err, len = c->leb_size, node_type, node_num, node_len, offs;
+ void *buf, *p;
+
+ pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
+ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf) {
+ ubifs_err("cannot allocate memory to dump LPT");
+ return;
+ }
+
+ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
+ if (err)
+ goto out;
+
+ while (1) {
+ offs = c->leb_size - len;
+ if (!is_a_node(c, p, len)) {
+ int pad_len;
+
+ pad_len = get_pad_len(c, p, len);
+ if (pad_len) {
+ pr_err("LEB %d:%d, pad %d bytes\n",
+ lnum, offs, pad_len);
+ p += pad_len;
+ len -= pad_len;
+ continue;
+ }
+ if (len)
+ pr_err("LEB %d:%d, free %d bytes\n",
+ lnum, offs, len);
+ break;
+ }
+
+ node_type = get_lpt_node_type(c, p, &node_num);
+ switch (node_type) {
+ case UBIFS_LPT_PNODE:
+ {
+ node_len = c->pnode_sz;
+ if (c->big_lpt)
+ pr_err("LEB %d:%d, pnode num %d\n",
+ lnum, offs, node_num);
+ else
+ pr_err("LEB %d:%d, pnode\n", lnum, offs);
+ break;
+ }
+ case UBIFS_LPT_NNODE:
+ {
+ int i;
+ struct ubifs_nnode nnode;
+
+ node_len = c->nnode_sz;
+ if (c->big_lpt)
+ pr_err("LEB %d:%d, nnode num %d, ",
+ lnum, offs, node_num);
+ else
+ pr_err("LEB %d:%d, nnode, ",
+ lnum, offs);
+ err = ubifs_unpack_nnode(c, p, &nnode);
+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+ pr_cont("%d:%d", nnode.nbranch[i].lnum,
+ nnode.nbranch[i].offs);
+ if (i != UBIFS_LPT_FANOUT - 1)
+ pr_cont(", ");
+ }
+ pr_cont("\n");
+ break;
+ }
+ case UBIFS_LPT_LTAB:
+ node_len = c->ltab_sz;
+ pr_err("LEB %d:%d, ltab\n", lnum, offs);
+ break;
+ case UBIFS_LPT_LSAVE:
+ node_len = c->lsave_sz;
+ pr_err("LEB %d:%d, lsave len\n", lnum, offs);
+ break;
+ default:
+ ubifs_err("LPT node type %d not recognized", node_type);
+ goto out;
+ }
+
+ p += node_len;
+ len -= node_len;
+ }
+
+ pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
+out:
+ vfree(buf);
+ return;
+}
+
+/**
+ * ubifs_dump_lpt_lebs - dump LPT lebs.
+ * @c: UBIFS file-system description object
+ *
+ * This function dumps all LPT LEBs. The caller has to make sure the LPT is
+ * locked.
+ */
+void ubifs_dump_lpt_lebs(const struct ubifs_info *c)
+{
+ int i;
+
+ pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid);
+ for (i = 0; i < c->lpt_lebs; i++)
+ dump_lpt_leb(c, i + c->lpt_first);
+ pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid);
+}
+
+/**
+ * dbg_populate_lsave - debugging version of 'populate_lsave()'
+ * @c: UBIFS file-system description object
+ *
+ * This is a debugging version for 'populate_lsave()' which populates lsave
+ * with random LEBs instead of useful LEBs, which is good for test coverage.
+ * Returns zero if lsave has not been populated (this debugging feature is
+ * disabled) and non-zero if lsave has been populated.
+ */
+static int dbg_populate_lsave(struct ubifs_info *c)
+{
+ struct ubifs_lprops *lprops;
+ struct ubifs_lpt_heap *heap;
+ int i;
+
+ if (!dbg_is_chk_gen(c))
+ return 0;
+ if (prandom_u32() & 3)
+ return 0;
+
+ for (i = 0; i < c->lsave_cnt; i++)
+ c->lsave[i] = c->main_first;
+
+ list_for_each_entry(lprops, &c->empty_list, list)
+ c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+ list_for_each_entry(lprops, &c->freeable_list, list)
+ c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+ list_for_each_entry(lprops, &c->frdi_idx_list, list)
+ c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
+
+ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ heap = &c->lpt_heap[LPROPS_DIRTY - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+ heap = &c->lpt_heap[LPROPS_FREE - 1];
+ for (i = 0; i < heap->cnt; i++)
+ c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
+
+ return 1;
+}
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 71d5493bf56..ab83ace9910 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -29,7 +29,8 @@
* @c: UBIFS file-system description object
*
* This function scans the master node LEBs and search for the latest master
- * node. Returns zero in case of success and a negative error code in case of
+ * node. Returns zero in case of success, %-EUCLEAN if the master area is
+ * corrupted and requires recovery, and a negative error code in case of
* failure.
*/
static int scan_for_master(struct ubifs_info *c)
@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c)
lnum = UBIFS_MST_LNUM;
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
nodes_cnt = sleb->nodes_cnt;
@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c)
snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
list);
if (snod->type != UBIFS_MST_NODE)
- goto out;
+ goto out_dump;
memcpy(c->mst_node, snod->node, snod->len);
offs = snod->offs;
}
@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c)
lnum += 1;
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
if (sleb->nodes_cnt != nodes_cnt)
@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c)
goto out;
snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
if (snod->type != UBIFS_MST_NODE)
- goto out;
+ goto out_dump;
if (snod->offs != offs)
goto out;
if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c)
out:
ubifs_scan_destroy(sleb);
+ return -EUCLEAN;
+
+out_dump:
+ ubifs_err("unexpected node type %d master LEB %d:%d",
+ snod->type, lnum, snod->offs);
+ ubifs_scan_destroy(sleb);
return -EINVAL;
}
@@ -141,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
}
main_sz = (long long)c->main_lebs * c->leb_size;
- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
+ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
err = 9;
goto out;
}
@@ -211,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
}
if (c->lst.total_dead + c->lst.total_dark +
- c->lst.total_used + c->old_idx_sz > main_sz) {
+ c->lst.total_used + c->bi.old_idx_sz > main_sz) {
err = 21;
goto out;
}
@@ -234,7 +241,7 @@ static int validate_master(const struct ubifs_info *c)
out:
ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
- dbg_dump_node(c, c->mst_node);
+ ubifs_dump_node(c, c->mst_node);
return -EINVAL;
}
@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c)
err = scan_for_master(c);
if (err) {
- err = ubifs_recover_master_node(c);
+ if (err == -EUCLEAN)
+ err = ubifs_recover_master_node(c);
if (err)
/*
* Note, we do not free 'c->mst_node' here because the
@@ -278,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
+ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -297,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
- c->calc_idx_sz = c->old_idx_sz;
+ c->calc_idx_sz = c->bi.old_idx_sz;
if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
c->no_orphs = 1;
@@ -309,7 +317,7 @@ int ubifs_read_master(struct ubifs_info *c)
if (c->leb_cnt < old_leb_cnt ||
c->leb_cnt < UBIFS_MIN_LEB_CNT) {
ubifs_err("bad leb_cnt on master node");
- dbg_dump_node(c, c->mst_node);
+ ubifs_dump_node(c, c->mst_node);
return -EINVAL;
}
@@ -353,8 +361,9 @@ int ubifs_write_master(struct ubifs_info *c)
{
int err, lnum, offs, len;
- if (c->ro_media)
- return -EINVAL;
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
+ return -EROFS;
lnum = UBIFS_MST_LNUM;
offs = c->mst_offs + c->mst_node_alsz;
@@ -370,7 +379,7 @@ int ubifs_write_master(struct ubifs_info *c)
c->mst_offs = offs;
c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
- err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
+ err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
if (err)
return err;
@@ -381,7 +390,7 @@ int ubifs_write_master(struct ubifs_info *c)
if (err)
return err;
}
- err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
+ err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
return err;
}
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4beccfc256d..ee7cb5ebb6e 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
}
/**
+ * ubifs_zn_obsolete - check if znode is obsolete.
+ * @znode: znode to check
+ *
+ * This helper function returns %1 if @znode is obsolete and %0 otherwise.
+ */
+static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
+{
+ return !!test_bit(OBSOLETE_ZNODE, &znode->flags);
+}
+
+/**
+ * ubifs_zn_cow - check if znode has to be copied on write.
+ * @znode: znode to check
+ *
+ * This helper function returns %1 if @znode has COW flag set and %0
+ * otherwise.
+ */
+static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
+{
+ return !!test_bit(COW_ZNODE, &znode->flags);
+}
+
+/**
* ubifs_wake_up_bgt - wake up background thread.
* @c: UBIFS file-system description object
*/
@@ -80,20 +103,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
}
/**
- * ubifs_ro_mode - switch UBIFS to read read-only mode.
- * @c: UBIFS file-system description object
- * @err: error code which is the reason of switching to R/O mode
- */
-static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
-{
- if (!c->ro_media) {
- c->ro_media = 1;
- ubifs_warn("switched to read-only mode, error %d", err);
- dbg_dump_stack();
- }
-}
-
-/**
* ubifs_compr_present - check if compressor was compiled in.
* @compr_type: compressor type to check
*
@@ -136,83 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
}
/**
- * ubifs_leb_unmap - unmap an LEB.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to unmap
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
-{
- int err;
-
- if (c->ro_media)
- return -EROFS;
- err = ubi_leb_unmap(c->ubi, lnum);
- if (err) {
- ubifs_err("unmap LEB %d failed, error %d", lnum, err);
- return err;
- }
-
- return 0;
-}
-
-/**
- * ubifs_leb_write - write to a LEB.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to write
- * @buf: buffer to write from
- * @offs: offset within LEB to write to
- * @len: length to write
- * @dtype: data type
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
- const void *buf, int offs, int len, int dtype)
-{
- int err;
-
- if (c->ro_media)
- return -EROFS;
- err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
- if (err) {
- ubifs_err("writing %d bytes at %d:%d, error %d",
- len, lnum, offs, err);
- return err;
- }
-
- return 0;
-}
-
-/**
- * ubifs_leb_change - atomic LEB change.
- * @c: UBIFS file-system description object
- * @lnum: LEB number to write
- * @buf: buffer to write from
- * @len: length to write
- * @dtype: data type
- *
- * This function returns %0 on success and a negative error code on failure.
- */
-static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
- const void *buf, int len, int dtype)
-{
- int err;
-
- if (c->ro_media)
- return -EROFS;
- err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
- if (err) {
- ubifs_err("changing %d bytes in LEB %d, error %d",
- len, lnum, err);
- return err;
- }
-
- return 0;
-}
-
-/**
* ubifs_encode_dev - encode device node IDs.
* @dev: UBIFS device node information
* @rdev: device IDs to encode
@@ -298,45 +230,74 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
}
/**
- * ubifs_reported_space - calculate reported free space.
- * @c: the UBIFS file-system description object
- * @free: amount of free space
- *
- * This function calculates amount of free space which will be reported to
- * user-space. User-space application tend to expect that if the file-system
- * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
- * are able to write a file of size N. UBIFS attaches node headers to each data
- * node and it has to write indexind nodes as well. This introduces additional
- * overhead, and UBIFS it has to report sligtly less free space to meet the
- * above expectetion.
- *
- * This function assumes free space is made up of uncompressed data nodes and
- * full index nodes (one per data node, doubled because we always allow enough
- * space to write the index twice).
+ * ubifs_current_time - round current time to time granularity.
+ * @inode: inode
+ */
+static inline struct timespec ubifs_current_time(struct inode *inode)
+{
+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+/**
+ * ubifs_tnc_lookup - look up a file-system node.
+ * @c: UBIFS file-system description object
+ * @key: node key to lookup
+ * @node: the node is returned here
*
- * Note, the calculation is pessimistic, which means that most of the time
- * UBIFS reports less space than it actually has.
+ * This function looks up and reads node with key @key. The caller has to make
+ * sure the @node buffer is large enough to fit the node. Returns zero in case
+ * of success, %-ENOENT if the node was not found, and a negative error code in
+ * case of failure.
*/
-static inline long long ubifs_reported_space(const struct ubifs_info *c,
- uint64_t free)
+static inline int ubifs_tnc_lookup(struct ubifs_info *c,
+ const union ubifs_key *key, void *node)
{
- int divisor, factor;
+ return ubifs_tnc_locate(c, key, node, NULL, NULL);
+}
- divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
- factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
- do_div(free, divisor);
+/**
+ * ubifs_get_lprops - get reference to LEB properties.
+ * @c: the UBIFS file-system description object
+ *
+ * This function locks lprops. Lprops have to be unlocked by
+ * 'ubifs_release_lprops()'.
+ */
+static inline void ubifs_get_lprops(struct ubifs_info *c)
+{
+ mutex_lock(&c->lp_mutex);
+}
- return free * factor;
+/**
+ * ubifs_release_lprops - release lprops lock.
+ * @c: the UBIFS file-system description object
+ *
+ * This function has to be called after each 'ubifs_get_lprops()' call to
+ * unlock lprops.
+ */
+static inline void ubifs_release_lprops(struct ubifs_info *c)
+{
+ ubifs_assert(mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c->lst.empty_lebs >= 0 &&
+ c->lst.empty_lebs <= c->main_lebs);
+ mutex_unlock(&c->lp_mutex);
}
/**
- * ubifs_current_time - round current time to time granularity.
- * @inode: inode
+ * ubifs_next_log_lnum - switch to the next log LEB.
+ * @c: UBIFS file-system description object
+ * @lnum: current log LEB
+ *
+ * This helper function returns the log LEB number which goes next after LEB
+ * 'lnum'.
*/
-static inline struct timespec ubifs_current_time(struct inode *inode)
+static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
{
- return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
- current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+ lnum += 1;
+ if (lnum > c->log_last)
+ lnum = UBIFS_LOG_LNUM;
+
+ return lnum;
}
#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 3afeb9242c6..f1c3e5a1b31 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -46,17 +46,13 @@
* Orphans are accumulated in a rb-tree. When an inode's link count drops to
* zero, the inode number is added to the rb-tree. It is removed from the tree
* when the inode is deleted. Any new orphans that are in the orphan tree when
- * the commit is run, are written to the orphan area in 1 or more orph nodes.
+ * the commit is run, are written to the orphan area in 1 or more orphan nodes.
* If the orphan area is full, it is consolidated to make space. There is
* always enough space because validation prevents the user from creating more
* than the maximum number of orphans allowed.
*/
-#ifdef CONFIG_UBIFS_FS_DEBUG
static int dbg_check_orphans(struct ubifs_info *c);
-#else
-#define dbg_check_orphans(c) 0
-#endif
/**
* ubifs_add_orphan - add an orphan.
@@ -92,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
else if (inum > o->inum)
p = &(*p)->rb_right;
else {
- dbg_err("orphaned twice");
+ ubifs_err("orphaned twice");
spin_unlock(&c->orphan_lock);
kfree(orphan);
return 0;
@@ -105,7 +101,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
list_add_tail(&orphan->list, &c->orph_list);
list_add_tail(&orphan->new_list, &c->orph_new);
spin_unlock(&c->orphan_lock);
- dbg_gen("ino %lu", inum);
+ dbg_gen("ino %lu", (unsigned long)inum);
return 0;
}
@@ -130,16 +126,19 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
else if (inum > o->inum)
p = p->rb_right;
else {
- if (o->dnext) {
+ if (o->del) {
spin_unlock(&c->orphan_lock);
- dbg_gen("deleted twice ino %lu", inum);
+ dbg_gen("deleted twice ino %lu",
+ (unsigned long)inum);
return;
}
- if (o->cnext) {
+ if (o->cmt) {
+ o->del = 1;
o->dnext = c->orph_dnext;
c->orph_dnext = o;
spin_unlock(&c->orphan_lock);
- dbg_gen("delete later ino %lu", inum);
+ dbg_gen("delete later ino %lu",
+ (unsigned long)inum);
return;
}
rb_erase(p, &c->orph_tree);
@@ -151,13 +150,13 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
}
spin_unlock(&c->orphan_lock);
kfree(o);
- dbg_gen("inum %lu", inum);
+ dbg_gen("inum %lu", (unsigned long)inum);
return;
}
}
spin_unlock(&c->orphan_lock);
- dbg_err("missing orphan ino %lu", inum);
- dbg_dump_stack();
+ ubifs_err("missing orphan ino %lu", (unsigned long)inum);
+ dump_stack();
}
/**
@@ -174,11 +173,13 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
last = &c->orph_cnext;
list_for_each_entry(orphan, &c->orph_new, new_list) {
ubifs_assert(orphan->new);
+ ubifs_assert(!orphan->cmt);
orphan->new = 0;
+ orphan->cmt = 1;
*last = orphan;
last = &orphan->cnext;
}
- *last = orphan->cnext;
+ *last = NULL;
c->cmt_orphans = c->new_orphans;
c->new_orphans = 0;
dbg_cmt("%d orphans to commit", c->cmt_orphans);
@@ -229,7 +230,7 @@ static int tot_avail_orphs(struct ubifs_info *c)
}
/**
- * do_write_orph_node - write a node
+ * do_write_orph_node - write a node to the orphan head.
* @c: UBIFS file-system description object
* @len: length of node
* @atomic: write atomically
@@ -246,8 +247,7 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
ubifs_assert(c->ohead_offs == 0);
ubifs_prepare_node(c, c->orph_buf, len, 1);
len = ALIGN(len, c->min_io_size);
- err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len);
} else {
if (c->ohead_offs == 0) {
/* Ensure LEB has been unmapped */
@@ -256,17 +256,17 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
return err;
}
err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum,
- c->ohead_offs, UBI_SHORTTERM);
+ c->ohead_offs);
}
return err;
}
/**
- * write_orph_node - write an orph node
+ * write_orph_node - write an orphan node.
* @c: UBIFS file-system description object
* @atomic: write atomically
*
- * This function builds an orph node from the cnext list and writes it to the
+ * This function builds an orphan node from the cnext list and writes it to the
* orphan head. On success, %0 is returned, otherwise a negative error code
* is returned.
*/
@@ -302,7 +302,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
cnext = c->orph_cnext;
for (i = 0; i < cnt; i++) {
orphan = cnext;
+ ubifs_assert(orphan->cmt);
orph->inos[i] = cpu_to_le64(orphan->inum);
+ orphan->cmt = 0;
cnext = orphan->cnext;
orphan->cnext = NULL;
}
@@ -310,10 +312,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
c->cmt_orphans -= cnt;
spin_unlock(&c->orphan_lock);
if (c->cmt_orphans)
- orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
+ orph->cmt_no = cpu_to_le64(c->cmt_no);
else
/* Mark the last node of the commit */
- orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
+ orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
ubifs_assert(c->ohead_offs + len <= c->leb_size);
ubifs_assert(c->ohead_lnum >= c->orph_first);
ubifs_assert(c->ohead_lnum <= c->orph_last);
@@ -324,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
}
/**
- * write_orph_nodes - write orph nodes until there are no more to commit
+ * write_orph_nodes - write orphan nodes until there are no more to commit.
* @c: UBIFS file-system description object
* @atomic: write atomically
*
- * This function writes orph nodes for all the orphans to commit. On success,
+ * This function writes orphan nodes for all the orphans to commit. On success,
* %0 is returned, otherwise a negative error code is returned.
*/
static int write_orph_nodes(struct ubifs_info *c, int atomic)
@@ -381,11 +383,12 @@ static int consolidate(struct ubifs_info *c)
list_for_each_entry(orphan, &c->orph_list, list) {
if (orphan->new)
continue;
+ orphan->cmt = 1;
*last = orphan;
last = &orphan->cnext;
cnt += 1;
}
- *last = orphan->cnext;
+ *last = NULL;
ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
c->cmt_orphans = cnt;
c->ohead_lnum = c->orph_first;
@@ -445,10 +448,11 @@ static void erase_deleted(struct ubifs_info *c)
orphan = dnext;
dnext = orphan->dnext;
ubifs_assert(!orphan->new);
+ ubifs_assert(orphan->del);
rb_erase(&orphan->rb, &c->orph_tree);
list_del(&orphan->list);
c->tot_orphans -= 1;
- dbg_gen("deleting orphan ino %lu", orphan->inum);
+ dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
kfree(orphan);
}
c->orph_dnext = NULL;
@@ -476,14 +480,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c)
}
/**
- * clear_orphans - erase all LEBs used for orphans.
+ * ubifs_clear_orphans - erase all LEBs used for orphans.
* @c: UBIFS file-system description object
*
* If recovery is not required, then the orphans from the previous session
* are not needed. This function locates the LEBs used to record
* orphans, and un-maps them.
*/
-static int clear_orphans(struct ubifs_info *c)
+int ubifs_clear_orphans(struct ubifs_info *c)
{
int lnum, err;
@@ -534,10 +538,11 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
rb_link_node(&orphan->rb, parent, p);
rb_insert_color(&orphan->rb, &c->orph_tree);
list_add_tail(&orphan->list, &c->orph_list);
+ orphan->del = 1;
orphan->dnext = c->orph_dnext;
c->orph_dnext = orphan;
- dbg_mnt("ino %lu, new %d, tot %d",
- inum, c->new_orphans, c->tot_orphans);
+ dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
+ c->new_orphans, c->tot_orphans);
return 0;
}
@@ -545,9 +550,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
* do_kill_orphans - remove orphan inodes from the index.
* @c: UBIFS file-system description object
* @sleb: scanned LEB
- * @last_cmt_no: cmt_no of last orph node read is passed and returned here
+ * @last_cmt_no: cmt_no of last orphan node read is passed and returned here
* @outofdate: whether the LEB is out of date is returned here
- * @last_flagged: whether the end orph node is encountered
+ * @last_flagged: whether the end orphan node is encountered
*
* This function is a helper to the 'kill_orphans()' function. It goes through
* every orphan node in a LEB and for every inode number recorded, removes
@@ -565,9 +570,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
list_for_each_entry(snod, &sleb->nodes, list) {
if (snod->type != UBIFS_ORPH_NODE) {
- ubifs_err("invalid node type %d in orphan area at "
- "%d:%d", snod->type, sleb->lnum, snod->offs);
- dbg_dump_node(c, snod->node);
+ ubifs_err("invalid node type %d in orphan area at %d:%d",
+ snod->type, sleb->lnum, snod->offs);
+ ubifs_dump_node(c, snod->node);
return -EINVAL;
}
@@ -578,8 +583,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
/*
* The commit number on the master node may be less, because
* of a failed commit. If there are several failed commits in a
- * row, the commit number written on orph nodes will continue to
- * increase (because the commit number is adjusted here) even
+ * row, the commit number written on orphan nodes will continue
+ * to increase (because the commit number is adjusted here) even
* though the commit number on the master node stays the same
* because the master node has not been re-written.
*/
@@ -587,15 +592,14 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
c->cmt_no = cmt_no;
if (cmt_no < *last_cmt_no && *last_flagged) {
/*
- * The last orph node had a higher commit number and was
- * flagged as the last written for that commit number.
- * That makes this orph node, out of date.
+ * The last orphan node had a higher commit number and
+ * was flagged as the last written for that commit
+ * number. That makes this orphan node, out of date.
*/
if (!first) {
- ubifs_err("out of order commit number %llu in "
- "orphan node at %d:%d",
+ ubifs_err("out of order commit number %llu in orphan node at %d:%d",
cmt_no, sleb->lnum, snod->offs);
- dbg_dump_node(c, snod->node);
+ ubifs_dump_node(c, snod->node);
return -EINVAL;
}
dbg_rcvry("out of date LEB %d", sleb->lnum);
@@ -609,7 +613,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
inum = le64_to_cpu(orph->inos[i]);
- dbg_rcvry("deleting orphaned inode %lu", inum);
+ dbg_rcvry("deleting orphaned inode %lu",
+ (unsigned long)inum);
err = ubifs_tnc_remove_ino(c, inum);
if (err)
return err;
@@ -655,10 +660,10 @@ static int kill_orphans(struct ubifs_info *c)
/*
* Orph nodes always start at c->orph_first and are written to each
* successive LEB in turn. Generally unused LEBs will have been unmapped
- * but may contain out of date orph nodes if the unmap didn't go
- * through. In addition, the last orph node written for each commit is
+ * but may contain out of date orphan nodes if the unmap didn't go
+ * through. In addition, the last orphan node written for each commit is
* marked (top bit of orph->cmt_no is set to 1). It is possible that
- * there are orph nodes from the next commit (i.e. the commit did not
+ * there are orphan nodes from the next commit (i.e. the commit did not
* complete successfully). In that case, no orphans will have been lost
* due to the way that orphans are written, and any orphans added will
* be valid orphans anyway and so can be deleted.
@@ -667,9 +672,11 @@ static int kill_orphans(struct ubifs_info *c)
struct ubifs_scan_leb *sleb;
dbg_rcvry("LEB %d", lnum);
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb)) {
- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
+ if (PTR_ERR(sleb) == -EUCLEAN)
+ sleb = ubifs_recover_leb(c, lnum, 0,
+ c->sbuf, -1);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
@@ -715,12 +722,14 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
if (unclean)
err = kill_orphans(c);
else if (!read_only)
- err = clear_orphans(c);
+ err = ubifs_clear_orphans(c);
return err;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
+/*
+ * Everything below is related to debugging.
+ */
struct check_orphan {
struct rb_node rb;
@@ -806,27 +815,10 @@ static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
static void dbg_free_check_tree(struct rb_root *root)
{
- struct rb_node *this = root->rb_node;
- struct check_orphan *o;
+ struct check_orphan *o, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- o = rb_entry(this, struct check_orphan, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &o->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(o, n, root, rb)
kfree(o);
- }
}
static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
@@ -840,8 +832,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (inum != ci->last_ino) {
/* Lowest node type is the inode node, so it comes first */
if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
- ubifs_err("found orphan node ino %lu, type %d", inum,
- key_type(c, &zbr->key));
+ ubifs_err("found orphan node ino %lu, type %d",
+ (unsigned long)inum, key_type(c, &zbr->key));
ci->last_ino = inum;
ci->tot_inos += 1;
err = ubifs_tnc_read_node(c, zbr, ci->node);
@@ -853,7 +845,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
/* Must be recorded as an orphan */
if (!dbg_find_check_orphan(&ci->root, inum) &&
!dbg_find_orphan(c, inum)) {
- ubifs_err("missing orphan, ino %lu", inum);
+ ubifs_err("missing orphan, ino %lu",
+ (unsigned long)inum);
ci->missing += 1;
}
}
@@ -887,15 +880,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
{
int lnum, err = 0;
+ void *buf;
/* Check no-orphans flag and skip this if no orphans */
if (c->no_orphs)
return 0;
+ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf) {
+ ubifs_err("cannot allocate memory to check orphans");
+ return 0;
+ }
+
for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
struct ubifs_scan_leb *sleb;
- sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
+ sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
@@ -907,6 +907,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
break;
}
+ vfree(buf);
return err;
}
@@ -915,7 +916,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
struct check_info ci;
int err;
- if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
+ if (!dbg_is_chk_orph(c))
return 0;
ci.last_ino = 0;
@@ -954,5 +955,3 @@ out:
kfree(ci.node);
return err;
}
-
-#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 77d26c141cf..c14adb2f420 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -23,14 +23,32 @@
/*
* This file implements functions needed to recover from unclean un-mounts.
* When UBIFS is mounted, it checks a flag on the master node to determine if
- * an un-mount was completed sucessfully. If not, the process of mounting
- * incorparates additional checking and fixing of on-flash data structures.
+ * an un-mount was completed successfully. If not, the process of mounting
+ * incorporates additional checking and fixing of on-flash data structures.
* UBIFS always cleans away all remnants of an unclean un-mount, so that
* errors do not accumulate. However UBIFS defers recovery if it is mounted
* read-only, and the flash is not modified in that case.
+ *
+ * The general UBIFS approach to the recovery is that it recovers from
+ * corruptions which could be caused by power cuts, but it refuses to recover
+ * from corruption caused by other reasons. And UBIFS tries to distinguish
+ * between these 2 causes of corruption, silently recovering in the former
+ * case and loudly complaining in the latter case.
+ *
+ * UBIFS writes only to erased LEBs, so it writes only to the flash space
+ * containing only 0xFFs. UBIFS also always writes strictly from the beginning
+ * of the LEB to the end. And UBIFS assumes that the underlying flash media
+ * writes in @c->max_write_size bytes at a time.
+ *
+ * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
+ * I/O unit corresponding to offset X to contain corrupted data, all the
+ * following min. I/O units have to contain empty space (all 0xFFs). If this is
+ * not true, the corruption cannot be the result of a power cut, and UBIFS
+ * refuses to mount.
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
@@ -53,6 +71,25 @@ static int is_empty(void *buf, int len)
}
/**
+ * first_non_ff - find offset of the first non-0xff byte.
+ * @buf: buffer to search in
+ * @len: length of buffer
+ *
+ * This function returns offset of the first non-0xff byte in @buf or %-1 if
+ * the buffer contains only 0xff bytes.
+ */
+static int first_non_ff(void *buf, int len)
+{
+ uint8_t *p = buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (*p++ != 0xff)
+ return i;
+ return -1;
+}
+
+/**
* get_master_node - get the last valid master node allowing for corruption.
* @c: UBIFS file-system description object
* @lnum: LEB number
@@ -80,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
if (!sbuf)
return -ENOMEM;
- err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
+ err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
if (err && err != -EBADMSG)
goto out_free;
@@ -168,18 +205,18 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
struct ubifs_mst_node *mst)
{
int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
- uint32_t save_flags;
+ __le32 save_flags;
dbg_rcvry("recovery");
save_flags = mst->flags;
- mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
+ mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
- err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum, mst, sz);
if (err)
goto out;
- err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum + 1, mst, sz);
if (err)
goto out;
out:
@@ -237,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c)
if (cor1)
goto out_err;
mst = mst1;
- } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
+ } else if (offs1 == 0 &&
+ c->leb_size - offs2 - sz < sz) {
/* 1st LEB was unmapped and written, 2nd not */
if (cor1)
goto out_err;
@@ -267,12 +305,12 @@ int ubifs_recover_master_node(struct ubifs_info *c)
mst = mst2;
}
- dbg_rcvry("recovered master node from LEB %d",
+ ubifs_msg("recovered master node from LEB %d",
(mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
- if ((c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (c->ro_mount) {
/* Read-only mode. Keep a copy for switching to rw mode */
c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
if (!c->rcvrd_mst_node) {
@@ -280,6 +318,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
goto out_free;
}
memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
+
+ /*
+ * We had to recover the master node, which means there was an
+ * unclean reboot. However, it is possible that the master node
+ * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
+ * E.g., consider the following chain of events:
+ *
+ * 1. UBIFS was cleanly unmounted, so the master node is clean
+ * 2. UBIFS is being mounted R/W and starts changing the master
+ * node in the first (%UBIFS_MST_LNUM). A power cut happens,
+ * so this LEB ends up with some amount of garbage at the
+ * end.
+ * 3. UBIFS is being mounted R/O. We reach this place and
+ * recover the master node from the second LEB
+ * (%UBIFS_MST_LNUM + 1). But we cannot update the media
+ * because we are being mounted R/O. We have to defer the
+ * operation.
+ * 4. However, this master node (@c->mst_node) is marked as
+ * clean (since the step 1). And if we just return, the
+ * mount code will be confused and won't recover the master
+ * node when it is re-mounted R/W later.
+ *
+ * Thus, force the recovery by marking the master node as
+ * dirty.
+ */
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
} else {
/* Write the recovered master node */
c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
@@ -298,12 +362,12 @@ out_err:
out_free:
ubifs_err("failed to recover master node");
if (mst1) {
- dbg_err("dumping first master node");
- dbg_dump_node(c, mst1);
+ ubifs_err("dumping first master node");
+ ubifs_dump_node(c, mst1);
}
if (mst2) {
- dbg_err("dumping second master node");
- dbg_dump_node(c, mst2);
+ ubifs_err("dumping second master node");
+ ubifs_dump_node(c, mst2);
}
vfree(buf2);
vfree(buf1);
@@ -342,44 +406,23 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
* @offs: offset to check
*
* This function returns %1 if @offs was in the last write to the LEB whose data
- * is in @buf, otherwise %0 is returned. The determination is made by checking
- * for subsequent empty space starting from the next min_io_size boundary (or a
- * bit less than the common header size if min_io_size is one).
+ * is in @buf, otherwise %0 is returned. The determination is made by checking
+ * for subsequent empty space starting from the next @c->max_write_size
+ * boundary.
*/
static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
{
- int empty_offs;
- int check_len;
+ int empty_offs, check_len;
uint8_t *p;
- if (c->min_io_size == 1) {
- check_len = c->leb_size - offs;
- p = buf + check_len;
- for (; check_len > 0; check_len--)
- if (*--p != 0xff)
- break;
- /*
- * 'check_len' is the size of the corruption which cannot be
- * more than the size of 1 node if it was caused by an unclean
- * unmount.
- */
- if (check_len > UBIFS_MAX_NODE_SZ)
- return 0;
- return 1;
- }
-
/*
- * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
- * last wbuf written. After that should be empty space.
+ * Round up to the next @c->max_write_size boundary i.e. @offs is in
+ * the last wbuf written. After that should be empty space.
*/
- empty_offs = ALIGN(offs + 1, c->min_io_size);
+ empty_offs = ALIGN(offs + 1, c->max_write_size);
check_len = c->leb_size - empty_offs;
p = buf + empty_offs - offs;
-
- for (; check_len > 0; check_len--)
- if (*p++ != 0xff)
- return 0;
- return 1;
+ return is_empty(p, check_len);
}
/**
@@ -392,7 +435,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
*
* This function pads up to the next min_io_size boundary (if there is one) and
* sets empty space to all 0xff. @buf, @offs and @len are updated to the next
- * min_io_size boundary (if there is one).
+ * @c->min_io_size boundary.
*/
static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
int *offs, int *len)
@@ -402,11 +445,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
lnum = lnum;
dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
- if (c->min_io_size == 1) {
- memset(*buf, 0xff, c->leb_size - *offs);
- return;
- }
-
ubifs_assert(!(*offs & 7));
empty_offs = ALIGN(*offs, c->min_io_size);
pad_len = empty_offs - *offs;
@@ -425,59 +463,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
* @lnum: LEB number of the LEB from which @buf was read
* @offs: offset from which @buf was read
*
- * This function scans @buf for more nodes and returns %0 is a node is found and
- * %1 if no more nodes are found.
+ * This function ensures that the corrupted node at @offs is the last thing
+ * written to a LEB. This function returns %1 if more data is not found and
+ * %0 if more data is found.
*/
static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
int lnum, int offs)
{
- int skip, next_offs = 0;
+ struct ubifs_ch *ch = buf;
+ int skip, dlen = le32_to_cpu(ch->len);
- if (len > UBIFS_DATA_NODE_SZ) {
- struct ubifs_ch *ch = buf;
- int dlen = le32_to_cpu(ch->len);
-
- if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
- dlen <= UBIFS_MAX_DATA_NODE_SZ)
- /* The corrupt node looks like a data node */
- next_offs = ALIGN(offs + dlen, 8);
- }
-
- if (c->min_io_size == 1)
- skip = 8;
- else
- skip = ALIGN(offs + 1, c->min_io_size) - offs;
-
- offs += skip;
- buf += skip;
- len -= skip;
- while (len > 8) {
- struct ubifs_ch *ch = buf;
- uint32_t magic = le32_to_cpu(ch->magic);
- int ret;
-
- if (magic == UBIFS_NODE_MAGIC) {
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
- if (ret == SCANNED_A_NODE || ret > 0) {
- /*
- * There is a small chance this is just data in
- * a data node, so check that possibility. e.g.
- * this is part of a file that itself contains
- * a UBIFS image.
- */
- if (next_offs && offs + le32_to_cpu(ch->len) <=
- next_offs)
- continue;
- dbg_rcvry("unexpected node at %d:%d", lnum,
- offs);
- return 0;
- }
- }
- offs += 8;
- buf += 8;
- len -= 8;
+ /* Check for empty space after the corrupt node's common header */
+ skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+ /*
+ * The area after the common header size is not empty, so the common
+ * header must be intact. Check it.
+ */
+ if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+ dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
+ return 0;
}
- return 1;
+ /* Now we know the corrupt node's length we can skip over it */
+ skip = ALIGN(offs + dlen, c->max_write_size) - offs;
+ /* After which there should be empty space */
+ if (is_empty(buf + skip, len - skip))
+ return 1;
+ dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
+ return 0;
}
/**
@@ -500,7 +514,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
endpt = snod->offs + snod->len;
}
- if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
+ if (c->ro_mount && !c->remounting_rw) {
/* Add to recovery list */
struct ubifs_unclean_leb *ucleb;
@@ -526,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
int len = ALIGN(endpt, c->min_io_size);
if (start) {
- err = ubi_read(c->ubi, lnum, sleb->buf, 0,
- start);
+ err = ubifs_leb_read(c, lnum, sleb->buf, 0,
+ start, 1);
if (err)
return err;
}
@@ -541,8 +555,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
ubifs_pad(c, buf, pad_len);
}
}
- err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
- UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, sleb->buf, len);
if (err)
return err;
}
@@ -551,16 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
}
/**
- * drop_incomplete_group - drop nodes from an incomplete group.
+ * drop_last_group - drop the last group of nodes.
* @sleb: scanned LEB information
* @offs: offset of dropped nodes is returned here
*
- * This function returns %1 if nodes are dropped and %0 otherwise.
+ * This is a helper function for 'ubifs_recover_leb()' which drops the last
+ * group of nodes of the scanned LEB.
*/
-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
+static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
{
- int dropped = 0;
-
while (!list_empty(&sleb->nodes)) {
struct ubifs_scan_node *snod;
struct ubifs_ch *ch;
@@ -569,15 +581,41 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
list);
ch = snod->node;
if (ch->group_type != UBIFS_IN_NODE_GROUP)
- return dropped;
- dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
+ break;
+
+ dbg_rcvry("dropping grouped node at %d:%d",
+ sleb->lnum, snod->offs);
+ *offs = snod->offs;
+ list_del(&snod->list);
+ kfree(snod);
+ sleb->nodes_cnt -= 1;
+ }
+}
+
+/**
+ * drop_last_node - drop the last node.
+ * @sleb: scanned LEB information
+ * @offs: offset of the dropped node is returned here (left untouched if the
+ * scanned node list is empty)
+ *
+ * This is a helper function for 'ubifs_recover_leb()' which drops the last
+ * node of the scanned LEB.
+ */
+static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
+{
+ struct ubifs_scan_node *snod;
+
+ if (!list_empty(&sleb->nodes)) {
+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
+ list);
+
+ dbg_rcvry("dropping last node at %d:%d",
+ sleb->lnum, snod->offs);
*offs = snod->offs;
list_del(&snod->list);
kfree(snod);
sleb->nodes_cnt -= 1;
- dropped = 1;
}
- return dropped;
}
/**
@@ -586,33 +624,30 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
* @lnum: LEB number
* @offs: offset
* @sbuf: LEB-sized buffer to use
- * @grouped: nodes may be grouped for recovery
+ * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
+ * belong to any journal head)
*
* This function does a scan of a LEB, but caters for errors that might have
* been caused by the unclean unmount from which we are attempting to recover.
- *
- * This function returns %0 on success and a negative error code on failure.
+ * Returns the scanned LEB information in case of success, or an error pointer
+ * (%-EUCLEAN if an unrecoverable corruption is found) in case of failure.
*/
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
- int offs, void *sbuf, int grouped)
+ int offs, void *sbuf, int jhead)
{
- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
- int empty_chkd = 0, start = offs;
+ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
+ int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
struct ubifs_scan_leb *sleb;
void *buf = sbuf + offs;
- dbg_rcvry("%d:%d", lnum, offs);
+ dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
sleb = ubifs_start_scan(c, lnum, offs, sbuf);
if (IS_ERR(sleb))
return sleb;
- if (sleb->ecc)
- need_clean = 1;
-
+ ubifs_assert(len >= 8);
while (len >= 8) {
- int ret;
-
dbg_scan("look at LEB %d:%d (%d bytes left)",
lnum, offs, len);
@@ -622,8 +657,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
* Scan quietly until there is an error from which we cannot
* recover
*/
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
-
+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
if (ret == SCANNED_A_NODE) {
/* A valid node, and not a padding node */
struct ubifs_ch *ch = buf;
@@ -636,98 +670,127 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
offs += node_len;
buf += node_len;
len -= node_len;
- continue;
- }
-
- if (ret > 0) {
+ } else if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
buf += ret;
len -= ret;
- continue;
- }
-
- if (ret == SCANNED_EMPTY_SPACE) {
- if (!is_empty(buf, len)) {
- if (!is_last_write(c, buf, offs))
- break;
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- }
- empty_chkd = 1;
+ } else if (ret == SCANNED_EMPTY_SPACE ||
+ ret == SCANNED_GARBAGE ||
+ ret == SCANNED_A_BAD_PAD_NODE ||
+ ret == SCANNED_A_CORRUPT_NODE) {
+ dbg_rcvry("found corruption (%d) at %d:%d",
+ ret, lnum, offs);
break;
+ } else {
+ ubifs_err("unexpected return value %d", ret);
+ err = -EINVAL;
+ goto error;
}
+ }
- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
- if (is_last_write(c, buf, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- empty_chkd = 1;
- break;
- }
+ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
+ if (!is_last_write(c, buf, offs))
+ goto corrupted_rescan;
+ } else if (ret == SCANNED_A_CORRUPT_NODE) {
+ if (!no_more_nodes(c, buf, len, lnum, offs))
+ goto corrupted_rescan;
+ } else if (!is_empty(buf, len)) {
+ if (!is_last_write(c, buf, offs)) {
+ int corruption = first_non_ff(buf, len);
- if (ret == SCANNED_A_CORRUPT_NODE)
- if (no_more_nodes(c, buf, len, lnum, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- empty_chkd = 1;
- break;
- }
-
- if (quiet) {
- /* Redo the last scan but noisily */
- quiet = 0;
- continue;
- }
-
- switch (ret) {
- case SCANNED_GARBAGE:
- dbg_err("garbage");
- goto corrupted;
- case SCANNED_A_CORRUPT_NODE:
- case SCANNED_A_BAD_PAD_NODE:
- dbg_err("bad node");
- goto corrupted;
- default:
- dbg_err("unknown");
+ /*
+ * See header comment for this file for more
+ * explanations about the reasons we have this check.
+ */
+ ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
+ lnum, offs, corruption);
+ /* Make sure we dump interesting non-0xFF data */
+ offs += corruption;
+ buf += corruption;
goto corrupted;
}
}
- if (!empty_chkd && !is_empty(buf, len)) {
- if (is_last_write(c, buf, offs)) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- } else {
- ubifs_err("corrupt empty space at LEB %d:%d",
- lnum, offs);
- goto corrupted;
- }
- }
+ min_io_unit = round_down(offs, c->min_io_size);
+ if (grouped)
+ /*
+ * If nodes are grouped, always drop the incomplete group at
+ * the end.
+ */
+ drop_last_group(sleb, &offs);
- /* Drop nodes from incomplete group */
- if (grouped && drop_incomplete_group(sleb, &offs)) {
- buf = sbuf + offs;
- len = c->leb_size - offs;
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
+ if (jhead == GCHD) {
+ /*
+ * If this LEB belongs to the GC head then while we are in the
+ * middle of the same min. I/O unit keep dropping nodes. So
+ * basically, what we want is to make sure that the last min.
+ * I/O unit where we saw the corruption is dropped completely
+ * with all the uncorrupted nodes which may possibly sit there.
+ *
+ * In other words, let's name the min. I/O unit where the
+ * corruption starts B, and the previous min. I/O unit A. The
+ * below code tries to deal with a situation when half of B
+ * contains valid nodes or the end of a valid node, and the
+ * second half of B contains corrupted data or garbage. This
+ * means that UBIFS had been writing to B just before the power
+ * cut happened. I do not know how realistic is this scenario
+ * that half of the min. I/O unit had been written successfully
+ * and the other half not, but this is possible in our 'failure
+ * mode emulation' infrastructure at least.
+ *
+ * So what is the problem, why we need to drop those nodes? Why
+ * can't we just clean-up the second half of B by putting a
+ * padding node there? We can, and this works fine with one
+ * exception which was reproduced with power cut emulation
+ * testing and happens extremely rarely.
+ *
+ * Imagine the file-system is full, we run GC which starts
+ * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
+ * the current GC head LEB). The @c->gc_lnum is -1, which means
+ * that GC will retain LEB X and will try to continue. Imagine
+ * that LEB X is currently the dirtiest LEB, and the amount of
+ * used space in LEB Y is exactly the same as amount of free
+ * space in LEB X.
+ *
+ * And a power cut happens when nodes are moved from LEB X to
+ * LEB Y. We are here trying to recover LEB Y which is the GC
+ * head LEB. We find the min. I/O unit B as described above.
+ * Then we clean-up LEB Y by padding min. I/O unit. And later
+ * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
+ * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
+ * does not match because the amount of valid nodes there does
+ * not fit the free space in LEB Y any more! And this is
+ * because of the padding node which we added to LEB Y. The
+ * user-visible effect of this which I once observed and
+ * analysed is that we cannot mount the file-system with
+ * -ENOSPC error.
+ *
+ * So obviously, to make sure that situation does not happen we
+ * should free min. I/O unit B in LEB Y completely and the last
+ * used min. I/O unit in LEB Y should be A. This is basically
+ * what the below code tries to do.
+ */
+ while (offs > min_io_unit)
+ drop_last_node(sleb, &offs);
}
- if (offs % c->min_io_size) {
- clean_buf(c, &buf, lnum, &offs, &len);
- need_clean = 1;
- }
+ buf = sbuf + offs;
+ len = c->leb_size - offs;
+ clean_buf(c, &buf, lnum, &offs, &len);
ubifs_end_scan(c, sleb, lnum, offs);
- if (need_clean) {
- err = fix_unclean_leb(c, sleb, start);
- if (err)
- goto error;
- }
+ err = fix_unclean_leb(c, sleb, start);
+ if (err)
+ goto error;
return sleb;
+corrupted_rescan:
+ /* Re-scan the corrupted data with verbose messages (quiet == 0) */
+ ubifs_err("corruption %d", ret);
+ ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
err = -EUCLEAN;
@@ -758,22 +821,23 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
return -ENOMEM;
if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
goto out_err;
- err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
+ err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
+ UBIFS_CS_NODE_SZ, 0);
if (err && err != -EBADMSG)
goto out_free;
ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
if (ret != SCANNED_A_NODE) {
- dbg_err("Not a valid node");
+ ubifs_err("Not a valid node");
goto out_err;
}
if (cs_node->ch.node_type != UBIFS_CS_NODE) {
- dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
+ ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type);
goto out_err;
}
if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
- dbg_err("CS node cmt_no %llu != current cmt_no %llu",
- (unsigned long long)le64_to_cpu(cs_node->cmt_no),
- c->cmt_no);
+ ubifs_err("CS node cmt_no %llu != current cmt_no %llu",
+ (unsigned long long)le64_to_cpu(cs_node->cmt_no),
+ c->cmt_no);
goto out_err;
}
*cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
@@ -797,7 +861,8 @@ out_free:
* @sbuf: LEB-sized buffer to use
*
* This function does a scan of a LEB, but caters for errors that might have
- * been caused by the unclean unmount from which we are attempting to recover.
+ * been caused by unclean reboots from which we are attempting to recover
+ * (assume that only the last log LEB can be corrupted by an unclean reboot).
*
* This function returns %0 on success and a negative error code on failure.
*/
@@ -816,7 +881,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
* We can only recover at the end of the log, so check that the
* next log LEB is empty or out of date.
*/
- sleb = ubifs_scan(c, next_lnum, 0, sbuf);
+ sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
if (IS_ERR(sleb))
return sleb;
if (sleb->nodes_cnt) {
@@ -835,15 +900,15 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
}
}
if (snod->sqnum > cs_sqnum) {
- ubifs_err("unrecoverable log corruption "
- "in LEB %d", lnum);
+ ubifs_err("unrecoverable log corruption in LEB %d",
+ lnum);
ubifs_scan_destroy(sleb);
return ERR_PTR(-EUCLEAN);
}
}
ubifs_scan_destroy(sleb);
}
- return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
+ return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
}
/**
@@ -857,15 +922,10 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int recover_head(const struct ubifs_info *c, int lnum, int offs,
- void *sbuf)
+static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
{
- int len, err, need_clean = 0;
+ int len = c->max_write_size, err;
- if (c->min_io_size > 1)
- len = c->min_io_size;
- else
- len = 512;
if (offs + len > c->leb_size)
len = c->leb_size - offs;
@@ -873,27 +933,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
return 0;
/* Read at the head location and check it is empty flash */
- err = ubi_read(c->ubi, lnum, sbuf, offs, len);
- if (err)
- need_clean = 1;
- else {
- uint8_t *p = sbuf;
-
- while (len--)
- if (*p++ != 0xff) {
- need_clean = 1;
- break;
- }
- }
-
- if (need_clean) {
+ err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
+ if (err || !is_empty(sbuf, len)) {
dbg_rcvry("cleaning head at %d:%d", lnum, offs);
if (offs == 0)
return ubifs_leb_unmap(c, lnum);
- err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
+ err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
if (err)
return err;
- return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
+ return ubifs_leb_change(c, lnum, sbuf, offs);
}
return 0;
@@ -916,11 +964,11 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
*
* This function returns %0 on success and a negative error code on failure.
*/
-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
+int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
{
int err;
- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
+ ubifs_assert(!c->ro_mount || c->remounting_rw);
dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
@@ -936,7 +984,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
}
/**
- * clean_an_unclean_leb - read and write a LEB to remove corruption.
+ * clean_an_unclean_leb - read and write a LEB to remove corruption.
* @c: UBIFS file-system description object
* @ucleb: unclean LEB information
* @sbuf: LEB-sized buffer to use
@@ -947,7 +995,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int clean_an_unclean_leb(const struct ubifs_info *c,
+static int clean_an_unclean_leb(struct ubifs_info *c,
struct ubifs_unclean_leb *ucleb, void *sbuf)
{
int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
@@ -963,7 +1011,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
return 0;
}
- err = ubi_read(c->ubi, lnum, buf, offs, len);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
if (err && err != -EBADMSG)
return err;
@@ -1023,7 +1071,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
}
/* Write back the LEB atomically */
- err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, sbuf, len);
if (err)
return err;
@@ -1043,7 +1091,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
*
* This function returns %0 on success and a negative error code on failure.
*/
-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
+int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
{
dbg_rcvry("recovery");
while (!list_empty(&c->unclean_leb_list)) {
@@ -1062,6 +1110,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
}
/**
+ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
+ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
+ * zero in case of success and a negative error code in case of failure.
+ */
+static int grab_empty_leb(struct ubifs_info *c)
+{
+ int lnum, err;
+
+ /*
+ * Note, it is very important to first search for an empty LEB and then
+ * run the commit, not vice-versa. The reason is that there might be
+ * only one empty LEB at the moment, the one which has been the
+ * @c->gc_lnum just before the power cut happened. During the regular
+ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
+ * one but GC can grab it. But at this moment this single empty LEB is
+ * not marked as taken, so if we run commit - what happens? Right, the
+ * commit will grab it and write the index there. Remember that the
+ * index always expands as long as there is free space, and it only
+ * starts consolidating when we run out of space.
+ *
+ * IOW, if we run commit now, we might not be able to find a free LEB
+ * after this.
+ */
+ lnum = ubifs_find_free_leb_for_idx(c);
+ if (lnum < 0) {
+ ubifs_err("could not find an empty LEB");
+ ubifs_dump_lprops(c);
+ ubifs_dump_budg(c, &c->bi);
+ return lnum;
+ }
+
+ /* Reset the index flag */
+ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
+ LPROPS_INDEX, 0);
+ if (err)
+ return err;
+
+ c->gc_lnum = lnum;
+ dbg_rcvry("found empty LEB %d, run commit", lnum);
+
+ return ubifs_run_commit(c);
+}
+
+/**
* ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
* @c: UBIFS file-system description object
*
@@ -1083,58 +1178,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
{
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
struct ubifs_lprops lp;
- int lnum, err;
+ int err;
+
+ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
c->gc_lnum = -1;
- if (wbuf->lnum == -1) {
- dbg_rcvry("no GC head LEB");
- goto find_free;
- }
- /*
- * See whether the used space in the dirtiest LEB fits in the GC head
- * LEB.
- */
- if (wbuf->offs == c->leb_size) {
- dbg_rcvry("no room in GC head LEB");
- goto find_free;
- }
+ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
+ return grab_empty_leb(c);
+
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
if (err) {
- if (err == -ENOSPC)
- dbg_err("could not find a dirty LEB");
- return err;
- }
- ubifs_assert(!(lp.flags & LPROPS_INDEX));
- lnum = lp.lnum;
- if (lp.free + lp.dirty == c->leb_size) {
- /* An empty LEB was returned */
- if (lp.free != c->leb_size) {
- err = ubifs_change_one_lp(c, lnum, c->leb_size,
- 0, 0, 0, 0);
- if (err)
- return err;
- }
- err = ubifs_leb_unmap(c, lnum);
- if (err)
- return err;
- c->gc_lnum = lnum;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- /* Run the commit */
- dbg_rcvry("committing");
- return ubifs_run_commit(c);
- }
- /*
- * There was no empty LEB so the used space in the dirtiest LEB must fit
- * in the GC head LEB.
- */
- if (lp.free + lp.dirty < wbuf->offs) {
- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
- lnum, wbuf->lnum, wbuf->offs);
- err = ubifs_return_leb(c, lnum);
- if (err)
+ if (err != -ENOSPC)
return err;
- goto find_free;
+
+ dbg_rcvry("could not find a dirty LEB");
+ return grab_empty_leb(c);
}
+
+ ubifs_assert(!(lp.flags & LPROPS_INDEX));
+ ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
+
/*
* We run the commit before garbage collection otherwise subsequent
* mounts will see the GC and orphan deletion in a different order.
@@ -1143,11 +1206,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
err = ubifs_run_commit(c);
if (err)
return err;
- /*
- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
- * - use locking to keep 'ubifs_assert()' happy.
- */
- dbg_rcvry("GC'ing LEB %d", lnum);
+
+ dbg_rcvry("GC'ing LEB %d", lp.lnum);
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
err = ubifs_garbage_collect_leb(c, &lp);
if (err >= 0) {
@@ -1158,42 +1218,22 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
}
mutex_unlock(&wbuf->io_mutex);
if (err < 0) {
- dbg_err("GC failed, error %d", err);
+ ubifs_err("GC failed, error %d", err);
if (err == -EAGAIN)
err = -EINVAL;
return err;
}
- if (err != LEB_RETAINED) {
- dbg_err("GC returned %d", err);
+
+ ubifs_assert(err == LEB_RETAINED);
+ if (err != LEB_RETAINED)
return -EINVAL;
- }
+
err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
return err;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- return 0;
-find_free:
- /*
- * There is no GC head LEB or the free space in the GC head LEB is too
- * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
- * GC is not run.
- */
- lnum = ubifs_find_free_leb_for_idx(c);
- if (lnum < 0) {
- dbg_err("could not find an empty LEB");
- return lnum;
- }
- /* And reset the index flag */
- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
- LPROPS_INDEX, 0);
- if (err)
- return err;
- c->gc_lnum = lnum;
- dbg_rcvry("allocated LEB %d for GC", lnum);
- /* Run the commit */
- dbg_rcvry("committing");
- return ubifs_run_commit(c);
+ dbg_rcvry("allocated LEB %d for GC", lp.lnum);
+ return 0;
}
/**
@@ -1295,29 +1335,14 @@ static void remove_ino(struct ubifs_info *c, ino_t inum)
*/
void ubifs_destroy_size_tree(struct ubifs_info *c)
{
- struct rb_node *this = c->size_tree.rb_node;
- struct size_entry *e;
+ struct size_entry *e, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- e = rb_entry(this, struct size_entry, rb);
+ rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
if (e->inode)
iput(e->inode);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &e->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
kfree(e);
}
+
c->size_tree = RB_ROOT;
}
@@ -1416,7 +1441,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
if (i_size >= e->d_size)
return 0;
/* Read the LEB */
- err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
+ err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
if (err)
goto out;
/* Change the size field and recalculate the CRC */
@@ -1432,16 +1457,16 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
len -= 1;
len = ALIGN(len + 1, c->min_io_size);
/* Atomically write the fixed LEB back again */
- err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
+ err = ubifs_leb_change(c, lnum, c->sbuf, len);
if (err)
goto out;
- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
- i_size, e->d_size);
+ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
+ (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
return 0;
out:
ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
- e->inum, e->i_size, e->d_size, err);
+ (unsigned long)e->inum, e->i_size, e->d_size, err);
return err;
}
@@ -1472,7 +1497,8 @@ int ubifs_recover_size(struct ubifs_info *c)
return err;
if (err == -ENOENT) {
/* Remove data nodes that have no inode */
- dbg_rcvry("removing ino %lu", e->inum);
+ dbg_rcvry("removing ino %lu",
+ (unsigned long)e->inum);
err = ubifs_tnc_remove_ino(c, e->inum);
if (err)
return err;
@@ -1483,20 +1509,27 @@ int ubifs_recover_size(struct ubifs_info *c)
e->i_size = le64_to_cpu(ino->size);
}
}
+
if (e->exists && e->i_size < e->d_size) {
- if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (c->ro_mount) {
/* Fix the inode size and pin it in memory */
struct inode *inode;
+ struct ubifs_inode *ui;
+
+ ubifs_assert(!e->inode);
inode = ubifs_iget(c->vfs_sb, e->inum);
if (IS_ERR(inode))
return PTR_ERR(inode);
+
+ ui = ubifs_inode(inode);
if (inode->i_size < e->d_size) {
dbg_rcvry("ino %lu size %lld -> %lld",
- e->inum, e->d_size,
- inode->i_size);
+ (unsigned long)e->inum,
+ inode->i_size, e->d_size);
inode->i_size = e->d_size;
- ubifs_inode(inode)->ui_size = e->d_size;
+ ui->ui_size = e->d_size;
+ ui->synced_i_size = e->d_size;
e->inode = inode;
this = rb_next(this);
continue;
@@ -1511,9 +1544,11 @@ int ubifs_recover_size(struct ubifs_info *c)
iput(e->inode);
}
}
+
this = rb_next(this);
rb_erase(&e->rb, &c->size_tree);
kfree(e);
}
+
return 0;
}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 7399692af85..3187925e987 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,43 +33,32 @@
*/
#include "ubifs.h"
-
-/*
- * Replay flags.
- *
- * REPLAY_DELETION: node was deleted
- * REPLAY_REF: node is a reference node
- */
-enum {
- REPLAY_DELETION = 1,
- REPLAY_REF = 2,
-};
+#include <linux/list_sort.h>
/**
- * struct replay_entry - replay tree entry.
+ * struct replay_entry - replay list entry.
* @lnum: logical eraseblock number of the node
* @offs: node offset
* @len: node length
+ * @deletion: non-zero if this entry corresponds to a node deletion
* @sqnum: node sequence number
- * @flags: replay flags
- * @rb: links the replay tree
+ * @list: links the replay list
* @key: node key
* @nm: directory entry name
* @old_size: truncation old size
* @new_size: truncation new size
- * @free: amount of free space in a bud
- * @dirty: amount of dirty space in a bud from padding and deletion nodes
*
- * UBIFS journal replay must compare node sequence numbers, which means it must
- * build a tree of node information to insert into the TNC.
+ * The replay process first scans all buds and builds the replay list, then
+ * sorts the replay list in nodes sequence number order, and then inserts all
+ * the replay entries to the TNC.
*/
struct replay_entry {
int lnum;
int offs;
int len;
+ unsigned int deletion:1;
unsigned long long sqnum;
- int flags;
- struct rb_node rb;
+ struct list_head list;
union ubifs_key key;
union {
struct qstr nm;
@@ -77,10 +66,6 @@ struct replay_entry {
loff_t old_size;
loff_t new_size;
};
- struct {
- int free;
- int dirty;
- };
};
};
@@ -88,83 +73,116 @@ struct replay_entry {
* struct bud_entry - entry in the list of buds to replay.
* @list: next bud in the list
* @bud: bud description object
- * @free: free bytes in the bud
* @sqnum: reference node sequence number
+ * @free: free bytes in the bud
+ * @dirty: dirty bytes in the bud
*/
struct bud_entry {
struct list_head list;
struct ubifs_bud *bud;
- int free;
unsigned long long sqnum;
+ int free;
+ int dirty;
};
/**
* set_bud_lprops - set free and dirty space used by a bud.
* @c: UBIFS file-system description object
- * @r: replay entry of bud
+ * @b: bud entry which describes the bud
+ *
+ * This function makes sure the LEB properties of bud @b are set correctly
+ * after the replay. Returns zero in case of success and a negative error code
+ * in case of failure.
*/
-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
+static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
{
const struct ubifs_lprops *lp;
int err = 0, dirty;
ubifs_get_lprops(c);
- lp = ubifs_lpt_lookup_dirty(c, r->lnum);
+ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
dirty = lp->dirty;
- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
+ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
/*
* The LEB was added to the journal with a starting offset of
* zero which means the LEB must have been empty. The LEB
- * property values should be lp->free == c->leb_size and
- * lp->dirty == 0, but that is not the case. The reason is that
- * the LEB was garbage collected. The garbage collector resets
- * the free and dirty space without recording it anywhere except
- * lprops, so if there is not a commit then lprops does not have
- * that information next time the file system is mounted.
+ * property values should be @lp->free == @c->leb_size and
+ * @lp->dirty == 0, but that is not the case. The reason is that
+ * the LEB had been garbage collected before it became the bud,
+ * and there was no commit in between. The garbage collector
+ * resets the free and dirty space without recording it
+ * anywhere except lprops, so if there was no commit then
+ * lprops does not have that information.
*
* We do not need to adjust free space because the scan has told
* us the exact value which is recorded in the replay entry as
- * r->free.
+ * @b->free.
*
* However we do need to subtract from the dirty space the
* amount of space that the garbage collector reclaimed, which
* is the whole LEB minus the amount of space that was free.
*/
- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
+ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
lp->free, lp->dirty);
dirty -= c->leb_size - lp->free;
/*
* If the replay order was perfect the dirty space would now be
- * zero. The order is not perfect because the the journal heads
- * race with eachother. This is not a problem but is does mean
+ * zero. The order is not perfect because the journal heads
+ * race with each other. This is not a problem but it does mean
* that the dirty space may temporarily exceed c->leb_size
* during the replay.
*/
if (dirty != 0)
- dbg_msg("LEB %d lp: %d free %d dirty "
- "replay: %d free %d dirty", r->lnum, lp->free,
- lp->dirty, r->free, r->dirty);
+ dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
+ b->bud->lnum, lp->free, lp->dirty, b->free,
+ b->dirty);
}
- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
+ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
lp->flags | LPROPS_TAKEN, 0);
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out;
}
+
+ /* Make sure the journal head points to the latest bud */
+ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
+ b->bud->lnum, c->leb_size - b->free);
+
out:
ubifs_release_lprops(c);
return err;
}
/**
+ * set_buds_lprops - set free and dirty space for all replayed buds.
+ * @c: UBIFS file-system description object
+ *
+ * This function sets LEB properties for all replayed buds. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int set_buds_lprops(struct ubifs_info *c)
+{
+ struct bud_entry *b;
+ int err;
+
+ list_for_each_entry(b, &c->replay_buds, list) {
+ err = set_bud_lprops(c, b);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
* trun_remove_range - apply a replay entry for a truncation to the TNC.
* @c: UBIFS file-system description object
* @r: replay entry of truncation
@@ -200,24 +218,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
*/
static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
{
- int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
+ int err;
- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
+ dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
+ r->lnum, r->offs, r->len, r->deletion, r->sqnum);
/* Set c->replay_sqnum to help deal with dangling branches. */
c->replay_sqnum = r->sqnum;
- if (r->flags & REPLAY_REF)
- err = set_bud_lprops(c, r);
- else if (is_hash_key(c, &r->key)) {
- if (deletion)
+ if (is_hash_key(c, &r->key)) {
+ if (r->deletion)
err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
else
err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
r->len, &r->nm);
} else {
- if (deletion)
+ if (r->deletion)
switch (key_type(c, &r->key)) {
case UBIFS_INO_KEY:
{
@@ -240,7 +256,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
return err;
if (c->need_recovery)
- err = ubifs_recover_size_accum(c, &r->key, deletion,
+ err = ubifs_recover_size_accum(c, &r->key, r->deletion,
r->new_size);
}
@@ -248,68 +264,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
}
/**
- * destroy_replay_tree - destroy the replay.
- * @c: UBIFS file-system description object
+ * replay_entries_cmp - compare 2 replay entries.
+ * @priv: UBIFS file-system description object
+ * @a: first replay entry
+ * @b: second replay entry
*
- * Destroy the replay tree.
+ * This is a comparison function for 'list_sort()' which compares 2 replay
+ * entries @a and @b by comparing their sequence number. Returns %0 if @a and
+ * @b are the same entry, %1 if @a has greater sequence number, %-1 otherwise.
*/
-static void destroy_replay_tree(struct ubifs_info *c)
+static int replay_entries_cmp(void *priv, struct list_head *a,
+ struct list_head *b)
{
- struct rb_node *this = c->replay_tree.rb_node;
- struct replay_entry *r;
-
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- r = rb_entry(this, struct replay_entry, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &r->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- if (is_hash_key(c, &r->key))
- kfree(r->nm.name);
- kfree(r);
- }
- c->replay_tree = RB_ROOT;
+ struct replay_entry *ra, *rb;
+
+ cond_resched();
+ if (a == b)
+ return 0;
+
+ ra = list_entry(a, struct replay_entry, list);
+ rb = list_entry(b, struct replay_entry, list);
+ ubifs_assert(ra->sqnum != rb->sqnum);
+ if (ra->sqnum > rb->sqnum)
+ return 1;
+ return -1;
}
/**
- * apply_replay_tree - apply the replay tree to the TNC.
+ * apply_replay_list - apply the replay list to the TNC.
* @c: UBIFS file-system description object
*
- * Apply the replay tree.
- * Returns zero in case of success and a negative error code in case of
- * failure.
+ * Apply all entries in the replay list to the TNC. Returns zero in case of
+ * success and a negative error code in case of failure.
*/
-static int apply_replay_tree(struct ubifs_info *c)
+static int apply_replay_list(struct ubifs_info *c)
{
- struct rb_node *this = rb_first(&c->replay_tree);
+ struct replay_entry *r;
+ int err;
- while (this) {
- struct replay_entry *r;
- int err;
+ list_sort(c, &c->replay_list, &replay_entries_cmp);
+ list_for_each_entry(r, &c->replay_list, list) {
cond_resched();
- r = rb_entry(this, struct replay_entry, rb);
err = apply_replay_entry(c, r);
if (err)
return err;
- this = rb_next(this);
}
+
return 0;
}
/**
- * insert_node - insert a node to the replay tree.
+ * destroy_replay_list - destroy the replay list.
+ * @c: UBIFS file-system description object
+ *
+ * Destroy the replay list.
+ */
+static void destroy_replay_list(struct ubifs_info *c)
+{
+ struct replay_entry *r, *tmp;
+
+ list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
+ if (is_hash_key(c, &r->key))
+ kfree(r->nm.name);
+ list_del(&r->list);
+ kfree(r);
+ }
+}
+
+/**
+ * insert_node - insert a node to the replay list
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
@@ -321,39 +346,25 @@ static int apply_replay_tree(struct ubifs_info *c)
* @old_size: truncation old size
* @new_size: truncation new size
*
- * This function inserts a scanned non-direntry node to the replay tree. The
- * replay tree is an RB-tree containing @struct replay_entry elements which are
- * indexed by the sequence number. The replay tree is applied at the very end
- * of the replay process. Since the tree is sorted in sequence number order,
- * the older modifications are applied first. This function returns zero in
- * case of success and a negative error code in case of failure.
+ * This function inserts a scanned non-direntry node to the replay list. The
+ * replay list contains @struct replay_entry elements, and we sort this list in
+ * sequence number order before applying it. The replay list is applied at the
+ * very end of the replay process. Since the list is sorted in sequence number
+ * order, the older modifications are applied first. This function returns zero
+ * in case of success and a negative error code in case of failure.
*/
static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, unsigned long long sqnum,
int deletion, int *used, loff_t old_size,
loff_t new_size)
{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
+ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
+
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- } else if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay");
- return -EINVAL;
- }
-
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
@@ -363,19 +374,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
+ r->deletion = !!deletion;
r->sqnum = sqnum;
- r->flags = (deletion ? REPLAY_DELETION : 0);
+ key_copy(c, key, &r->key);
r->old_size = old_size;
r->new_size = new_size;
- key_copy(c, key, &r->key);
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
+ list_add_tail(&r->list, &c->replay_list);
return 0;
}
/**
- * insert_dent - insert a directory entry node into the replay tree.
+ * insert_dent - insert a directory entry node into the replay list.
* @c: UBIFS file-system description object
* @lnum: node logical eraseblock number
* @offs: node offset
@@ -387,43 +397,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
* @deletion: non-zero if this is a deletion
* @used: number of bytes in use in a LEB
*
- * This function inserts a scanned directory entry node to the replay tree.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- *
- * This function is also used for extended attribute entries because they are
- * implemented as directory entry nodes.
+ * This function inserts a scanned directory entry node or an extended
+ * attribute entry to the replay list. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
union ubifs_key *key, const char *name, int nlen,
unsigned long long sqnum, int deletion, int *used)
{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
struct replay_entry *r;
char *nbuf;
+ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
if (key_inum(c, key) >= c->highest_inum)
c->highest_inum = key_inum(c, key);
- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- }
- if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay");
- return -EINVAL;
- }
-
r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
if (!r)
return -ENOMEM;
+
nbuf = kmalloc(nlen + 1, GFP_KERNEL);
if (!nbuf) {
kfree(r);
@@ -435,17 +427,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
r->lnum = lnum;
r->offs = offs;
r->len = len;
+ r->deletion = !!deletion;
r->sqnum = sqnum;
+ key_copy(c, key, &r->key);
r->nm.len = nlen;
memcpy(nbuf, name, nlen);
nbuf[nlen] = '\0';
r->nm.name = nbuf;
- r->flags = (deletion ? REPLAY_DELETION : 0);
- key_copy(c, key, &r->key);
- ubifs_assert(!*p);
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
+ list_add_tail(&r->list, &c->replay_list);
return 0;
}
@@ -482,31 +472,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
}
/**
+ * is_last_bud - check if the bud is the last in the journal head.
+ * @c: UBIFS file-system description object
+ * @bud: bud description object
+ *
+ * This function checks if bud @bud is the last bud in its journal head. This
+ * information is then used by 'replay_bud()' to decide whether the bud can
+ * have corruptions or not. Indeed, only last buds can be corrupted by power
+ * cuts. Returns %1 if this is the last bud, and %0 if not.
+ */
+static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
+{
+ struct ubifs_jhead *jh = &c->jheads[bud->jhead];
+ struct ubifs_bud *next;
+ uint32_t data;
+ int err;
+
+ if (list_is_last(&bud->list, &jh->buds_list))
+ return 1;
+
+ /*
+ * The following is a quirk to make sure we work correctly with UBIFS
+ * images used with older UBIFS.
+ *
+ * Normally, the last bud will be the last in the journal head's list
+ * of bud. However, there is one exception if the UBIFS image belongs
+ * to older UBIFS. This is fairly unlikely: one would need to use old
+ * UBIFS, then have a power cut exactly at the right point, and then
+ * try to mount this image with new UBIFS.
+ *
+ * The exception is: it is possible to have 2 buds A and B, A goes
+ * before B, and B is the last, bud B contains no data, and bud A is
+ * corrupted at the end. The reason is that in older versions when the
+ * journal code switched the next bud (from A to B), it first added a
+ * log reference node for the new bud (B), and only after this it
+ * synchronized the write-buffer of current bud (A). But later this was
+ * changed and UBIFS started to always synchronize the write-buffer of
+ * the bud (A) before writing the log reference for the new bud (B).
+ *
+ * But because older UBIFS always synchronized A's write-buffer before
+ * writing to B, we can recognize this exceptional situation by
+ * checking the contents of bud B - if it is empty, then A can be
+ * treated as the last and we can recover it.
+ *
+ * TODO: remove this piece of code in a couple of years (today it is
+ * 16.05.2011).
+ */
+ next = list_entry(bud->list.next, struct ubifs_bud, list);
+ if (!list_is_last(&next->list, &jh->buds_list))
+ return 0;
+
+ err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
+ if (err)
+ return 0;
+
+ return data == 0xFFFFFFFF;
+}
+
+/**
* replay_bud - replay a bud logical eraseblock.
* @c: UBIFS file-system description object
- * @lnum: bud logical eraseblock number to replay
- * @offs: bud start offset
- * @jhead: journal head to which this bud belongs
- * @free: amount of free space in the bud is returned here
- * @dirty: amount of dirty space from padding and deletion nodes is returned
- * here
+ * @b: bud entry which describes the bud
*
- * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * This function replays bud @b, recovers it if needed, and adds all nodes
+ * from this bud to the replay list. Returns zero in case of success and a
+ * negative error code in case of failure.
*/
-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
- int *free, int *dirty)
+static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
{
- int err = 0, used = 0;
+ int is_last = is_last_bud(c, b->bud);
+ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
struct ubifs_scan_leb *sleb;
struct ubifs_scan_node *snod;
- struct ubifs_bud *bud;
- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
- if (c->need_recovery)
- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
+ lnum, b->bud->jhead, offs, is_last);
+
+ if (c->need_recovery && is_last)
+ /*
+ * Recover only last LEBs in the journal heads, because power
+ * cuts may cause corruptions only in these LEBs, because only
+ * these LEBs could possibly be written to at the power cut
+ * time.
+ */
+ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
else
- sleb = ubifs_scan(c, lnum, offs, c->sbuf);
+ sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
@@ -620,20 +671,14 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
goto out;
}
- bud = ubifs_search_bud(c, lnum);
- if (!bud)
- BUG();
-
+ ubifs_assert(ubifs_search_bud(c, lnum));
ubifs_assert(sleb->endpt - offs >= used);
ubifs_assert(sleb->endpt % c->min_io_size == 0);
- if (sleb->endpt + c->min_io_size <= c->leb_size &&
- !(c->vfs_sb->s_flags & MS_RDONLY))
- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
- sleb->endpt, UBI_SHORTTERM);
-
- *dirty = sleb->endpt - offs - used;
- *free = c->leb_size - sleb->endpt;
+ b->dirty = sleb->endpt - offs - used;
+ b->free = c->leb_size - sleb->endpt;
+ dbg_mnt("bud LEB %d replied: dirty %d, free %d",
+ lnum, b->dirty, b->free);
out:
ubifs_scan_destroy(sleb);
@@ -641,61 +686,12 @@ out:
out_dump:
ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
- dbg_dump_node(c, snod->node);
+ ubifs_dump_node(c, snod->node);
ubifs_scan_destroy(sleb);
return -EINVAL;
}
/**
- * insert_ref_node - insert a reference node to the replay tree.
- * @c: UBIFS file-system description object
- * @lnum: node logical eraseblock number
- * @offs: node offset
- * @sqnum: sequence number
- * @free: amount of free space in bud
- * @dirty: amount of dirty space from padding and deletion nodes
- *
- * This function inserts a reference node to the replay tree and returns zero
- * in case of success ort a negative error code in case of failure.
- */
-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
- unsigned long long sqnum, int free, int dirty)
-{
- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
- struct replay_entry *r;
-
- dbg_mnt("add ref LEB %d:%d", lnum, offs);
- while (*p) {
- parent = *p;
- r = rb_entry(parent, struct replay_entry, rb);
- if (sqnum < r->sqnum) {
- p = &(*p)->rb_left;
- continue;
- } else if (sqnum > r->sqnum) {
- p = &(*p)->rb_right;
- continue;
- }
- ubifs_err("duplicate sqnum in replay tree");
- return -EINVAL;
- }
-
- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
- if (!r)
- return -ENOMEM;
-
- r->lnum = lnum;
- r->offs = offs;
- r->sqnum = sqnum;
- r->flags = REPLAY_REF;
- r->free = free;
- r->dirty = dirty;
-
- rb_link_node(&r->rb, parent, p);
- rb_insert_color(&r->rb, &c->replay_tree);
- return 0;
-}
-
-/**
* replay_buds - replay all buds.
* @c: UBIFS file-system description object
*
@@ -705,17 +701,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
static int replay_buds(struct ubifs_info *c)
{
struct bud_entry *b;
- int err, uninitialized_var(free), uninitialized_var(dirty);
+ int err;
+ unsigned long long prev_sqnum = 0;
list_for_each_entry(b, &c->replay_buds, list) {
- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
- &free, &dirty);
- if (err)
- return err;
- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
- free, dirty);
+ err = replay_bud(c, b);
if (err)
return err;
+
+ ubifs_assert(b->sqnum > prev_sqnum);
+ prev_sqnum = b->sqnum;
}
return 0;
@@ -836,10 +831,16 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
const struct ubifs_cs_node *node;
dbg_mnt("replay log LEB %d:%d", lnum, offs);
- sleb = ubifs_scan(c, lnum, offs, sbuf);
+ sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
if (IS_ERR(sleb)) {
- if (c->need_recovery)
- sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+ if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
+ return PTR_ERR(sleb);
+ /*
+ * Note, the below function will recover this log LEB only if
+ * it is the last, because unclean reboots can possibly corrupt
+ * only the tail of the log.
+ */
+ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
}
@@ -850,7 +851,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
}
node = sleb->buf;
-
snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
if (c->cs_sqnum == 0) {
/*
@@ -861,16 +861,15 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
* numbers.
*/
if (snod->type != UBIFS_CS_NODE) {
- dbg_err("first log node at LEB %d:%d is not CS node",
- lnum, offs);
+ ubifs_err("first log node at LEB %d:%d is not CS node",
+ lnum, offs);
goto out_dump;
}
if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
- dbg_err("first CS node at LEB %d:%d has wrong "
- "commit number %llu expected %llu",
- lnum, offs,
- (unsigned long long)le64_to_cpu(node->cmt_no),
- c->cmt_no);
+ ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
+ lnum, offs,
+ (unsigned long long)le64_to_cpu(node->cmt_no),
+ c->cmt_no);
goto out_dump;
}
@@ -883,7 +882,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
* This means that we reached end of log and now
* look to the older log data, which was already
* committed but the eraseblock was not erased (UBIFS
- * only unmaps it). So this basically means we have to
+ * only un-maps it). So this basically means we have to
* exit with "end of log" code.
*/
err = 1;
@@ -892,12 +891,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
/* Make sure the first node sits at offset zero of the LEB */
if (snod->offs != 0) {
- dbg_err("first node is not at zero offset");
+ ubifs_err("first node is not at zero offset");
goto out_dump;
}
list_for_each_entry(snod, &sleb->nodes, list) {
-
cond_resched();
if (snod->sqnum >= SQNUM_WATERMARK) {
@@ -906,8 +904,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
}
if (snod->sqnum < c->cs_sqnum) {
- dbg_err("bad sqnum %llu, commit sqnum %llu",
- snod->sqnum, c->cs_sqnum);
+ ubifs_err("bad sqnum %llu, commit sqnum %llu",
+ snod->sqnum, c->cs_sqnum);
goto out_dump;
}
@@ -957,9 +955,9 @@ out:
return err;
out_dump:
- ubifs_err("log error detected while replying the log at LEB %d:%d",
+ ubifs_err("log error detected while replaying the log at LEB %d:%d",
lnum, offs + snod->offs);
- dbg_dump_node(c, snod->node);
+ ubifs_dump_node(c, snod->node);
ubifs_scan_destroy(sleb);
return -EINVAL;
}
@@ -1009,8 +1007,7 @@ out:
*/
int ubifs_replay_journal(struct ubifs_info *c)
{
- int err, i, lnum, offs, free;
- void *sbuf = NULL;
+ int err, lnum, free;
BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
@@ -1025,51 +1022,48 @@ int ubifs_replay_journal(struct ubifs_info *c)
return -EINVAL;
}
- sbuf = vmalloc(c->leb_size);
- if (!sbuf)
- return -ENOMEM;
-
dbg_mnt("start replaying the journal");
-
c->replaying = 1;
-
lnum = c->ltail_lnum = c->lhead_lnum;
- offs = c->lhead_offs;
- for (i = 0; i < c->log_lebs; i++, lnum++) {
- if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
- /*
- * The log is logically circular, we reached the last
- * LEB, switch to the first one.
- */
- lnum = UBIFS_LOG_LNUM;
- offs = 0;
- }
- err = replay_log_leb(c, lnum, offs, sbuf);
+ do {
+ err = replay_log_leb(c, lnum, 0, c->sbuf);
if (err == 1)
/* We hit the end of the log */
break;
if (err)
goto out;
- offs = 0;
- }
+ lnum = ubifs_next_log_lnum(c, lnum);
+ } while (lnum != c->ltail_lnum);
err = replay_buds(c);
if (err)
goto out;
- err = apply_replay_tree(c);
+ err = apply_replay_list(c);
if (err)
goto out;
+ err = set_buds_lprops(c);
+ if (err)
+ goto out;
+
+ /*
+ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
+ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
+ * depend on it. This means we have to initialize it to make sure
+ * budgeting works properly.
+ */
+ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
+ c->bi.uncommitted_idx *= c->max_idx_node_sz;
+
ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
- dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
- "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
- c->highest_inum);
+ dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
+ c->lhead_lnum, c->lhead_offs, c->max_sqnum,
+ (unsigned long)c->highest_inum);
out:
- destroy_replay_tree(c);
+ destroy_replay_list(c);
destroy_bud_list(c);
- vfree(sbuf);
c->replaying = 0;
return err;
}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 2bf753b3888..4c37607a958 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -27,7 +27,9 @@
*/
#include "ubifs.h"
+#include <linux/slab.h>
#include <linux/random.h>
+#include <linux/math64.h>
/*
* Default journal size in logical eraseblocks as a percent of total
@@ -80,7 +82,8 @@ static int create_default_filesystem(struct ubifs_info *c)
int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
int min_leb_cnt = UBIFS_MIN_LEB_CNT;
- uint64_t tmp64, main_bytes;
+ long long tmp64, main_bytes;
+ __le64 tmp_le64;
/* Some functions called from here depend on the @c->key_len filed */
c->key_len = UBIFS_SK_LEN;
@@ -127,7 +130,6 @@ static int create_default_filesystem(struct ubifs_info *c)
* orphan node.
*/
orph_lebs = UBIFS_MIN_ORPH_LEBS;
-#ifdef CONFIG_UBIFS_FS_DEBUG
if (c->leb_cnt - min_leb_cnt > 1)
/*
* For debugging purposes it is better to have at least 2
@@ -135,7 +137,6 @@ static int create_default_filesystem(struct ubifs_info *c)
* consolidations and would be stressed more.
*/
orph_lebs += 1;
-#endif
main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;
main_lebs -= orph_lebs;
@@ -159,7 +160,7 @@ static int create_default_filesystem(struct ubifs_info *c)
if (!sup)
return -ENOMEM;
- tmp64 = (uint64_t)max_buds * c->leb_size;
+ tmp64 = (long long)max_buds * c->leb_size;
if (big_lpt)
sup_flags |= UBIFS_FLG_BIGLPT;
@@ -178,19 +179,22 @@ static int create_default_filesystem(struct ubifs_info *c)
sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
- sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
+ if (c->mount_opts.override_compr)
+ sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
+ else
+ sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
generate_random_uuid(sup->uuid);
- main_bytes = (uint64_t)main_lebs * c->leb_size;
- tmp64 = main_bytes * DEFAULT_RP_PERCENT;
- do_div(tmp64, 100);
+ main_bytes = (long long)main_lebs * c->leb_size;
+ tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);
if (tmp64 > DEFAULT_MAX_RP_SIZE)
tmp64 = DEFAULT_MAX_RP_SIZE;
sup->rp_size = cpu_to_le64(tmp64);
+ sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
- err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
+ err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
kfree(sup);
if (err)
return err;
@@ -241,19 +245,18 @@ static int create_default_filesystem(struct ubifs_info *c)
mst->total_dirty = cpu_to_le64(tmp64);
/* The indexing LEB does not contribute to dark space */
- tmp64 = (c->main_lebs - 1) * c->dark_wm;
+ tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm);
mst->total_dark = cpu_to_le64(tmp64);
mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
- err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
- UBI_UNKNOWN);
+ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
if (err) {
kfree(mst);
return err;
}
- err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,
- UBI_UNKNOWN);
+ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
+ 0);
kfree(mst);
if (err)
return err;
@@ -276,8 +279,7 @@ static int create_default_filesystem(struct ubifs_info *c)
key_write_idx(c, &key, &br->key);
br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
br->len = cpu_to_le32(UBIFS_INO_NODE_SZ);
- err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,
- UBI_UNKNOWN);
+ err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0);
kfree(idx);
if (err)
return err;
@@ -295,10 +297,10 @@ static int create_default_filesystem(struct ubifs_info *c)
ino->ch.node_type = UBIFS_INO_NODE;
ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
ino->nlink = cpu_to_le32(2);
- tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
- ino->atime_sec = tmp;
- ino->ctime_sec = tmp;
- ino->mtime_sec = tmp;
+ tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
+ ino->atime_sec = tmp_le64;
+ ino->ctime_sec = tmp_le64;
+ ino->mtime_sec = tmp_le64;
ino->atime_nsec = 0;
ino->ctime_nsec = 0;
ino->mtime_nsec = 0;
@@ -309,8 +311,7 @@ static int create_default_filesystem(struct ubifs_info *c)
ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
- main_first + DEFAULT_DATA_LEB, 0,
- UBI_UNKNOWN);
+ main_first + DEFAULT_DATA_LEB, 0);
kfree(ino);
if (err)
return err;
@@ -329,8 +330,7 @@ static int create_default_filesystem(struct ubifs_info *c)
return -ENOMEM;
cs->ch.node_type = UBIFS_CS_NODE;
- err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,
- 0, UBI_UNKNOWN);
+ err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
kfree(cs);
ubifs_msg("default file-system created");
@@ -391,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
- ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
- "%d minimum required", c->leb_cnt, c->vi.size,
- min_leb_cnt);
+ ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
+ c->leb_cnt, c->vi.size, min_leb_cnt);
goto failed;
}
@@ -404,13 +403,22 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
}
if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
- err = 7;
+ ubifs_err("too few main LEBs count %d, must be at least %d",
+ c->main_lebs, UBIFS_MIN_MAIN_LEBS);
+ goto failed;
+ }
+
+ max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
+ if (c->max_bud_bytes < max_bytes) {
+ ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
+ c->max_bud_bytes, max_bytes);
goto failed;
}
- if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
- c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
- err = 8;
+ max_bytes = (long long)c->leb_size * c->main_lebs;
+ if (c->max_bud_bytes > max_bytes) {
+ ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
+ c->max_bud_bytes, max_bytes);
goto failed;
}
@@ -444,7 +452,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
goto failed;
}
- max_bytes = c->main_lebs * (long long)c->leb_size;
if (c->rp_size < 0 || max_bytes < c->rp_size) {
err = 14;
goto failed;
@@ -460,7 +467,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
failed:
ubifs_err("bad superblock, error %d", err);
- dbg_dump_node(c, sup);
+ ubifs_dump_node(c, sup);
return -EINVAL;
}
@@ -469,7 +476,8 @@ failed:
* @c: UBIFS file-system description object
*
* This function returns a pointer to the superblock node or a negative error
- * code.
+ * code. Note, the user of this function is responsible for kfree()'ing the
+ * returned superblock buffer.
*/
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
{
@@ -502,7 +510,7 @@ int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
- return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM);
+ return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len);
}
/**
@@ -528,17 +536,35 @@ int ubifs_read_superblock(struct ubifs_info *c)
if (IS_ERR(sup))
return PTR_ERR(sup);
+ c->fmt_version = le32_to_cpu(sup->fmt_version);
+ c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
+
/*
* The software supports all previous versions but not future versions,
* due to the unavailability of time-travelling equipment.
*/
- c->fmt_version = le32_to_cpu(sup->fmt_version);
if (c->fmt_version > UBIFS_FORMAT_VERSION) {
- ubifs_err("on-flash format version is %d, but software only "
- "supports up to version %d", c->fmt_version,
- UBIFS_FORMAT_VERSION);
- err = -EINVAL;
- goto out;
+ ubifs_assert(!c->ro_media || c->ro_mount);
+ if (!c->ro_mount ||
+ c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
+ ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+ c->fmt_version, c->ro_compat_version,
+ UBIFS_FORMAT_VERSION,
+ UBIFS_RO_COMPAT_VERSION);
+ if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
+ ubifs_msg("only R/O mounting is possible");
+ err = -EROFS;
+ } else
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * The FS is mounted R/O, and the media format is
+ * R/O-compatible with the UBIFS implementation, so we can
+ * mount.
+ */
+ c->rw_incompat = 1;
}
if (c->fmt_version < 3) {
@@ -581,23 +607,23 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
c->fanout = le32_to_cpu(sup->fanout);
c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
- c->default_compr = le16_to_cpu(sup->default_compr);
c->rp_size = le64_to_cpu(sup->rp_size);
- c->rp_uid = le32_to_cpu(sup->rp_uid);
- c->rp_gid = le32_to_cpu(sup->rp_gid);
+ c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid));
+ c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid));
sup_flags = le32_to_cpu(sup->flags);
+ if (!c->mount_opts.override_compr)
+ c->default_compr = le16_to_cpu(sup->default_compr);
c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
-
memcpy(&c->uuid, &sup->uuid, 16);
-
c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
+ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
/* Automatically increase file system size to the maximum size */
c->old_leb_cnt = c->leb_cnt;
if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
- if (c->vfs_sb->s_flags & MS_RDONLY)
+ if (c->ro_mount)
dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
c->old_leb_cnt, c->leb_cnt);
else {
@@ -620,10 +646,162 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
c->main_first = c->leb_cnt - c->main_lebs;
- c->report_rp_size = ubifs_reported_space(c, c->rp_size);
err = validate_sb(c, sup);
out:
kfree(sup);
return err;
}
+
+/**
+ * fixup_leb - fixup/unmap an LEB containing free space.
+ * @c: UBIFS file-system description object
+ * @lnum: the LEB number to fix up
+ * @len: number of used bytes in LEB (starting at offset 0)
+ *
+ * This function reads the contents of the given LEB number @lnum, then fixes
+ * it up, so that empty min. I/O units in the end of LEB are actually erased on
+ * flash (rather than being just all-0xff real data). If the LEB is completely
+ * empty, it is simply unmapped.
+ */
+static int fixup_leb(struct ubifs_info *c, int lnum, int len)
+{
+ int err;
+
+ ubifs_assert(len >= 0);
+ ubifs_assert(len % c->min_io_size == 0);
+ ubifs_assert(len < c->leb_size);
+
+ if (len == 0) {
+ dbg_mnt("unmap empty LEB %d", lnum);
+ return ubifs_leb_unmap(c, lnum);
+ }
+
+ dbg_mnt("fixup LEB %d, data len %d", lnum, len);
+ err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
+ if (err)
+ return err;
+
+ return ubifs_leb_change(c, lnum, c->sbuf, len);
+}
+
+/**
+ * fixup_free_space - find & remap all LEBs containing free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function walks through all LEBs in the filesystem and fixes up those
+ * containing free/empty space.
+ */
+static int fixup_free_space(struct ubifs_info *c)
+{
+ int lnum, err = 0;
+ struct ubifs_lprops *lprops;
+
+ ubifs_get_lprops(c);
+
+ /* Fixup LEBs in the master area */
+ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
+ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
+ if (err)
+ goto out;
+ }
+
+ /* Unmap unused log LEBs */
+ lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
+ while (lnum != c->ltail_lnum) {
+ err = fixup_leb(c, lnum, 0);
+ if (err)
+ goto out;
+ lnum = ubifs_next_log_lnum(c, lnum);
+ }
+
+ /*
+ * Fixup the log head which contains only a CS node at the
+ * beginning.
+ */
+ err = fixup_leb(c, c->lhead_lnum,
+ ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
+ if (err)
+ goto out;
+
+ /* Fixup LEBs in the LPT area */
+ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
+ int free = c->ltab[lnum - c->lpt_first].free;
+
+ if (free > 0) {
+ err = fixup_leb(c, lnum, c->leb_size - free);
+ if (err)
+ goto out;
+ }
+ }
+
+ /* Unmap LEBs in the orphans area */
+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
+ err = fixup_leb(c, lnum, 0);
+ if (err)
+ goto out;
+ }
+
+ /* Fixup LEBs in the main area */
+ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
+ lprops = ubifs_lpt_lookup(c, lnum);
+ if (IS_ERR(lprops)) {
+ err = PTR_ERR(lprops);
+ goto out;
+ }
+
+ if (lprops->free > 0) {
+ err = fixup_leb(c, lnum, c->leb_size - lprops->free);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ ubifs_release_lprops(c);
+ return err;
+}
+
+/**
+ * ubifs_fixup_free_space - find & fix all LEBs with free space.
+ * @c: UBIFS file-system description object
+ *
+ * This function fixes up LEBs containing free space on first mount, if the
+ * appropriate flag was set when the FS was created. Each LEB with one or more
+ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
+ * the free space is actually erased. E.g., this is necessary for some NAND
+ * chips, since the free space may have been programmed like real "0xff" data
+ * (generating a non-0xff ECC), causing future writes to the not-really-erased
+ * NAND pages to behave badly. After the space is fixed up, the superblock flag
+ * is cleared, so that this is skipped for all future mounts.
+ */
+int ubifs_fixup_free_space(struct ubifs_info *c)
+{
+ int err;
+ struct ubifs_sb_node *sup;
+
+ ubifs_assert(c->space_fixup);
+ ubifs_assert(!c->ro_mount);
+
+ ubifs_msg("start fixing up free space");
+
+ err = fixup_free_space(c);
+ if (err)
+ return err;
+
+ sup = ubifs_read_sb_node(c);
+ if (IS_ERR(sup))
+ return PTR_ERR(sup);
+
+ /* Free-space fixup is no longer required */
+ c->space_fixup = 0;
+ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
+
+ err = ubifs_write_sb_node(c, sup);
+ kfree(sup);
+ if (err)
+ return err;
+
+ ubifs_msg("free space fixup complete");
+ return err;
+}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index acf5c5fffc6..58aa05df2bb 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
magic = le32_to_cpu(ch->magic);
if (magic == 0xFFFFFFFF) {
- dbg_scan("hit empty space");
+ dbg_scan("hit empty space at LEB %d:%d", lnum, offs);
return SCANNED_EMPTY_SPACE;
}
@@ -85,9 +85,10 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
if (len < UBIFS_CH_SZ)
return SCANNED_GARBAGE;
- dbg_scan("scanning %s", dbg_ntype(ch->node_type));
+ dbg_scan("scanning %s at LEB %d:%d",
+ dbg_ntype(ch->node_type), lnum, offs);
- if (ubifs_check_node(c, buf, lnum, offs, quiet))
+ if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
return SCANNED_A_CORRUPT_NODE;
if (ch->node_type == UBIFS_PAD_NODE) {
@@ -101,22 +102,21 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
if (!quiet) {
ubifs_err("bad pad node at LEB %d:%d",
lnum, offs);
- dbg_dump_node(c, pad);
+ ubifs_dump_node(c, pad);
}
return SCANNED_A_BAD_PAD_NODE;
}
/* Make the node pads to 8-byte boundary */
if ((node_len + pad_len) & 7) {
- if (!quiet) {
- dbg_err("bad padding length %d - %d",
- offs, offs + node_len + pad_len);
- }
+ if (!quiet)
+ ubifs_err("bad padding length %d - %d",
+ offs, offs + node_len + pad_len);
return SCANNED_A_BAD_PAD_NODE;
}
- dbg_scan("%d bytes padded, offset now %d",
- pad_len, ALIGN(offs + node_len + pad_len, 8));
+ dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len,
+ lnum, offs, ALIGN(offs + node_len + pad_len, 8));
return node_len + pad_len;
}
@@ -149,10 +149,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
INIT_LIST_HEAD(&sleb->nodes);
sleb->buf = sbuf;
- err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
+ err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
if (err && err != -EBADMSG) {
- ubifs_err("cannot read %d bytes from LEB %d:%d,"
- " error %d", c->leb_size - offs, lnum, offs, err);
+ ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
+ c->leb_size - offs, lnum, offs, err);
kfree(sleb);
return ERR_PTR(err);
}
@@ -198,7 +198,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
struct ubifs_ino_node *ino = buf;
struct ubifs_scan_node *snod;
- snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
+ snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
if (!snod)
return -ENOMEM;
@@ -213,13 +213,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
case UBIFS_DENT_NODE:
case UBIFS_XENT_NODE:
case UBIFS_DATA_NODE:
- case UBIFS_TRUN_NODE:
/*
* The key is in the same place in all keyed
* nodes.
*/
key_read(c, &ino->key, &snod->key);
break;
+ default:
+ invalid_key_init(c, &snod->key);
+ break;
}
list_add_tail(&snod->list, &sleb->nodes);
sleb->nodes_cnt += 1;
@@ -238,13 +240,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
{
int len;
- ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
- if (dbg_failure_mode)
- return;
+ ubifs_err("corruption at LEB %d:%d", lnum, offs);
len = c->leb_size - offs;
- if (len > 4096)
- len = 4096;
- dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
+ if (len > 8192)
+ len = 8192;
+ ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs);
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
}
@@ -253,13 +253,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
* @c: UBIFS file-system description object
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
- * @sbuf: scan buffer (must be c->leb_size)
+ * @sbuf: scan buffer (must be of @c->leb_size bytes in size)
+ * @quiet: print no messages
*
* This function scans LEB number @lnum and returns complete information about
- * its contents. Returns an error code in case of failure.
+ * its contents. Returns the scanned information in case of success,
+ * %-EUCLEAN if the LEB needs recovery, and other negative error codes in case
+ * of failure.
+ *
+ * If @quiet is non-zero, this function does not print large and scary
+ * error messages and flash dumps in case of errors.
*/
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
- int offs, void *sbuf)
+ int offs, void *sbuf, int quiet)
{
void *buf = sbuf + offs;
int err, len = c->leb_size - offs;
@@ -278,8 +284,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
cond_resched();
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
-
+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
@@ -294,17 +299,18 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
switch (ret) {
case SCANNED_GARBAGE:
- dbg_err("garbage");
+ ubifs_err("garbage");
goto corrupted;
case SCANNED_A_NODE:
break;
case SCANNED_A_CORRUPT_NODE:
case SCANNED_A_BAD_PAD_NODE:
- dbg_err("bad node");
+ ubifs_err("bad node");
goto corrupted;
default:
- dbg_err("unknown");
- goto corrupted;
+ ubifs_err("unknown");
+ err = -EINVAL;
+ goto error;
}
err = ubifs_add_snod(c, sleb, buf, offs);
@@ -317,8 +323,12 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
len -= node_len;
}
- if (offs % c->min_io_size)
+ if (offs % c->min_io_size) {
+ if (!quiet)
+ ubifs_err("empty space starts at non-aligned offset %d",
+ offs);
goto corrupted;
+ }
ubifs_end_scan(c, sleb, lnum, offs);
@@ -327,18 +337,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
break;
for (; len; offs++, buf++, len--)
if (*(uint8_t *)buf != 0xff) {
- ubifs_err("corrupt empty space at LEB %d:%d",
- lnum, offs);
+ if (!quiet)
+ ubifs_err("corrupt empty space at LEB %d:%d",
+ lnum, offs);
goto corrupted;
}
return sleb;
corrupted:
- ubifs_scanned_corruption(c, lnum, offs, buf);
+ if (!quiet) {
+ ubifs_scanned_corruption(c, lnum, offs, buf);
+ ubifs_err("LEB %d scanning failed", lnum);
+ }
err = -EUCLEAN;
+ ubifs_scan_destroy(sleb);
+ return ERR_PTR(err);
+
error:
- ubifs_err("LEB %d scanning failed", lnum);
+ ubifs_err("LEB %d scanning failed, error %d", lnum, err);
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index f248533841a..9a9fb94a41c 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
freed = ubifs_destroy_tnc_subtree(znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
- ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
total_freed += freed;
znode = zprev;
}
@@ -151,7 +150,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
* @contention: if any contention, this is set to %1
*
* This function walks the list of mounted UBIFS file-systems and frees clean
- * znodes which are older then @age, until at least @nr znodes are freed.
+ * znodes which are older than @age, until at least @nr znodes are freed.
* Returns the number of freed znodes.
*/
static int shrink_tnc_trees(int nr, int age, int *contention)
@@ -206,8 +205,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention)
* Move this one to the end of the list to provide some
* fairness.
*/
- list_del(&c->infos_list);
- list_add_tail(&c->infos_list, &ubifs_infos);
+ list_move_tail(&c->infos_list, &ubifs_infos);
mutex_unlock(&c->umount_mutex);
if (freed >= nr)
break;
@@ -251,7 +249,7 @@ static int kick_a_thread(void)
dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
- c->ro_media) {
+ c->ro_mount || c->ro_error) {
mutex_unlock(&c->umount_mutex);
continue;
}
@@ -263,8 +261,7 @@ static int kick_a_thread(void)
}
if (i == 1) {
- list_del(&c->infos_list);
- list_add_tail(&c->infos_list, &ubifs_infos);
+ list_move_tail(&c->infos_list, &ubifs_infos);
spin_unlock(&ubifs_infos_lock);
ubifs_request_bg_commit(c);
@@ -279,13 +276,25 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+unsigned long ubifs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
- if (nr == 0)
- return clean_zn_cnt;
+ /*
+ * Due to the way UBIFS updates the clean znode counter it may
+ * temporarily be negative.
+ */
+ return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
+}
+
+unsigned long ubifs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ unsigned long nr = sc->nr_to_scan;
+ int contention = 0;
+ unsigned long freed;
+ long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
if (!clean_zn_cnt) {
/*
@@ -313,10 +322,10 @@ int ubifs_shrinker(int nr, gfp_t gfp_mask)
if (!freed && contention) {
dbg_tnc("freed nothing, but contention");
- return -1;
+ return SHRINK_STOP;
}
out:
- dbg_tnc("%d znodes were freed, requested %d", freed, nr);
+ dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
return freed;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ca1e2d4e03c..3904c8574ef 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -30,19 +30,27 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/ctype.h>
-#include <linux/random.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/math64.h>
+#include <linux/writeback.h>
#include "ubifs.h"
+/*
+ * Maximum amount of memory we may 'kmalloc()' without worrying that we are
+ * allocating too much.
+ */
+#define UBIFS_KMALLOC_OK (128*1024)
+
/* Slab cache for UBIFS inodes */
struct kmem_cache *ubifs_inode_slab;
/* UBIFS TNC shrinker description */
static struct shrinker ubifs_shrinker_info = {
- .shrink = ubifs_shrinker,
+ .scan_objects = ubifs_shrink_scan,
+ .count_objects = ubifs_shrink_count,
.seeks = DEFAULT_SEEKS,
};
@@ -78,16 +86,15 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
return 4;
- if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
+ if (ui->xattr && !S_ISREG(inode->i_mode))
return 5;
if (!ubifs_compr_present(ui->compr_type)) {
- ubifs_warn("inode %lu uses '%s' compression, but it was not "
- "compiled in", inode->i_ino,
- ubifs_compr_name(ui->compr_type));
+ ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in",
+ inode->i_ino, ubifs_compr_name(ui->compr_type));
}
- err = dbg_check_dir_size(c, inode);
+ err = dbg_check_dir(c, inode);
return err;
}
@@ -122,9 +129,9 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
goto out_ino;
inode->i_flags |= (S_NOCMTIME | S_NOATIME);
- inode->i_nlink = le32_to_cpu(ino->nlink);
- inode->i_uid = le32_to_cpu(ino->uid);
- inode->i_gid = le32_to_cpu(ino->gid);
+ set_nlink(inode, le32_to_cpu(ino->nlink));
+ i_uid_write(inode, le32_to_cpu(ino->uid));
+ i_gid_write(inode, le32_to_cpu(ino->gid));
inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
@@ -149,7 +156,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
if (err)
goto out_invalid;
- /* Disable readahead */
+ /* Disable read-ahead */
inode->i_mapping->backing_dev_info = &c->bdi;
switch (inode->i_mode & S_IFMT) {
@@ -239,8 +246,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
out_invalid:
ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
- dbg_dump_node(c, ino);
- dbg_dump_inode(c, inode);
+ ubifs_dump_node(c, ino);
+ ubifs_dump_inode(c, inode);
err = -EINVAL;
out_ino:
kfree(ino);
@@ -265,20 +272,27 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
return &ui->vfs_inode;
};
+static void ubifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ kmem_cache_free(ubifs_inode_slab, ui);
+}
+
static void ubifs_destroy_inode(struct inode *inode)
{
struct ubifs_inode *ui = ubifs_inode(inode);
kfree(ui->data);
- kmem_cache_free(ubifs_inode_slab, inode);
+ call_rcu(&inode->i_rcu, ubifs_i_callback);
}
/*
* Note, Linux write-back code calls this without 'i_mutex'.
*/
-static int ubifs_write_inode(struct inode *inode, int wait)
+static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- int err;
+ int err = 0;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
@@ -289,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, int wait)
mutex_lock(&ui->ui_mutex);
/*
* Due to races between write-back forced by budgeting
- * (see 'sync_some_inodes()') and pdflush write-back, the inode may
+ * (see 'sync_some_inodes()') and background write-back, the inode may
* have already been synchronized, do not do this again. This might
* also happen if it was synchronized in an VFS operation, e.g.
* 'ubifs_link()'.
@@ -299,10 +313,20 @@ static int ubifs_write_inode(struct inode *inode, int wait)
return 0;
}
- dbg_gen("inode %lu", inode->i_ino);
- err = ubifs_jnl_write_inode(c, inode, 0);
- if (err)
- ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+ /*
+ * As an optimization, do not write orphan inodes to the media,
+ * because this is not needed.
+ */
+ dbg_gen("inode %lu, mode %#x, nlink %u",
+ inode->i_ino, (int)inode->i_mode, inode->i_nlink);
+ if (inode->i_nlink) {
+ err = ubifs_jnl_write_inode(c, inode);
+ if (err)
+ ubifs_err("can't write inode %lu, error %d",
+ inode->i_ino, err);
+ else
+ err = dbg_check_inode_size(c, inode, ui->ui_size);
+ }
ui->dirty = 0;
mutex_unlock(&ui->ui_mutex);
@@ -310,12 +334,13 @@ static int ubifs_write_inode(struct inode *inode, int wait)
return err;
}
-static void ubifs_delete_inode(struct inode *inode)
+static void ubifs_evict_inode(struct inode *inode)
{
int err;
struct ubifs_info *c = inode->i_sb->s_fs_info;
+ struct ubifs_inode *ui = ubifs_inode(inode);
- if (ubifs_inode(inode)->xattr)
+ if (ui->xattr)
/*
* Extended attribute inode deletions are fully handled in
* 'ubifs_removexattr()'. These inodes are special and have
@@ -323,27 +348,40 @@ static void ubifs_delete_inode(struct inode *inode)
*/
goto out;
- dbg_gen("inode %lu", inode->i_ino);
+ dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
ubifs_assert(!atomic_read(&inode->i_count));
- ubifs_assert(inode->i_nlink == 0);
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
+
+ if (inode->i_nlink)
+ goto done;
+
if (is_bad_inode(inode))
goto out;
- ubifs_inode(inode)->ui_size = inode->i_size = 0;
- err = ubifs_jnl_write_inode(c, inode, 1);
+ ui->ui_size = inode->i_size = 0;
+ err = ubifs_jnl_delete_inode(c, inode);
if (err)
/*
* Worst case we have a lost orphan inode wasting space, so a
- * simple error message is ok here.
+ * simple error message is OK here.
*/
- ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
+ ubifs_err("can't delete inode %lu, error %d",
+ inode->i_ino, err);
+
out:
+ if (ui->dirty)
+ ubifs_release_dirty_inode_budget(c, ui);
+ else {
+ /* We've deleted something - clear the "no space" flags */
+ c->bi.nospace = c->bi.nospace_rp = 0;
+ smp_wmb();
+ }
+done:
clear_inode(inode);
}
-static void ubifs_dirty_inode(struct inode *inode)
+static void ubifs_dirty_inode(struct inode *inode, int flags)
{
struct ubifs_inode *ui = ubifs_inode(inode);
@@ -358,8 +396,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct ubifs_info *c = dentry->d_sb->s_fs_info;
unsigned long long free;
+ __le32 *uuid = (__le32 *)c->uuid;
- free = ubifs_budg_get_free_space(c);
+ free = ubifs_get_free_space(c);
dbg_gen("free space %lld bytes (%lld blocks)",
free, free >> UBIFS_BLOCK_SHIFT);
@@ -374,39 +413,74 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = 0;
buf->f_ffree = 0;
buf->f_namelen = UBIFS_MAX_NLEN;
-
+ buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
+ buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
+ ubifs_assert(buf->f_bfree <= c->block_cnt);
return 0;
}
-static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int ubifs_show_options(struct seq_file *s, struct dentry *root)
{
- struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
+ struct ubifs_info *c = root->d_sb->s_fs_info;
if (c->mount_opts.unmount_mode == 2)
seq_printf(s, ",fast_unmount");
else if (c->mount_opts.unmount_mode == 1)
seq_printf(s, ",norm_unmount");
+ if (c->mount_opts.bulk_read == 2)
+ seq_printf(s, ",bulk_read");
+ else if (c->mount_opts.bulk_read == 1)
+ seq_printf(s, ",no_bulk_read");
+
+ if (c->mount_opts.chk_data_crc == 2)
+ seq_printf(s, ",chk_data_crc");
+ else if (c->mount_opts.chk_data_crc == 1)
+ seq_printf(s, ",no_chk_data_crc");
+
+ if (c->mount_opts.override_compr) {
+ seq_printf(s, ",compr=%s",
+ ubifs_compr_name(c->mount_opts.compr_type));
+ }
+
return 0;
}
static int ubifs_sync_fs(struct super_block *sb, int wait)
{
+ int i, err;
struct ubifs_info *c = sb->s_fs_info;
- int i, ret = 0, err;
- if (c->jheads)
- for (i = 0; i < c->jhead_cnt; i++) {
- err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
- if (err && !ret)
- ret = err;
- }
/*
- * We ought to call sync for c->ubi but it does not have one. If it had
- * it would in turn call mtd->sync, however mtd operations are
- * synchronous anyway, so we don't lose any sleep here.
+ * Zero @wait is just an advisory thing to help the file system shove
+ * lots of data into the queues, and there will be the second
+ * '->sync_fs()' call, with non-zero @wait.
+ */
+ if (!wait)
+ return 0;
+
+ /*
+ * Synchronize write buffers, because 'ubifs_run_commit()' does not
+ * do this if it waits for an already running commit.
+ */
+ for (i = 0; i < c->jhead_cnt; i++) {
+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Strictly speaking, it is not necessary to commit the journal here,
+ * synchronizing write-buffers would be enough. But committing makes
+ * UBIFS free space predictions much more accurate, so we want to let
+ * the user be able to get more accurate results of 'statfs()' after
+ * they synchronize the file system.
*/
- return ret;
+ err = ubifs_run_commit(c);
+ if (err)
+ return err;
+
+ return ubi_sync(c->vi.ubi_num);
}
/**
@@ -437,9 +511,12 @@ static int init_constants_early(struct ubifs_info *c)
c->leb_cnt = c->vi.size;
c->leb_size = c->vi.usable_leb_size;
+ c->leb_start = c->di.leb_start;
c->half_leb_size = c->leb_size / 2;
c->min_io_size = c->di.min_io_size;
c->min_io_shift = fls(c->min_io_size) - 1;
+ c->max_write_size = c->di.max_write_size;
+ c->max_write_shift = fls(c->max_write_size) - 1;
if (c->leb_size < UBIFS_MIN_LEB_SZ) {
ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
@@ -459,6 +536,18 @@ static int init_constants_early(struct ubifs_info *c)
}
/*
+ * Maximum write size has to be greater than or equal to min. I/O
+ * size, a multiple of min. I/O size, and a power of 2.
+ */
+ if (c->max_write_size < c->min_io_size ||
+ c->max_write_size % c->min_io_size ||
+ !is_power_of_2(c->max_write_size)) {
+ ubifs_err("bad write buffer size %d for %d min. I/O unit",
+ c->max_write_size, c->min_io_size);
+ return -EINVAL;
+ }
+
+ /*
* UBIFS aligns all node to 8-byte boundary, so to make function in
* io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
* less than 8.
@@ -466,6 +555,10 @@ static int init_constants_early(struct ubifs_info *c)
if (c->min_io_size < 8) {
c->min_io_size = 8;
c->min_io_shift = 3;
+ if (c->max_write_size < c->min_io_size) {
+ c->max_write_size = c->min_io_size;
+ c->max_write_shift = c->min_io_shift;
+ }
}
c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
@@ -505,19 +598,23 @@ static int init_constants_early(struct ubifs_info *c)
c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
/*
- * Initialize dead and dark LEB space watermarks.
- *
- * Dead space is the space which cannot be used. Its watermark is
- * equivalent to min. I/O unit or minimum node size if it is greater
- * then min. I/O unit.
- *
- * Dark space is the space which might be used, or might not, depending
- * on which node should be written to the LEB. Its watermark is
- * equivalent to maximum UBIFS node size.
+ * Initialize dead and dark LEB space watermarks. See gc.c for comments
+ * about these values.
*/
c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
+ /*
+ * Calculate how many bytes would be wasted at the end of LEB if it was
+ * fully filled with data nodes of maximum size. This is used in
+ * calculations when reporting free space.
+ */
+ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
+
+ /* Buffer size for bulk-reads */
+ c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
+ if (c->max_bu_buf_len > c->leb_size)
+ c->max_bu_buf_len = c->leb_size;
return 0;
}
@@ -542,7 +639,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
}
/*
- * init_constants_late - initialize UBIFS constants.
+ * init_constants_sb - initialize UBIFS constants.
* @c: UBIFS file-system description object
*
* This is a helper function which initializes various UBIFS constants after
@@ -550,10 +647,10 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
* makes sure they are all right. Returns zero in case of success and a
* negative error code in case of failure.
*/
-static int init_constants_late(struct ubifs_info *c)
+static int init_constants_sb(struct ubifs_info *c)
{
int tmp, err;
- uint64_t tmp64;
+ long long tmp64;
c->main_bytes = (long long)c->main_lebs * c->leb_size;
c->max_znode_sz = sizeof(struct ubifs_znode) +
@@ -571,8 +668,8 @@ static int init_constants_late(struct ubifs_info *c)
tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
tmp = ALIGN(tmp, c->min_io_size);
if (tmp > c->leb_size) {
- dbg_err("too small LEB size %d, at least %d needed",
- c->leb_size, tmp);
+ ubifs_err("too small LEB size %d, at least %d needed",
+ c->leb_size, tmp);
return -EINVAL;
}
@@ -580,15 +677,14 @@ static int init_constants_late(struct ubifs_info *c)
* Make sure that the log is large enough to fit reference nodes for
* all buds plus one reserved LEB.
*/
- tmp64 = c->max_bud_bytes;
- tmp = do_div(tmp64, c->leb_size);
- c->max_bud_cnt = tmp64 + !!tmp;
+ tmp64 = c->max_bud_bytes + c->leb_size - 1;
+ c->max_bud_cnt = div_u64(tmp64, c->leb_size);
tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
tmp /= c->leb_size;
tmp += 1;
if (c->log_lebs < tmp) {
- dbg_err("too small log %d LEBs, required min. %d LEBs",
- c->log_lebs, tmp);
+ ubifs_err("too small log %d LEBs, required min. %d LEBs",
+ c->log_lebs, tmp);
return -EINVAL;
}
@@ -597,11 +693,11 @@ static int init_constants_late(struct ubifs_info *c)
* be compressed and direntries are of the maximum size.
*
* Note, data, which may be stored in inodes is budgeted separately, so
- * it is not included into 'c->inode_budget'.
+ * it is not included into 'c->bi.inode_budget'.
*/
- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
- c->inode_budget = UBIFS_INO_NODE_SZ;
- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
+ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
+ c->bi.inode_budget = UBIFS_INO_NODE_SZ;
+ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
/*
* When the amount of flash space used by buds becomes
@@ -618,7 +714,7 @@ static int init_constants_late(struct ubifs_info *c)
* Consequently, if the journal is too small, UBIFS will treat it as
* always full.
*/
- tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
+ tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1;
if (c->bg_bud_bytes < tmp64)
c->bg_bud_bytes = tmp64;
if (c->max_bud_bytes < tmp64 + c->leb_size)
@@ -628,36 +724,51 @@ static int init_constants_late(struct ubifs_info *c)
if (err)
return err;
- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ /* Initialize effective LEB size used in budgeting calculations */
+ c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
+ return 0;
+}
+
+/*
+ * init_constants_master - initialize UBIFS constants.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function which initializes various UBIFS constants after
+ * the master node has been read: the minimum number of index LEBs, the
+ * reported reserved-for-root size, and the FS block count for 'statfs()'.
+ */
+static void init_constants_master(struct ubifs_info *c)
+{
+ long long tmp64;
+
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+ c->report_rp_size = ubifs_reported_space(c, c->rp_size);
/*
* Calculate total amount of FS blocks. This number is not used
* internally because it does not make much sense for UBIFS, but it is
* necessary to report something for the 'statfs()' call.
*
- * Subtract the LEB reserved for GC and the LEB which is reserved for
- * deletions.
- *
- * Review 'ubifs_calc_available()' if changing this calculation.
+ * Subtract the LEB reserved for GC, the LEB which is reserved for
+ * deletions, minimum LEBs for the index, and assume only one journal
+ * head is available.
*/
- tmp64 = c->main_lebs - 2;
- tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
+ tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1;
+ tmp64 *= (long long)c->leb_size - c->leb_overhead;
tmp64 = ubifs_reported_space(c, tmp64);
c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
-
- return 0;
}
/**
* take_gc_lnum - reserve GC LEB.
* @c: UBIFS file-system description object
*
- * This function ensures that the LEB reserved for garbage collection is
- * unmapped and is marked as "taken" in lprops. We also have to set free space
- * to LEB size and dirty space to zero, because lprops may contain out-of-date
- * information if the file-system was un-mounted before it has been committed.
- * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * This function ensures that the LEB reserved for garbage collection is marked
+ * as "taken" in lprops. We also have to set free space to LEB size and dirty
+ * space to zero, because lprops may contain out-of-date information if the
+ * file-system was un-mounted before it has been committed. This function
+ * returns zero in case of success and a negative error code in case of
+ * failure.
*/
static int take_gc_lnum(struct ubifs_info *c)
{
@@ -668,10 +779,6 @@ static int take_gc_lnum(struct ubifs_info *c)
return -EINVAL;
}
- err = ubifs_leb_unmap(c, c->gc_lnum);
- if (err)
- return err;
-
/* And we have to tell lprops that this LEB is taken */
err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
LPROPS_TAKEN, 0, 0);
@@ -703,15 +810,15 @@ static int alloc_wbufs(struct ubifs_info *c)
c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
c->jheads[i].wbuf.jhead = i;
+ c->jheads[i].grouped = 1;
}
- c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
/*
- * Garbage Collector head likely contains long-term data and
- * does not need to be synchronized by timer.
+ * Garbage Collector head does not need to be synchronized by timer.
+ * Also GC head nodes are not grouped.
*/
- c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
- c->jheads[GCHD].wbuf.timeout = 0;
+ c->jheads[GCHD].wbuf.no_timer = 1;
+ c->jheads[GCHD].grouped = 0;
return 0;
}
@@ -753,7 +860,7 @@ static void free_orphans(struct ubifs_info *c)
orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
list_del(&orph->list);
kfree(orph);
- dbg_err("orphan list not empty at unmount");
+ ubifs_err("orphan list not empty at unmount");
}
vfree(c->orph_buf);
@@ -766,26 +873,10 @@ static void free_orphans(struct ubifs_info *c)
*/
static void free_buds(struct ubifs_info *c)
{
- struct rb_node *this = c->buds.rb_node;
- struct ubifs_bud *bud;
-
- while (this) {
- if (this->rb_left)
- this = this->rb_left;
- else if (this->rb_right)
- this = this->rb_right;
- else {
- bud = rb_entry(this, struct ubifs_bud, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &bud->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- kfree(bud);
- }
- }
+ struct ubifs_bud *bud, *n;
+
+ rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb)
+ kfree(bud);
}
/**
@@ -803,7 +894,7 @@ static int check_volume_empty(struct ubifs_info *c)
c->empty = 1;
for (lnum = 0; lnum < c->leb_cnt; lnum++) {
- err = ubi_is_mapped(c->ubi, lnum);
+ err = ubifs_is_mapped(c, lnum);
if (unlikely(err < 0))
return err;
if (err == 1) {
@@ -822,21 +913,57 @@ static int check_volume_empty(struct ubifs_info *c)
*
* Opt_fast_unmount: do not run a journal commit before un-mounting
* Opt_norm_unmount: run a journal commit before un-mounting
+ * Opt_bulk_read: enable bulk-reads
+ * Opt_no_bulk_read: disable bulk-reads
+ * Opt_chk_data_crc: check CRCs when reading data nodes
+ * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
+ * Opt_override_compr: override default compressor
* Opt_err: just end of array marker
*/
enum {
Opt_fast_unmount,
Opt_norm_unmount,
+ Opt_bulk_read,
+ Opt_no_bulk_read,
+ Opt_chk_data_crc,
+ Opt_no_chk_data_crc,
+ Opt_override_compr,
Opt_err,
};
-static match_table_t tokens = {
+static const match_table_t tokens = {
{Opt_fast_unmount, "fast_unmount"},
{Opt_norm_unmount, "norm_unmount"},
+ {Opt_bulk_read, "bulk_read"},
+ {Opt_no_bulk_read, "no_bulk_read"},
+ {Opt_chk_data_crc, "chk_data_crc"},
+ {Opt_no_chk_data_crc, "no_chk_data_crc"},
+ {Opt_override_compr, "compr=%s"},
{Opt_err, NULL},
};
/**
+ * parse_standard_option - parse a standard mount option.
+ * @option: the option to parse
+ *
+ * Normally, standard mount options like "sync" are passed to file-systems as
+ * flags. However, when a "rootflags=" kernel boot parameter is used, they may
+ * be present in the options string. This function tries to deal with this
+ * situation and parse standard options. Returns 0 if the option was not
+ * recognized, and the corresponding integer flag if it was.
+ *
+ * UBIFS is only interested in the "sync" option, so do not check for anything
+ * else.
+ */
+static int parse_standard_option(const char *option)
+{
+ ubifs_msg("parse %s", option);
+ if (!strcmp(option, "sync"))
+ return MS_SYNCHRONOUS;
+ return 0;
+}
+
+/**
* ubifs_parse_options - parse mount parameters.
* @c: UBIFS file-system description object
* @options: parameters to parse
@@ -862,18 +989,69 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
token = match_token(p, tokens, args);
switch (token) {
+ /*
+ * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
+ * We accept them in order to be backward-compatible. But this
+ * should be removed at some point.
+ */
case Opt_fast_unmount:
c->mount_opts.unmount_mode = 2;
- c->fast_unmount = 1;
break;
case Opt_norm_unmount:
c->mount_opts.unmount_mode = 1;
- c->fast_unmount = 0;
break;
+ case Opt_bulk_read:
+ c->mount_opts.bulk_read = 2;
+ c->bulk_read = 1;
+ break;
+ case Opt_no_bulk_read:
+ c->mount_opts.bulk_read = 1;
+ c->bulk_read = 0;
+ break;
+ case Opt_chk_data_crc:
+ c->mount_opts.chk_data_crc = 2;
+ c->no_chk_data_crc = 0;
+ break;
+ case Opt_no_chk_data_crc:
+ c->mount_opts.chk_data_crc = 1;
+ c->no_chk_data_crc = 1;
+ break;
+ case Opt_override_compr:
+ {
+ char *name = match_strdup(&args[0]);
+
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "none"))
+ c->mount_opts.compr_type = UBIFS_COMPR_NONE;
+ else if (!strcmp(name, "lzo"))
+ c->mount_opts.compr_type = UBIFS_COMPR_LZO;
+ else if (!strcmp(name, "zlib"))
+ c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
+ else {
+ ubifs_err("unknown compressor \"%s\"", name);
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ c->mount_opts.override_compr = 1;
+ c->default_compr = c->mount_opts.compr_type;
+ break;
+ }
default:
- ubifs_err("unrecognized mount option \"%s\" "
- "or missing value", p);
- return -EINVAL;
+ {
+ unsigned long flag;
+ struct super_block *sb = c->vfs_sb;
+
+ flag = parse_standard_option(p);
+ if (!flag) {
+ ubifs_err("unrecognized mount option \"%s\" or missing value",
+ p);
+ return -EINVAL;
+ }
+ sb->s_flags |= flag;
+ break;
+ }
}
}
@@ -911,37 +1089,82 @@ static void destroy_journal(struct ubifs_info *c)
}
/**
+ * bu_init - initialize bulk-read information.
+ * @c: UBIFS file-system description object
+ */
+static void bu_init(struct ubifs_info *c)
+{
+ ubifs_assert(c->bulk_read == 1);
+
+ if (c->bu.buf)
+ return; /* Already initialized */
+
+again:
+ c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN);
+ if (!c->bu.buf) {
+ if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) {
+ c->max_bu_buf_len = UBIFS_KMALLOC_OK;
+ goto again;
+ }
+
+ /* Just disable bulk-read */
+ ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it",
+ c->max_bu_buf_len);
+ c->mount_opts.bulk_read = 1;
+ c->bulk_read = 0;
+ return;
+ }
+}
+
+/**
+ * check_free_space - check if there is enough free space to mount.
+ * @c: UBIFS file-system description object
+ *
+ * This function makes sure UBIFS has enough free space to be mounted in
+ * read/write mode. UBIFS must always have some free space to allow deletions.
+ */
+static int check_free_space(struct ubifs_info *c)
+{
+ ubifs_assert(c->dark_wm > 0);
+ if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
+ ubifs_err("insufficient free space to mount in R/W mode");
+ ubifs_dump_budg(c, &c->bi);
+ ubifs_dump_lprops(c);
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+/**
* mount_ubifs - mount UBIFS file-system.
* @c: UBIFS file-system description object
*
* This function mounts UBIFS file system. Returns zero in case of success and
* a negative error code in case of failure.
- *
- * Note, the function does not de-allocate resources it it fails half way
- * through, and the caller has to do this instead.
*/
static int mount_ubifs(struct ubifs_info *c)
{
- struct super_block *sb = c->vfs_sb;
- int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
- long long x;
+ int err;
+ long long x, y;
size_t sz;
+ c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
+ /* Suppress error messages while probing if MS_SILENT is set */
+ c->probing = !!(c->vfs_sb->s_flags & MS_SILENT);
+
err = init_constants_early(c);
if (err)
return err;
-#ifdef CONFIG_UBIFS_FS_DEBUG
- c->dbg_buf = vmalloc(c->leb_size);
- if (!c->dbg_buf)
- return -ENOMEM;
-#endif
+ err = ubifs_debugging_init(c);
+ if (err)
+ return err;
err = check_volume_empty(c);
if (err)
goto out_free;
- if (c->empty && (mounted_read_only || c->ro_media)) {
+ if (c->empty && (c->ro_mount || c->ro_media)) {
/*
* This UBI volume is empty, and read-only, or the file system
* is mounted read-only - we cannot format it.
@@ -952,7 +1175,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_free;
}
- if (c->ro_media && !mounted_read_only) {
+ if (c->ro_media && !c->ro_mount) {
ubifs_err("cannot mount read-write - read-only media");
err = -EROFS;
goto out_free;
@@ -972,51 +1195,61 @@ static int mount_ubifs(struct ubifs_info *c)
if (!c->sbuf)
goto out_free;
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
c->ileb_buf = vmalloc(c->leb_size);
if (!c->ileb_buf)
goto out_free;
}
+ if (c->bulk_read == 1)
+ bu_init(c);
+
+ if (!c->ro_mount) {
+ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
+ GFP_KERNEL);
+ if (!c->write_reserve_buf)
+ goto out_free;
+ }
+
+ c->mounting = 1;
+
err = ubifs_read_superblock(c);
if (err)
goto out_free;
+ c->probing = 0;
+
/*
- * Make sure the compressor which is set as the default on in the
- * superblock was actually compiled in.
+ * Make sure the compressor which is set as default in the superblock
+ * or overridden by mount options is actually compiled in.
*/
if (!ubifs_compr_present(c->default_compr)) {
- ubifs_warn("'%s' compressor is set by superblock, but not "
- "compiled in", ubifs_compr_name(c->default_compr));
- c->default_compr = UBIFS_COMPR_NONE;
+ ubifs_err("'compressor \"%s\" is not compiled in",
+ ubifs_compr_name(c->default_compr));
+ err = -ENOTSUPP;
+ goto out_free;
}
- dbg_failure_mode_registration(c);
-
- err = init_constants_late(c);
+ err = init_constants_sb(c);
if (err)
- goto out_dereg;
+ goto out_free;
sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
c->cbuf = kmalloc(sz, GFP_NOFS);
if (!c->cbuf) {
err = -ENOMEM;
- goto out_dereg;
+ goto out_free;
}
- if (!mounted_read_only) {
- err = alloc_wbufs(c);
- if (err)
- goto out_cbuf;
+ err = alloc_wbufs(c);
+ if (err)
+ goto out_cbuf;
+ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
+ if (!c->ro_mount) {
/* Create background thread */
- sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
- c->vi.vol_id);
- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
- if (!c->bgt)
- c->bgt = ERR_PTR(-EINVAL);
+ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
@@ -1031,15 +1264,30 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_master;
+ init_constants_master(c);
+
if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
ubifs_msg("recovery needed");
c->need_recovery = 1;
- if (!mounted_read_only) {
- err = ubifs_recover_inl_heads(c, c->sbuf);
- if (err)
- goto out_master;
- }
- } else if (!mounted_read_only) {
+ }
+
+ if (c->need_recovery && !c->ro_mount) {
+ err = ubifs_recover_inl_heads(c, c->sbuf);
+ if (err)
+ goto out_master;
+ }
+
+ err = ubifs_lpt_init(c, 1, !c->ro_mount);
+ if (err)
+ goto out_master;
+
+ if (!c->ro_mount && c->space_fixup) {
+ err = ubifs_fixup_free_space(c);
+ if (err)
+ goto out_lpt;
+ }
+
+ if (!c->ro_mount) {
/*
* Set the "dirty" flag so that if we reboot uncleanly we
* will notice this immediately on the next mount.
@@ -1047,14 +1295,10 @@ static int mount_ubifs(struct ubifs_info *c)
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
err = ubifs_write_master(c);
if (err)
- goto out_master;
+ goto out_lpt;
}
- err = ubifs_lpt_init(c, 1, !mounted_read_only);
- if (err)
- goto out_lpt;
-
- err = dbg_check_idx_size(c, c->old_idx_sz);
+ err = dbg_check_idx_size(c, c->bi.old_idx_sz);
if (err)
goto out_lpt;
@@ -1062,19 +1306,19 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_journal;
- err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
+ /* Calculate 'min_idx_lebs' after journal replay */
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+ err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
if (err)
goto out_orphans;
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
int lnum;
- /* Check for enough free space */
- if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
- ubifs_err("insufficient available space");
- err = -EINVAL;
+ err = check_free_space(c);
+ if (err)
goto out_orphans;
- }
/* Check for enough log space */
lnum = c->lhead_lnum + 1;
@@ -1091,10 +1335,21 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_orphans;
err = ubifs_rcvry_gc_commit(c);
- } else
+ if (err)
+ goto out_orphans;
+ } else {
err = take_gc_lnum(c);
- if (err)
- goto out_orphans;
+ if (err)
+ goto out_orphans;
+
+ /*
+ * GC LEB may contain garbage if there was an unclean
+ * reboot, and it should be un-mapped.
+ */
+ err = ubifs_leb_unmap(c, c->gc_lnum);
+ if (err)
+ goto out_orphans;
+ }
err = dbg_check_lprops(c);
if (err)
@@ -1103,6 +1358,16 @@ static int mount_ubifs(struct ubifs_info *c)
err = ubifs_recover_size(c);
if (err)
goto out_orphans;
+ } else {
+ /*
+ * Even if we mount read-only, we have to set space in GC LEB
+ * to proper value because this affects UBIFS free space
+ * reporting. We do not want to have a situation when
+ * re-mounting from R/O to R/W changes amount of free space.
+ */
+ err = take_gc_lnum(c);
+ if (err)
+ goto out_orphans;
}
spin_lock(&ubifs_infos_lock);
@@ -1110,76 +1375,94 @@ static int mount_ubifs(struct ubifs_info *c)
spin_unlock(&ubifs_infos_lock);
if (c->need_recovery) {
- if (mounted_read_only)
+ if (c->ro_mount)
ubifs_msg("recovery deferred");
else {
c->need_recovery = 0;
ubifs_msg("recovery completed");
+ /*
+ * GC LEB has to be empty and taken at this point. But
+ * the journal head LEBs may also be accounted as
+ * "empty taken" if they are empty.
+ */
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
}
- }
+ } else
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
err = dbg_check_filesystem(c);
if (err)
goto out_infos;
- ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
- c->vi.vol_id);
- if (mounted_read_only)
- ubifs_msg("mounted read-only");
+ err = dbg_debugfs_init_fs(c);
+ if (err)
+ goto out_infos;
+
+ c->mounting = 0;
+
+ ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
+ c->vi.ubi_num, c->vi.vol_id, c->vi.name,
+ c->ro_mount ? ", R/O mode" : "");
x = (long long)c->main_lebs * c->leb_size;
- ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
- x, x >> 10, x >> 20, c->main_lebs);
- x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
- ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
- x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
- ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
- ubifs_msg("media format %d, latest format %d",
- c->fmt_version, UBIFS_FORMAT_VERSION);
-
- dbg_msg("compiled on: " __DATE__ " at " __TIME__);
- dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
- dbg_msg("LEB size: %d bytes (%d KiB)",
- c->leb_size, c->leb_size / 1024);
- dbg_msg("data journal heads: %d",
+ y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
+ ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
+ c->leb_size, c->leb_size >> 10, c->min_io_size,
+ c->max_write_size);
+ ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
+ x, x >> 20, c->main_lebs,
+ y, y >> 20, c->log_lebs + c->max_bud_cnt);
+ ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
+ c->report_rp_size, c->report_rp_size >> 10);
+ ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
+ c->fmt_version, c->ro_compat_version,
+ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
+ c->big_lpt ? ", big LPT model" : ", small LPT model");
+
+ dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr));
+ dbg_gen("data journal heads: %d",
c->jhead_cnt - NONDATA_JHEADS_CNT);
- dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
- "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
- c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
- c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
- c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
- c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
- dbg_msg("fast unmount: %d", c->fast_unmount);
- dbg_msg("big_lpt %d", c->big_lpt);
- dbg_msg("log LEBs: %d (%d - %d)",
+ dbg_gen("log LEBs: %d (%d - %d)",
c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
- dbg_msg("LPT area LEBs: %d (%d - %d)",
+ dbg_gen("LPT area LEBs: %d (%d - %d)",
c->lpt_lebs, c->lpt_first, c->lpt_last);
- dbg_msg("orphan area LEBs: %d (%d - %d)",
+ dbg_gen("orphan area LEBs: %d (%d - %d)",
c->orph_lebs, c->orph_first, c->orph_last);
- dbg_msg("main area LEBs: %d (%d - %d)",
+ dbg_gen("main area LEBs: %d (%d - %d)",
c->main_lebs, c->main_first, c->leb_cnt - 1);
- dbg_msg("index LEBs: %d", c->lst.idx_lebs);
- dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
- dbg_msg("key hash type: %d", c->key_hash_type);
- dbg_msg("tree fanout: %d", c->fanout);
- dbg_msg("reserved GC LEB: %d", c->gc_lnum);
- dbg_msg("first main LEB: %d", c->main_first);
- dbg_msg("dead watermark: %d", c->dead_wm);
- dbg_msg("dark watermark: %d", c->dark_wm);
+ dbg_gen("index LEBs: %d", c->lst.idx_lebs);
+ dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)",
+ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
+ c->bi.old_idx_sz >> 20);
+ dbg_gen("key hash type: %d", c->key_hash_type);
+ dbg_gen("tree fanout: %d", c->fanout);
+ dbg_gen("reserved GC LEB: %d", c->gc_lnum);
+ dbg_gen("max. znode size %d", c->max_znode_sz);
+ dbg_gen("max. index node size %d", c->max_idx_node_sz);
+ dbg_gen("node sizes: data %zu, inode %zu, dentry %zu",
+ UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
+ dbg_gen("node sizes: trun %zu, sb %zu, master %zu",
+ UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
+ dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu",
+ UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
+ dbg_gen("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
+ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
+ UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
+ dbg_gen("dead watermark: %d", c->dead_wm);
+ dbg_gen("dark watermark: %d", c->dark_wm);
+ dbg_gen("LEB overhead: %d", c->leb_overhead);
x = (long long)c->main_lebs * c->dark_wm;
- dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)",
+ dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)",
x, x >> 10, x >> 20);
- dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)",
+ dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)",
c->max_bud_bytes, c->max_bud_bytes >> 10,
c->max_bud_bytes >> 20);
- dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
+ dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
c->bg_bud_bytes, c->bg_bud_bytes >> 10,
c->bg_bud_bytes >> 20);
- dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)",
+ dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)",
c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
- dbg_msg("max. seq. number: %llu", c->max_sqnum);
- dbg_msg("commit number: %llu", c->cmt_no);
+ dbg_gen("max. seq. number: %llu", c->max_sqnum);
+ dbg_gen("commit number: %llu", c->cmt_no);
return 0;
@@ -1202,13 +1485,13 @@ out_wbufs:
free_wbufs(c);
out_cbuf:
kfree(c->cbuf);
-out_dereg:
- dbg_failure_mode_deregistration(c);
out_free:
+ kfree(c->write_reserve_buf);
+ kfree(c->bu.buf);
vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
- UBIFS_DBG(vfree(c->dbg_buf));
+ ubifs_debugging_exit(c);
return err;
}
@@ -1226,6 +1509,7 @@ static void ubifs_umount(struct ubifs_info *c)
dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
c->vi.vol_id);
+ dbg_debugfs_exit_fs(c);
spin_lock(&ubifs_infos_lock);
list_del(&c->infos_list);
spin_unlock(&ubifs_infos_lock);
@@ -1241,11 +1525,12 @@ static void ubifs_umount(struct ubifs_info *c)
kfree(c->cbuf);
kfree(c->rcvrd_mst_node);
kfree(c->mst_node);
+ kfree(c->write_reserve_buf);
+ kfree(c->bu.buf);
+ vfree(c->ileb_buf);
vfree(c->sbuf);
kfree(c->bottom_up_buf);
- UBIFS_DBG(vfree(c->dbg_buf));
- vfree(c->ileb_buf);
- dbg_failure_mode_deregistration(c);
+ ubifs_debugging_exit(c);
}
/**
@@ -1260,19 +1545,29 @@ static int ubifs_remount_rw(struct ubifs_info *c)
{
int err, lnum;
- if (c->ro_media)
- return -EINVAL;
+ if (c->rw_incompat) {
+ ubifs_err("the file-system is not R/W-compatible");
+ ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+ c->fmt_version, c->ro_compat_version,
+ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
+ return -EROFS;
+ }
mutex_lock(&c->umount_mutex);
+ dbg_save_space_info(c);
c->remounting_rw = 1;
+ c->ro_mount = 0;
- /* Check for enough free space */
- if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
- ubifs_err("insufficient available space");
- err = -EINVAL;
- goto out;
+ if (c->space_fixup) {
+ err = ubifs_fixup_free_space(c);
+ if (err)
+ goto out;
}
+ err = check_free_space(c);
+ if (err)
+ goto out;
+
if (c->old_leb_cnt != c->leb_cnt) {
struct ubifs_sb_node *sup;
@@ -1283,6 +1578,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
sup->leb_cnt = cpu_to_le32(c->leb_cnt);
err = ubifs_write_sb_node(c, sup);
+ kfree(sup);
if (err)
goto out;
}
@@ -1301,6 +1597,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
err = ubifs_recover_inl_heads(c, c->sbuf);
if (err)
goto out;
+ } else {
+ /* A readonly mount is not allowed to have orphans */
+ ubifs_assert(c->tot_orphans == 0);
+ err = ubifs_clear_orphans(c);
+ if (err)
+ goto out;
}
if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
@@ -1316,32 +1618,32 @@ static int ubifs_remount_rw(struct ubifs_info *c)
goto out;
}
- err = ubifs_lpt_init(c, 0, 1);
- if (err)
+ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
+ if (!c->write_reserve_buf) {
+ err = -ENOMEM;
goto out;
+ }
- err = alloc_wbufs(c);
+ err = ubifs_lpt_init(c, 0, 1);
if (err)
goto out;
- ubifs_create_buds_lists(c);
-
/* Create background thread */
- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
- if (!c->bgt)
- c->bgt = ERR_PTR(-EINVAL);
+ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
ubifs_err("cannot spawn \"%s\", error %d",
c->bgt_name, err);
- return err;
+ goto out;
}
wake_up_process(c->bgt);
c->orph_buf = vmalloc(c->leb_size);
- if (!c->orph_buf)
- return -ENOMEM;
+ if (!c->orph_buf) {
+ err = -ENOMEM;
+ goto out;
+ }
/* Check for enough log space */
lnum = c->lhead_lnum + 1;
@@ -1356,22 +1658,35 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (c->need_recovery)
err = ubifs_rcvry_gc_commit(c);
else
- err = take_gc_lnum(c);
+ err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
goto out;
+ dbg_gen("re-mounted read-write");
+ c->remounting_rw = 0;
+
if (c->need_recovery) {
c->need_recovery = 0;
ubifs_msg("deferred recovery completed");
+ } else {
+ /*
+ * Do not run the debugging space check if we were doing
+ * recovery, because when we saved the information we had the
+ * file-system in a state where the TNC and lprops have been
+ * modified in memory, but all the I/O operations (including a
+ * commit) were deferred. So the file-system was in
+ * "non-committed" state. Now the file-system is in committed
+ * state, and of course the amount of free space will change
+ * because, for example, the old index size was imprecise.
+ */
+ err = dbg_check_space_info(c);
}
- dbg_gen("re-mounted read-write");
- c->vfs_sb->s_flags &= ~MS_RDONLY;
- c->remounting_rw = 0;
mutex_unlock(&c->umount_mutex);
- return 0;
+ return err;
out:
+ c->ro_mount = 1;
vfree(c->orph_buf);
c->orph_buf = NULL;
if (c->bgt) {
@@ -1379,6 +1694,8 @@ out:
c->bgt = NULL;
}
free_wbufs(c);
+ kfree(c->write_reserve_buf);
+ c->write_reserve_buf = NULL;
vfree(c->ileb_buf);
c->ileb_buf = NULL;
ubifs_lpt_free(c, 1);
@@ -1388,42 +1705,18 @@ out:
}
/**
- * commit_on_unmount - commit the journal when un-mounting.
- * @c: UBIFS file-system description object
- *
- * This function is called during un-mounting and it commits the journal unless
- * the "fast unmount" mode is enabled. It also avoids committing the journal if
- * it contains too few data.
- *
- * Sometimes recovery requires the journal to be committed at least once, and
- * this function takes care about this.
- */
-static void commit_on_unmount(struct ubifs_info *c)
-{
- if (!c->fast_unmount) {
- long long bud_bytes;
-
- spin_lock(&c->buds_lock);
- bud_bytes = c->bud_bytes;
- spin_unlock(&c->buds_lock);
- if (bud_bytes > c->leb_size)
- ubifs_run_commit(c);
- }
-}
-
-/**
* ubifs_remount_ro - re-mount in read-only mode.
* @c: UBIFS file-system description object
*
- * We rely on VFS to have stopped writing. Possibly the background thread could
- * be running a commit, however kthread_stop will wait in that case.
+ * We assume VFS has stopped writing. Possibly the background thread could be
+ * running a commit, however kthread_stop will wait in that case.
*/
static void ubifs_remount_ro(struct ubifs_info *c)
{
int i, err;
ubifs_assert(!c->need_recovery);
- commit_on_unmount(c);
+ ubifs_assert(!c->ro_mount);
mutex_lock(&c->umount_mutex);
if (c->bgt) {
@@ -1431,27 +1724,29 @@ static void ubifs_remount_ro(struct ubifs_info *c)
c->bgt = NULL;
}
- for (i = 0; i < c->jhead_cnt; i++) {
+ dbg_save_space_info(c);
+
+ for (i = 0; i < c->jhead_cnt; i++)
ubifs_wbuf_sync(&c->jheads[i].wbuf);
- del_timer_sync(&c->jheads[i].wbuf.timer);
- }
- if (!c->ro_media) {
- c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
- c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
- c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
- err = ubifs_write_master(c);
- if (err)
- ubifs_ro_mode(c, err);
- }
+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
+ c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
+ err = ubifs_write_master(c);
+ if (err)
+ ubifs_ro_mode(c, err);
- ubifs_destroy_idx_gc(c);
- free_wbufs(c);
vfree(c->orph_buf);
c->orph_buf = NULL;
+ kfree(c->write_reserve_buf);
+ c->write_reserve_buf = NULL;
vfree(c->ileb_buf);
c->ileb_buf = NULL;
ubifs_lpt_free(c, 1);
+ c->ro_mount = 1;
+ err = dbg_check_space_info(c);
+ if (err)
+ ubifs_ro_mode(c, err);
mutex_unlock(&c->umount_mutex);
}
@@ -1462,14 +1757,17 @@ static void ubifs_put_super(struct super_block *sb)
ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
c->vi.vol_id);
+
/*
* The following asserts are only valid if there has not been a failure
* of the media. For example, there will be dirty inodes if we failed
* to write them back because of I/O errors.
*/
- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
- ubifs_assert(c->budg_idx_growth == 0);
- ubifs_assert(c->budg_data_growth == 0);
+ if (!c->ro_error) {
+ ubifs_assert(c->bi.idx_growth == 0);
+ ubifs_assert(c->bi.dd_growth == 0);
+ ubifs_assert(c->bi.data_growth == 0);
+ }
/*
* The 'c->umount_lock' prevents races between UBIFS memory shrinker
@@ -1478,7 +1776,7 @@ static void ubifs_put_super(struct super_block *sb)
* the mutex is locked.
*/
mutex_lock(&c->umount_mutex);
- if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (!c->ro_mount) {
/*
* First of all kill the background thread to make sure it does
* not interfere with un-mounting and freeing resources.
@@ -1488,25 +1786,22 @@ static void ubifs_put_super(struct super_block *sb)
c->bgt = NULL;
}
- /* Synchronize write-buffers */
- if (c->jheads)
- for (i = 0; i < c->jhead_cnt; i++) {
- ubifs_wbuf_sync(&c->jheads[i].wbuf);
- del_timer_sync(&c->jheads[i].wbuf.timer);
- }
-
/*
- * On fatal errors c->ro_media is set to 1, in which case we do
+ * On fatal errors c->ro_error is set to 1, in which case we do
* not write the master node.
*/
- if (!c->ro_media) {
+ if (!c->ro_error) {
+ int err;
+
+ /* Synchronize write-buffers */
+ for (i = 0; i < c->jhead_cnt; i++)
+ ubifs_wbuf_sync(&c->jheads[i].wbuf);
+
/*
* We are being cleanly unmounted which means the
* orphans were killed - indicate this in the master
* node. Also save the reserved GC LEB number.
*/
- int err;
-
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
@@ -1517,8 +1812,12 @@ static void ubifs_put_super(struct super_block *sb)
* next mount, so we just print a message and
* continue to unmount normally.
*/
- ubifs_err("failed to write master node, "
- "error %d", err);
+ ubifs_err("failed to write master node, error %d",
+ err);
+ } else {
+ for (i = 0; i < c->jhead_cnt; i++)
+ /* Make sure write-buffer timers are canceled */
+ hrtimer_cancel(&c->jheads[i].wbuf.timer);
}
}
@@ -1526,7 +1825,6 @@ static void ubifs_put_super(struct super_block *sb)
bdi_destroy(&c->bdi);
ubi_close_volume(c->ubi);
mutex_unlock(&c->umount_mutex);
- kfree(c);
}
static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1534,6 +1832,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
int err;
struct ubifs_info *c = sb->s_fs_info;
+ sync_filesystem(sb);
dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
err = ubifs_parse_options(c, data, 1);
@@ -1541,22 +1840,45 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
ubifs_err("invalid or unknown remount parameter");
return err;
}
- if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+
+ if (c->ro_mount && !(*flags & MS_RDONLY)) {
+ if (c->ro_error) {
+ ubifs_msg("cannot re-mount R/W due to prior errors");
+ return -EROFS;
+ }
+ if (c->ro_media) {
+ ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
+ return -EROFS;
+ }
err = ubifs_remount_rw(c);
if (err)
return err;
- } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
+ } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
+ if (c->ro_error) {
+ ubifs_msg("cannot re-mount R/O due to prior errors");
+ return -EROFS;
+ }
ubifs_remount_ro(c);
+ }
+ if (c->bulk_read == 1)
+ bu_init(c);
+ else {
+ dbg_gen("disable bulk-read");
+ kfree(c->bu.buf);
+ c->bu.buf = NULL;
+ }
+
+ ubifs_assert(c->lst.taken_empty_lebs > 0);
return 0;
}
-struct super_operations ubifs_super_operations = {
+const struct super_operations ubifs_super_operations = {
.alloc_inode = ubifs_alloc_inode,
.destroy_inode = ubifs_destroy_inode,
.put_super = ubifs_put_super,
.write_inode = ubifs_write_inode,
- .delete_inode = ubifs_delete_inode,
+ .evict_inode = ubifs_evict_inode,
.statfs = ubifs_statfs,
.dirty_inode = ubifs_dirty_inode,
.remount_fs = ubifs_remount_fs,
@@ -1569,22 +1891,32 @@ struct super_operations ubifs_super_operations = {
* @name: UBI volume name
* @mode: UBI volume open mode
*
- * There are several ways to specify UBI volumes when mounting UBIFS:
- * o ubiX_Y - UBI device number X, volume Y;
- * o ubiY - UBI device number 0, volume Y;
+ * The primary method of mounting UBIFS is by specifying the UBI volume
+ * character device node path. However, UBIFS may also be mounted without any
+ * character device node using one of the following methods:
+ *
+ * o ubiX_Y - mount UBI device number X, volume Y;
+ * o ubiY - mount UBI device number 0, volume Y;
* o ubiX:NAME - mount UBI device X, volume with name NAME;
* o ubi:NAME - mount UBI device 0, volume with name NAME.
*
* Alternative '!' separator may be used instead of ':' (because some shells
* like busybox may interpret ':' as an NFS host name separator). This function
- * returns ubi volume object in case of success and a negative error code in
- * case of failure.
+ * returns UBI volume description object in case of success and a negative
+ * error code in case of failure.
*/
static struct ubi_volume_desc *open_ubi(const char *name, int mode)
{
+ struct ubi_volume_desc *ubi;
int dev, vol;
char *endptr;
+ /* First, try to open using the device node path method */
+ ubi = ubi_open_volume_path(name, mode);
+ if (!IS_ERR(ubi))
+ return ubi;
+
+ /* Try the "nodev" method */
if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
return ERR_PTR(-EINVAL);
@@ -1616,85 +1948,94 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
return ERR_PTR(-EINVAL);
}
-static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
{
- struct ubi_volume_desc *ubi = sb->s_fs_info;
struct ubifs_info *c;
- struct inode *root;
- int err;
c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
+ if (c) {
+ spin_lock_init(&c->cnt_lock);
+ spin_lock_init(&c->cs_lock);
+ spin_lock_init(&c->buds_lock);
+ spin_lock_init(&c->space_lock);
+ spin_lock_init(&c->orphan_lock);
+ init_rwsem(&c->commit_sem);
+ mutex_init(&c->lp_mutex);
+ mutex_init(&c->tnc_mutex);
+ mutex_init(&c->log_mutex);
+ mutex_init(&c->mst_mutex);
+ mutex_init(&c->umount_mutex);
+ mutex_init(&c->bu_mutex);
+ mutex_init(&c->write_reserve_mutex);
+ init_waitqueue_head(&c->cmt_wq);
+ c->buds = RB_ROOT;
+ c->old_idx = RB_ROOT;
+ c->size_tree = RB_ROOT;
+ c->orph_tree = RB_ROOT;
+ INIT_LIST_HEAD(&c->infos_list);
+ INIT_LIST_HEAD(&c->idx_gc);
+ INIT_LIST_HEAD(&c->replay_list);
+ INIT_LIST_HEAD(&c->replay_buds);
+ INIT_LIST_HEAD(&c->uncat_list);
+ INIT_LIST_HEAD(&c->empty_list);
+ INIT_LIST_HEAD(&c->freeable_list);
+ INIT_LIST_HEAD(&c->frdi_idx_list);
+ INIT_LIST_HEAD(&c->unclean_leb_list);
+ INIT_LIST_HEAD(&c->old_buds);
+ INIT_LIST_HEAD(&c->orph_list);
+ INIT_LIST_HEAD(&c->orph_new);
+ c->no_chk_data_crc = 1;
+
+ c->highest_inum = UBIFS_FIRST_INO;
+ c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
+
+ ubi_get_volume_info(ubi, &c->vi);
+ ubi_get_device_info(c->vi.ubi_num, &c->di);
+ }
+ return c;
+}
- spin_lock_init(&c->cnt_lock);
- spin_lock_init(&c->cs_lock);
- spin_lock_init(&c->buds_lock);
- spin_lock_init(&c->space_lock);
- spin_lock_init(&c->orphan_lock);
- init_rwsem(&c->commit_sem);
- mutex_init(&c->lp_mutex);
- mutex_init(&c->tnc_mutex);
- mutex_init(&c->log_mutex);
- mutex_init(&c->mst_mutex);
- mutex_init(&c->umount_mutex);
- init_waitqueue_head(&c->cmt_wq);
- c->buds = RB_ROOT;
- c->old_idx = RB_ROOT;
- c->size_tree = RB_ROOT;
- c->orph_tree = RB_ROOT;
- INIT_LIST_HEAD(&c->infos_list);
- INIT_LIST_HEAD(&c->idx_gc);
- INIT_LIST_HEAD(&c->replay_list);
- INIT_LIST_HEAD(&c->replay_buds);
- INIT_LIST_HEAD(&c->uncat_list);
- INIT_LIST_HEAD(&c->empty_list);
- INIT_LIST_HEAD(&c->freeable_list);
- INIT_LIST_HEAD(&c->frdi_idx_list);
- INIT_LIST_HEAD(&c->unclean_leb_list);
- INIT_LIST_HEAD(&c->old_buds);
- INIT_LIST_HEAD(&c->orph_list);
- INIT_LIST_HEAD(&c->orph_new);
-
- c->highest_inum = UBIFS_FIRST_INO;
- get_random_bytes(&c->vfs_gen, sizeof(int));
- c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
-
- ubi_get_volume_info(ubi, &c->vi);
- ubi_get_device_info(c->vi.ubi_num, &c->di);
+static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct ubifs_info *c = sb->s_fs_info;
+ struct inode *root;
+ int err;
+ c->vfs_sb = sb;
/* Re-open the UBI device in read-write mode */
c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
if (IS_ERR(c->ubi)) {
err = PTR_ERR(c->ubi);
- goto out_free;
+ goto out;
}
/*
- * UBIFS provids 'backing_dev_info' in order to disable readahead. For
+ * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
* UBIFS, I/O is not deferred, it is done immediately in readpage,
* which means the user would have to wait not just for their own I/O
- * but the readahead I/O as well i.e. completely pointless.
+ * but the read-ahead I/O as well i.e. completely pointless.
*
* Read-ahead will be disabled because @c->bdi.ra_pages is 0.
*/
+ c->bdi.name = "ubifs",
c->bdi.capabilities = BDI_CAP_MAP_COPY;
- c->bdi.unplug_io_fn = default_unplug_io_fn;
err = bdi_init(&c->bdi);
if (err)
goto out_close;
+ err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
+ c->vi.ubi_num, c->vi.vol_id);
+ if (err)
+ goto out_bdi;
err = ubifs_parse_options(c, data, 0);
if (err)
goto out_bdi;
- c->vfs_sb = sb;
-
+ sb->s_bdi = &c->bdi;
sb->s_fs_info = c;
sb->s_magic = UBIFS_SUPER_MAGIC;
sb->s_blocksize = UBIFS_BLOCK_SIZE;
sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
- sb->s_dev = c->vi.cdev;
sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
if (c->max_inode_sz > MAX_LFS_FILESIZE)
sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
@@ -1714,16 +2055,15 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
goto out_umount;
}
- sb->s_root = d_alloc_root(root);
- if (!sb->s_root)
- goto out_iput;
+ sb->s_root = d_make_root(root);
+ if (!sb->s_root) {
+ err = -ENOMEM;
+ goto out_umount;
+ }
mutex_unlock(&c->umount_mutex);
-
return 0;
-out_iput:
- iput(root);
out_umount:
ubifs_umount(c);
out_unlock:
@@ -1732,31 +2072,29 @@ out_bdi:
bdi_destroy(&c->bdi);
out_close:
ubi_close_volume(c->ubi);
-out_free:
- kfree(c);
+out:
return err;
}
static int sb_test(struct super_block *sb, void *data)
{
- dev_t *dev = data;
+ struct ubifs_info *c1 = data;
+ struct ubifs_info *c = sb->s_fs_info;
- return sb->s_dev == *dev;
+ return c->vi.cdev == c1->vi.cdev;
}
static int sb_set(struct super_block *sb, void *data)
{
- dev_t *dev = data;
-
- sb->s_dev = *dev;
- return 0;
+ sb->s_fs_info = data;
+ return set_anon_super(sb, NULL);
}
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
- const char *name, void *data, struct vfsmount *mnt)
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
+ const char *name, void *data)
{
struct ubi_volume_desc *ubi;
- struct ubi_volume_info vi;
+ struct ubifs_info *c;
struct super_block *sb;
int err;
@@ -1771,32 +2109,34 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
if (IS_ERR(ubi)) {
ubifs_err("cannot open \"%s\", error %d",
name, (int)PTR_ERR(ubi));
- return PTR_ERR(ubi);
+ return ERR_CAST(ubi);
}
- ubi_get_volume_info(ubi, &vi);
- dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
+ c = alloc_ubifs_info(ubi);
+ if (!c) {
+ err = -ENOMEM;
+ goto out_close;
+ }
- sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
+ dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
+
+ sb = sget(fs_type, sb_test, sb_set, flags, c);
if (IS_ERR(sb)) {
err = PTR_ERR(sb);
+ kfree(c);
goto out_close;
}
if (sb->s_root) {
+ struct ubifs_info *c1 = sb->s_fs_info;
+ kfree(c);
/* A new mount point for already mounted UBIFS */
dbg_gen("this ubi volume is already mounted");
- if ((flags ^ sb->s_flags) & MS_RDONLY) {
+ if (!!(flags & MS_RDONLY) != c1->ro_mount) {
err = -EBUSY;
goto out_deact;
}
} else {
- sb->s_flags = flags;
- /*
- * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
- * replaced by 'c'.
- */
- sb->s_fs_info = ubi;
err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
if (err)
goto out_deact;
@@ -1807,36 +2147,29 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- return simple_set_mnt(mnt, sb);
+ return dget(sb->s_root);
out_deact:
- up_write(&sb->s_umount);
- deactivate_super(sb);
+ deactivate_locked_super(sb);
out_close:
ubi_close_volume(ubi);
- return err;
+ return ERR_PTR(err);
}
-static void ubifs_kill_sb(struct super_block *sb)
+static void kill_ubifs_super(struct super_block *s)
{
- struct ubifs_info *c = sb->s_fs_info;
-
- /*
- * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
- * in order to be outside BKL.
- */
- if (sb->s_root && !(sb->s_flags & MS_RDONLY))
- commit_on_unmount(c);
- /* The un-mount routine is actually done in put_super() */
- generic_shutdown_super(sb);
+ struct ubifs_info *c = s->s_fs_info;
+ kill_anon_super(s);
+ kfree(c);
}
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
- .get_sb = ubifs_get_sb,
- .kill_sb = ubifs_kill_sb
+ .mount = ubifs_mount,
+ .kill_sb = kill_ubifs_super,
};
+MODULE_ALIAS_FS("ubifs");
/*
* Inode slab cache constructor.
@@ -1891,43 +2224,54 @@ static int __init ubifs_init(void)
BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
/*
+ * We use 2 bit wide bit-fields to store compression type, which should
+ * be amended if more compressors are added. The bit-fields are:
+ * @compr_type in 'struct ubifs_inode', @default_compr in
+ * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'.
+ */
+ BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
+
+ /*
* We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
* UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
*/
if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
- ubifs_err("VFS page cache size is %u bytes, but UBIFS requires"
- " at least 4096 bytes",
+ ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
(unsigned int)PAGE_CACHE_SIZE);
return -EINVAL;
}
- err = register_filesystem(&ubifs_fs_type);
- if (err) {
- ubifs_err("cannot register file system, error %d", err);
- return err;
- }
-
- err = -ENOMEM;
ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
sizeof(struct ubifs_inode), 0,
SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
&inode_slab_ctor);
if (!ubifs_inode_slab)
- goto out_reg;
+ return -ENOMEM;
register_shrinker(&ubifs_shrinker_info);
err = ubifs_compressors_init();
if (err)
+ goto out_shrinker;
+
+ err = dbg_debugfs_init();
+ if (err)
goto out_compr;
+ err = register_filesystem(&ubifs_fs_type);
+ if (err) {
+ ubifs_err("cannot register file system, error %d", err);
+ goto out_dbg;
+ }
return 0;
+out_dbg:
+ dbg_debugfs_exit();
out_compr:
+ ubifs_compressors_exit();
+out_shrinker:
unregister_shrinker(&ubifs_shrinker_info);
kmem_cache_destroy(ubifs_inode_slab);
-out_reg:
- unregister_filesystem(&ubifs_fs_type);
return err;
}
/* late_initcall to let compressors initialize first */
@@ -1938,8 +2282,15 @@ static void __exit ubifs_exit(void)
ubifs_assert(list_empty(&ubifs_infos));
ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+ dbg_debugfs_exit();
ubifs_compressors_exit();
unregister_shrinker(&ubifs_shrinker_info);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ubifs_inode_slab);
unregister_filesystem(&ubifs_fs_type);
}
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a9644..8a40cf9c02d 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -31,6 +31,7 @@
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/*
@@ -177,27 +178,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c,
*/
void destroy_old_idx(struct ubifs_info *c)
{
- struct rb_node *this = c->old_idx.rb_node;
- struct ubifs_old_idx *old_idx;
+ struct ubifs_old_idx *old_idx, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- old_idx = rb_entry(this, struct ubifs_old_idx, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &old_idx->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb)
kfree(old_idx);
- }
+
c->old_idx = RB_ROOT;
}
@@ -222,7 +207,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
__set_bit(DIRTY_ZNODE, &zn->flags);
__clear_bit(COW_ZNODE, &zn->flags);
- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_obsolete(znode));
__set_bit(OBSOLETE_ZNODE, &znode->flags);
if (znode->level != 0) {
@@ -270,7 +255,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
struct ubifs_znode *zn;
int err;
- if (!test_bit(COW_ZNODE, &znode->flags)) {
+ if (!ubifs_zn_cow(znode)) {
/* znode is not being committed */
if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
atomic_long_inc(&c->dirty_zn_cnt);
@@ -284,7 +269,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
}
zn = copy_znode(c, znode);
- if (unlikely(IS_ERR(zn)))
+ if (IS_ERR(zn))
return zn;
if (zbr->len) {
@@ -338,17 +323,16 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
err = ubifs_validate_entry(c, dent);
if (err) {
- dbg_dump_stack();
- dbg_dump_node(c, dent);
+ dump_stack();
+ ubifs_dump_node(c, dent);
return err;
}
- lnc_node = kmalloc(zbr->len, GFP_NOFS);
+ lnc_node = kmemdup(node, zbr->len, GFP_NOFS);
if (!lnc_node)
/* We don't have to have the cache, so no error */
return 0;
- memcpy(lnc_node, node, zbr->len);
zbr->leaf = lnc_node;
return 0;
}
@@ -372,8 +356,8 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
err = ubifs_validate_entry(c, node);
if (err) {
- dbg_dump_stack();
- dbg_dump_node(c, node);
+ dump_stack();
+ ubifs_dump_node(c, node);
return err;
}
@@ -443,6 +427,14 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
* This function performs that same function as ubifs_read_node except that
* it does not require that there is actually a node present and instead
* the return code indicates if a node was read.
+ *
+ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
+ * is true (it is controlled by corresponding mount option). However, if
+ * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
+ * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
+ * because during mounting or re-mounting from R/O mode to R/W mode we may read
+ * journal nodes (when replaying the journal or doing the recovery) and the
+ * journal nodes may potentially be corrupted, so checking is required.
*/
static int try_read_node(const struct ubifs_info *c, void *buf, int type,
int len, int lnum, int offs)
@@ -453,7 +445,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
- err = ubi_read(c->ubi, lnum, buf, offs, len);
+ err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
if (err) {
ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
type, lnum, offs, err);
@@ -470,6 +462,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
if (node_len != len)
return 0;
+ if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
+ !c->remounting_rw)
+ return 1;
+
crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
node_crc = le32_to_cpu(ch->crc);
if (crc != node_crc)
@@ -493,7 +489,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
{
int ret;
- dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
+ dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
zbr->offs);
@@ -506,9 +502,9 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
if (keys_cmp(c, key, &node_key) != 0)
ret = 0;
}
- if (ret == 0)
- dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
- zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
+ if (ret == 0 && c->replaying)
+ dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ",
+ zbr->lnum, zbr->offs, zbr->len);
return ret;
}
@@ -983,9 +979,9 @@ static int fallible_resolve_collision(struct ubifs_info *c,
if (adding || !o_znode)
return 0;
- dbg_mnt("dangling match LEB %d:%d len %d %s",
+ dbg_mntk(key, "dangling match LEB %d:%d len %d key ",
o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
- o_znode->zbranch[o_n].len, DBGKEY(key));
+ o_znode->zbranch[o_n].len);
*zn = o_znode;
*n = o_n;
return 1;
@@ -1128,7 +1124,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
ubifs_assert(znode == c->zroot.znode);
znode = dirty_cow_znode(c, &c->zroot);
}
- if (unlikely(IS_ERR(znode)) || !p)
+ if (IS_ERR(znode) || !p)
break;
ubifs_assert(path[p - 1] >= 0);
ubifs_assert(path[p - 1] < znode->child_cnt);
@@ -1151,8 +1147,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
* o exact match, i.e. the found zero-level znode contains key @key, then %1
* is returned and slot number of the matched branch is stored in @n;
* o not exact match, which means that zero-level znode does not contain
- * @key, then %0 is returned and slot number of the closed branch is stored
- * in @n;
+ * @key, then %0 is returned and slot number of the closest branch is stored
+ * in @n;
* o @key is so small that it is even less than the lowest key of the
* leftmost zero-level node, then %0 is returned and %0 is stored in @n.
*
@@ -1167,7 +1163,8 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
unsigned long time = get_seconds();
- dbg_tnc("search key %s", DBGKEY(key));
+ dbg_tnck(key, "search key ");
+ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
znode = c->zroot.znode;
if (unlikely(!znode)) {
@@ -1244,7 +1241,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
* splitting in the middle of the colliding sequence. Also, when
* removing the leftmost key, we would have to correct the key of the
* parent node, which would introduce additional complications. Namely,
- * if we changed the the leftmost key of the parent znode, the garbage
+ * if we changed the leftmost key of the parent znode, the garbage
* collector would be unable to find it (GC is doing this when GC'ing
* indexing LEBs). Although we already have an additional RB-tree where
* we save such changed znodes (see 'ins_clr_old_idx_znode()') until
@@ -1302,7 +1299,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
unsigned long time = get_seconds();
- dbg_tnc("search and dirty key %s", DBGKEY(key));
+ dbg_tnck(key, "search and dirty key ");
znode = c->zroot.znode;
if (unlikely(!znode)) {
@@ -1382,23 +1379,62 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
}
/**
- * ubifs_tnc_lookup - look up a file-system node.
+ * maybe_leb_gced - determine if a LEB may have been garbage collected.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number
+ * @gc_seq1: garbage collection sequence number
+ *
+ * This function determines if @lnum may have been garbage collected since
+ * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
+ * %0 is returned.
+ */
+static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
+{
+ int gc_seq2, gced_lnum;
+
+ gced_lnum = c->gced_lnum;
+ smp_rmb();
+ gc_seq2 = c->gc_seq;
+ /* Same seq means no GC */
+ if (gc_seq1 == gc_seq2)
+ return 0;
+ /* Different by more than 1 means we don't know */
+ if (gc_seq1 + 1 != gc_seq2)
+ return 1;
+ /*
+ * We have seen the sequence number has increased by 1. Now we need to
+ * be sure we read the right LEB number, so read it again.
+ */
+ smp_rmb();
+ if (gced_lnum != c->gced_lnum)
+ return 1;
+ /* Finally we can check lnum */
+ if (gced_lnum == lnum)
+ return 1;
+ return 0;
+}
+
+/**
+ * ubifs_tnc_locate - look up a file-system node and return it and its location.
* @c: UBIFS file-system description object
* @key: node key to lookup
* @node: the node is returned here
+ * @lnum: LEB number is returned here
+ * @offs: offset is returned here
*
- * This function look up and reads node with key @key. The caller has to make
+ * This function looks up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
- * case of failure.
+ * case of failure. The node location can be returned in @lnum and @offs.
*/
-int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
- void *node)
+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
+ void *node, int *lnum, int *offs)
{
- int found, n, err;
+ int found, n, err, safely = 0, gc_seq1;
struct ubifs_znode *znode;
struct ubifs_zbranch zbr, *zt;
+again:
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
@@ -1409,6 +1445,10 @@ int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
goto out;
}
zt = &znode->zbranch[n];
+ if (lnum) {
+ *lnum = zt->lnum;
+ *offs = zt->offs;
+ }
if (is_hash_key(c, key)) {
/*
* In this case the leaf node cache gets used, so we pass the
@@ -1417,11 +1457,31 @@ int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
err = tnc_read_node_nm(c, zt, node);
goto out;
}
+ if (safely) {
+ err = ubifs_tnc_read_node(c, zt, node);
+ goto out;
+ }
+ /* Drop the TNC mutex prematurely and race with garbage collection */
zbr = znode->zbranch[n];
+ gc_seq1 = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
+ if (ubifs_get_wbuf(c, zbr.lnum)) {
+ /* We do not GC journal heads */
+ err = ubifs_tnc_read_node(c, &zbr, node);
+ return err;
+ }
+
+ err = fallible_read_node(c, key, &zbr, node);
+ if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
+ /*
+ * The node may have been GC'ed out from under us so try again
+ * while keeping the TNC mutex locked.
+ */
+ safely = 1;
+ goto again;
+ }
+ return 0;
out:
mutex_unlock(&c->tnc_mutex);
@@ -1429,58 +1489,294 @@ out:
}
/**
- * ubifs_tnc_locate - look up a file-system node and return it and its location.
+ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
* @c: UBIFS file-system description object
- * @key: node key to lookup
- * @node: the node is returned here
- * @lnum: LEB number is returned here
- * @offs: offset is returned here
+ * @bu: bulk-read parameters and results
*
- * This function is the same as 'ubifs_tnc_lookup()' but it returns the node
- * location also. See 'ubifs_tnc_lookup()'.
+ * Lookup consecutive data node keys for the same inode that reside
+ * consecutively in the same LEB. This function returns zero in case of success
+ * and a negative error code in case of failure.
+ *
+ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
+ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
+ * maximum possible amount of nodes for bulk-read.
*/
-int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
- void *node, int *lnum, int *offs)
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
{
- int found, n, err;
+ int n, err = 0, lnum = -1, uninitialized_var(offs);
+ int uninitialized_var(len);
+ unsigned int block = key_block(c, &bu->key);
struct ubifs_znode *znode;
- struct ubifs_zbranch zbr, *zt;
+
+ bu->cnt = 0;
+ bu->blk_cnt = 0;
+ bu->eof = 0;
mutex_lock(&c->tnc_mutex);
- found = ubifs_lookup_level0(c, key, &znode, &n);
- if (!found) {
- err = -ENOENT;
- goto out;
- } else if (found < 0) {
- err = found;
+ /* Find first key */
+ err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
+ if (err < 0)
goto out;
+ if (err) {
+ /* Key found */
+ len = znode->zbranch[n].len;
+ /* The buffer must be big enough for at least 1 node */
+ if (len > bu->buf_len) {
+ err = -EINVAL;
+ goto out;
+ }
+ /* Add this key */
+ bu->zbranch[bu->cnt++] = znode->zbranch[n];
+ bu->blk_cnt += 1;
+ lnum = znode->zbranch[n].lnum;
+ offs = ALIGN(znode->zbranch[n].offs + len, 8);
}
- zt = &znode->zbranch[n];
- if (is_hash_key(c, key)) {
- /*
- * In this case the leaf node cache gets used, so we pass the
- * address of the zbranch and keep the mutex locked
- */
- *lnum = zt->lnum;
- *offs = zt->offs;
- err = tnc_read_node_nm(c, zt, node);
- goto out;
+ while (1) {
+ struct ubifs_zbranch *zbr;
+ union ubifs_key *key;
+ unsigned int next_block;
+
+ /* Find next key */
+ err = tnc_next(c, &znode, &n);
+ if (err)
+ goto out;
+ zbr = &znode->zbranch[n];
+ key = &zbr->key;
+ /* See if there is another data key for this file */
+ if (key_inum(c, key) != key_inum(c, &bu->key) ||
+ key_type(c, key) != UBIFS_DATA_KEY) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (lnum < 0) {
+ /* First key found */
+ lnum = zbr->lnum;
+ offs = ALIGN(zbr->offs + zbr->len, 8);
+ len = zbr->len;
+ if (len > bu->buf_len) {
+ err = -EINVAL;
+ goto out;
+ }
+ } else {
+ /*
+ * The data nodes must be in consecutive positions in
+ * the same LEB.
+ */
+ if (zbr->lnum != lnum || zbr->offs != offs)
+ goto out;
+ offs += ALIGN(zbr->len, 8);
+ len = ALIGN(len, 8) + zbr->len;
+ /* Must not exceed buffer length */
+ if (len > bu->buf_len)
+ goto out;
+ }
+ /* Allow for holes */
+ next_block = key_block(c, key);
+ bu->blk_cnt += (next_block - block - 1);
+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
+ block = next_block;
+ /* Add this key */
+ bu->zbranch[bu->cnt++] = *zbr;
+ bu->blk_cnt += 1;
+ /* See if we have room for more */
+ if (bu->cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
+ goto out;
}
- zbr = znode->zbranch[n];
+out:
+ if (err == -ENOENT) {
+ bu->eof = 1;
+ err = 0;
+ }
+ bu->gc_seq = c->gc_seq;
mutex_unlock(&c->tnc_mutex);
+ if (err)
+ return err;
+ /*
+ * An enormous hole could cause bulk-read to encompass too many
+ * page cache pages, so limit the number here.
+ */
+ if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
+ bu->blk_cnt = UBIFS_MAX_BULK_READ;
+ /*
+ * Ensure that bulk-read covers a whole number of page cache
+ * pages.
+ */
+ if (UBIFS_BLOCKS_PER_PAGE == 1 ||
+ !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
+ return 0;
+ if (bu->eof) {
+ /* At the end of file we can round up */
+ bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
+ return 0;
+ }
+ /* Exclude data nodes that do not make up a whole page cache page */
+ block = key_block(c, &bu->key) + bu->blk_cnt;
+ block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
+ while (bu->cnt) {
+ if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
+ break;
+ bu->cnt -= 1;
+ }
+ return 0;
+}
- *lnum = zbr.lnum;
- *offs = zbr.offs;
+/**
+ * read_wbuf - bulk-read from a LEB with a wbuf.
+ * @wbuf: wbuf that may overlap the read
+ * @buf: buffer into which to read
+ * @len: read length
+ * @lnum: LEB number from which to read
+ * @offs: offset from which to read
+ *
+ * This function returns %0 on success or a negative error code on failure.
+ */
+static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
+ int offs)
+{
+ const struct ubifs_info *c = wbuf->c;
+ int rlen, overlap;
- err = ubifs_tnc_read_node(c, &zbr, node);
- return err;
+ dbg_io("LEB %d:%d, length %d", lnum, offs, len);
+ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(!(offs & 7) && offs < c->leb_size);
+ ubifs_assert(offs + len <= c->leb_size);
+ spin_lock(&wbuf->lock);
+ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
+ if (!overlap) {
+ /* We may safely unlock the write-buffer and read the data */
+ spin_unlock(&wbuf->lock);
+ return ubifs_leb_read(c, lnum, buf, offs, len, 0);
+ }
+
+ /* Don't read under wbuf */
+ rlen = wbuf->offs - offs;
+ if (rlen < 0)
+ rlen = 0;
+
+ /* Copy the rest from the write-buffer */
+ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
+ spin_unlock(&wbuf->lock);
+
+ if (rlen > 0)
+ /* Read everything that goes before write-buffer */
+ return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
+
+ return 0;
+}
+
+/**
+ * validate_data_node - validate data nodes for bulk-read.
+ * @c: UBIFS file-system description object
+ * @buf: buffer containing data node to validate
+ * @zbr: zbranch of data node to validate
+ *
+ * This function returns %0 on success or a negative error code on failure.
+ */
+static int validate_data_node(struct ubifs_info *c, void *buf,
+ struct ubifs_zbranch *zbr)
+{
+ union ubifs_key key1;
+ struct ubifs_ch *ch = buf;
+ int err, len;
+
+ if (ch->node_type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type (%d but expected %d)",
+ ch->node_type, UBIFS_DATA_NODE);
+ goto out_err;
+ }
+
+ err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
+ if (err) {
+ ubifs_err("expected node type %d", UBIFS_DATA_NODE);
+ goto out;
+ }
+
+ len = le32_to_cpu(ch->len);
+ if (len != zbr->len) {
+ ubifs_err("bad node length %d, expected %d", len, zbr->len);
+ goto out_err;
+ }
+
+ /* Make sure the key of the read node is correct */
+ key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
+ if (!keys_eq(c, &zbr->key, &key1)) {
+ ubifs_err("bad key in node at LEB %d:%d",
+ zbr->lnum, zbr->offs);
+ dbg_tnck(&zbr->key, "looked for key ");
+ dbg_tnck(&key1, "found node's key ");
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ err = -EINVAL;
out:
- mutex_unlock(&c->tnc_mutex);
+ ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
+ ubifs_dump_node(c, buf);
+ dump_stack();
return err;
}
/**
+ * ubifs_tnc_bulk_read - read a number of data nodes in one go.
+ * @c: UBIFS file-system description object
+ * @bu: bulk-read parameters and results
+ *
+ * This function reads and validates the data nodes that were identified by the
+ * 'ubifs_tnc_get_bu_keys()' function. This function returns %0 on success,
+ * %-EAGAIN to indicate a race with GC, or another negative error code on
+ * failure.
+ */
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
+{
+ int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
+ struct ubifs_wbuf *wbuf;
+ void *buf;
+
+ len = bu->zbranch[bu->cnt - 1].offs;
+ len += bu->zbranch[bu->cnt - 1].len - offs;
+ if (len > bu->buf_len) {
+ ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
+ return -EINVAL;
+ }
+
+ /* Do the read */
+ wbuf = ubifs_get_wbuf(c, lnum);
+ if (wbuf)
+ err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
+ else
+ err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
+
+ /* Check for a race with GC */
+ if (maybe_leb_gced(c, lnum, bu->gc_seq))
+ return -EAGAIN;
+
+ if (err && err != -EBADMSG) {
+ ubifs_err("failed to read from LEB %d:%d, error %d",
+ lnum, offs, err);
+ dump_stack();
+ dbg_tnck(&bu->key, "key ");
+ return err;
+ }
+
+ /* Validate the nodes read */
+ buf = bu->buf;
+ for (i = 0; i < bu->cnt; i++) {
+ err = validate_data_node(c, buf, &bu->zbranch[i]);
+ if (err)
+ return err;
+ buf = buf + ALIGN(bu->zbranch[i].len, 8);
+ }
+
+ return 0;
+}
+
+/**
* do_lookup_nm- look up a "hashed" node.
* @c: UBIFS file-system description object
* @key: node key to lookup
@@ -1498,9 +1794,8 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
{
int found, n, err;
struct ubifs_znode *znode;
- struct ubifs_zbranch zbr;
- dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
+ dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name);
mutex_lock(&c->tnc_mutex);
found = ubifs_lookup_level0(c, key, &znode, &n);
if (!found) {
@@ -1522,11 +1817,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
goto out_unlock;
}
- zbr = znode->zbranch[n];
- mutex_unlock(&c->tnc_mutex);
-
- err = tnc_read_node_nm(c, &zbr, node);
- return err;
+ err = tnc_read_node_nm(c, &znode->zbranch[n], node);
out_unlock:
mutex_unlock(&c->tnc_mutex);
@@ -1669,7 +1960,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
{
struct ubifs_znode *zn, *zi, *zp;
int i, keep, move, appending = 0;
- union ubifs_key *key = &zbr->key;
+ union ubifs_key *key = &zbr->key, *key1;
ubifs_assert(n >= 0 && n <= c->fanout);
@@ -1678,8 +1969,7 @@ again:
zp = znode->parent;
if (znode->child_cnt < c->fanout) {
ubifs_assert(n != c->fanout);
- dbg_tnc("inserted at %d level %d, key %s", n, znode->level,
- DBGKEY(key));
+ dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level);
insert_zbranch(znode, zbr, n);
@@ -1694,7 +1984,7 @@ again:
* Unfortunately, @znode does not have more empty slots and we have to
* split it.
*/
- dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key));
+ dbg_tnck(key, "splitting level %d, key ", znode->level);
if (znode->alt)
/*
@@ -1710,20 +2000,33 @@ again:
zn->level = znode->level;
/* Decide where to split */
- if (znode->level == 0 && n == c->fanout &&
- key_type(c, key) == UBIFS_DATA_KEY) {
- union ubifs_key *key1;
-
- /*
- * If this is an inode which is being appended - do not split
- * it because no other zbranches can be inserted between
- * zbranches of consecutive data nodes anyway.
- */
- key1 = &znode->zbranch[n - 1].key;
- if (key_inum(c, key1) == key_inum(c, key) &&
- key_type(c, key1) == UBIFS_DATA_KEY &&
- key_block(c, key1) == key_block(c, key) - 1)
- appending = 1;
+ if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
+ /* Try not to split consecutive data keys */
+ if (n == c->fanout) {
+ key1 = &znode->zbranch[n - 1].key;
+ if (key_inum(c, key1) == key_inum(c, key) &&
+ key_type(c, key1) == UBIFS_DATA_KEY)
+ appending = 1;
+ } else
+ goto check_split;
+ } else if (appending && n != c->fanout) {
+ /* Try not to split consecutive data keys */
+ appending = 0;
+check_split:
+ if (n >= (c->fanout + 1) / 2) {
+ key1 = &znode->zbranch[0].key;
+ if (key_inum(c, key1) == key_inum(c, key) &&
+ key_type(c, key1) == UBIFS_DATA_KEY) {
+ key1 = &znode->zbranch[n].key;
+ if (key_inum(c, key1) != key_inum(c, key) ||
+ key_type(c, key1) != UBIFS_DATA_KEY) {
+ keep = n;
+ move = c->fanout - keep;
+ zi = znode;
+ goto do_split;
+ }
+ }
+ }
}
if (appending) {
@@ -1753,6 +2056,8 @@ again:
zbr->znode->parent = zn;
}
+do_split:
+
__set_bit(DIRTY_ZNODE, &zn->flags);
atomic_long_inc(&c->dirty_zn_cnt);
@@ -1773,20 +2078,17 @@ again:
}
/* Insert new key and branch */
- dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key));
+ dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level);
insert_zbranch(zi, zbr, n);
/* Insert new znode (produced by spitting) into the parent */
if (zp) {
- i = n;
+ if (n == 0 && zi == znode && znode->iip == 0)
+ correct_parent_keys(c, znode);
+
/* Locate insertion point */
n = znode->iip + 1;
- if (appending && n != c->fanout)
- appending = 0;
-
- if (i == 0 && zi == znode && znode->iip == 0)
- correct_parent_keys(c, znode);
/* Tail recursion */
zbr->key = zn->zbranch[0].key;
@@ -1852,7 +2154,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
struct ubifs_znode *znode;
mutex_lock(&c->tnc_mutex);
- dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key));
+ dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len);
found = lookup_level0_dirty(c, key, &znode, &n);
if (!found) {
struct ubifs_zbranch zbr;
@@ -1901,8 +2203,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
mutex_lock(&c->tnc_mutex);
- dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum,
- old_offs, lnum, offs, len, DBGKEY(key));
+ dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum,
+ old_offs, lnum, offs, len);
found = lookup_level0_dirty(c, key, &znode, &n);
if (found < 0) {
err = found;
@@ -1935,12 +2237,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
if (found) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c,
- znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
zbr = &znode->zbranch[n];
lnc_free(zbr);
@@ -1985,8 +2286,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
mutex_lock(&c->tnc_mutex);
- dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
- DBGKEY(key));
+ dbg_tnck(key, "LEB %d:%d, name '%.*s', key ",
+ lnum, offs, nm->len, nm->name);
found = lookup_level0_dirty(c, key, &znode, &n);
if (found < 0) {
err = found;
@@ -2007,11 +2308,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
if (found == 1) {
@@ -2044,7 +2345,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
* by passing 'ubifs_tnc_remove_nm()' the same key but
* an unmatchable name.
*/
- struct qstr noname = { .len = 0, .name = "" };
+ struct qstr noname = { .name = "" };
err = dbg_check_tnc(c, 0);
mutex_unlock(&c->tnc_mutex);
@@ -2079,14 +2380,14 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
/* Delete without merge for now */
ubifs_assert(znode->level == 0);
ubifs_assert(n >= 0 && n < c->fanout);
- dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));
+ dbg_tnck(&znode->zbranch[n].key, "deleting key ");
zbr = &znode->zbranch[n];
lnc_free(zbr);
err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
if (err) {
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
return err;
}
@@ -2104,7 +2405,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
*/
do {
- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_obsolete(znode));
ubifs_assert(ubifs_zn_dirty(znode));
zp = znode->parent;
@@ -2160,9 +2461,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
c->zroot.offs = zbr->offs;
c->zroot.len = zbr->len;
c->zroot.znode = znode;
- ubifs_assert(!test_bit(OBSOLETE_ZNODE,
- &zp->flags));
- ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
+ ubifs_assert(!ubifs_zn_obsolete(zp));
+ ubifs_assert(ubifs_zn_dirty(zp));
atomic_long_dec(&c->dirty_zn_cnt);
if (zp->cnext) {
@@ -2190,7 +2490,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
struct ubifs_znode *znode;
mutex_lock(&c->tnc_mutex);
- dbg_tnc("key %s", DBGKEY(key));
+ dbg_tnck(key, "key ");
found = lookup_level0_dirty(c, key, &znode, &n);
if (found < 0) {
err = found;
@@ -2221,7 +2521,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
mutex_lock(&c->tnc_mutex);
- dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key));
+ dbg_tnck(key, "%.*s, key ", nm->len, nm->name);
err = lookup_level0_dirty(c, key, &znode, &n);
if (err < 0)
goto out_unlock;
@@ -2238,11 +2538,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
if (err) {
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
err = tnc_delete(c, znode, n);
}
@@ -2317,11 +2617,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
/* Ensure the znode is dirtied */
if (znode->cnext || !ubifs_zn_dirty(znode)) {
- znode = dirty_cow_bottom_up(c, znode);
- if (IS_ERR(znode)) {
- err = PTR_ERR(znode);
- goto out_unlock;
- }
+ znode = dirty_cow_bottom_up(c, znode);
+ if (IS_ERR(znode)) {
+ err = PTR_ERR(znode);
+ goto out_unlock;
+ }
}
/* Remove all keys in range except the first */
@@ -2333,10 +2633,10 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
err = ubifs_add_dirt(c, znode->zbranch[i].lnum,
znode->zbranch[i].len);
if (err) {
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
goto out_unlock;
}
- dbg_tnc("removing %s", DBGKEY(key));
+ dbg_tnck(key, "removing key ");
}
if (k) {
for (i = n + 1 + k; i < znode->child_cnt; i++)
@@ -2372,7 +2672,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
struct ubifs_dent_node *xent, *pxent = NULL;
struct qstr nm = { .name = NULL };
- dbg_tnc("ino %lu", inum);
+ dbg_tnc("ino %lu", (unsigned long)inum);
/*
* Walk all extended attribute entries and remove them together with
@@ -2392,7 +2692,8 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
}
xattr_inum = le64_to_cpu(xent->inum);
- dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
+ dbg_tnc("xent '%s', ino %lu", xent->name,
+ (unsigned long)xattr_inum);
nm.name = xent->name;
nm.len = le16_to_cpu(xent->nlen);
@@ -2455,7 +2756,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
struct ubifs_zbranch *zbr;
union ubifs_key *dkey;
- dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
+ dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)");
ubifs_assert(is_hash_key(c, key));
mutex_lock(&c->tnc_mutex);
@@ -2545,7 +2846,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
struct ubifs_znode *znode = cnext;
cnext = cnext->cnext;
- if (test_bit(OBSOLETE_ZNODE, &znode->flags))
+ if (ubifs_zn_obsolete(znode))
kfree(znode);
} while (cnext && cnext != c->cnext);
}
@@ -2556,12 +2857,14 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
*/
void ubifs_tnc_close(struct ubifs_info *c)
{
- long clean_freed;
-
tnc_destroy_cnext(c);
if (c->zroot.znode) {
- clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
- atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt);
+ long n, freed;
+
+ n = atomic_long_read(&c->clean_zn_cnt);
+ freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
+ ubifs_assert(freed == n);
+ atomic_long_sub(n, &ubifs_clean_zn_cnt);
}
kfree(c->gap_lebs);
kfree(c->ilebs);
@@ -2651,7 +2954,7 @@ static struct ubifs_znode *right_znode(struct ubifs_info *c,
*
* This function searches an indexing node by its first key @key and its
* address @lnum:@offs. It looks up the indexing tree by pulling all indexing
- * nodes it traverses to TNC. This function is called fro indexing nodes which
+ * nodes it traverses to TNC. This function is called for indexing nodes which
* were found on the media by scanning, for example when garbage-collecting or
* when doing in-the-gaps commit. This means that the indexing node which is
* looked for does not have to have exactly the same leftmost key @key, because
@@ -2673,6 +2976,8 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
struct ubifs_znode *znode, *zn;
int n, nn;
+ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
+
/*
* The arguments have probably been read off flash, so don't assume
* they are valid.
@@ -2954,3 +3259,70 @@ out_unlock:
mutex_unlock(&c->tnc_mutex);
return err;
}
+
+/**
+ * dbg_check_inode_size - check if inode size is correct.
+ * @c: UBIFS file-system description object
+ * @inode: inode to check
+ * @size: inode size
+ *
+ * This function makes sure that the inode size (@size) is correct and it does
+ * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
+ * if it has a data page beyond @size, and other negative error code in case of
+ * other errors.
+ */
+int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
+ loff_t size)
+{
+ int err, n;
+ union ubifs_key from_key, to_key, *key;
+ struct ubifs_znode *znode;
+ unsigned int block;
+
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (!dbg_is_chk_gen(c))
+ return 0;
+
+ block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
+ data_key_init(c, &from_key, inode->i_ino, block);
+ highest_data_key(c, &to_key, inode->i_ino);
+
+ mutex_lock(&c->tnc_mutex);
+ err = ubifs_lookup_level0(c, &from_key, &znode, &n);
+ if (err < 0)
+ goto out_unlock;
+
+ if (err) {
+ err = -EINVAL;
+ key = &from_key;
+ goto out_dump;
+ }
+
+ err = tnc_next(c, &znode, &n);
+ if (err == -ENOENT) {
+ err = 0;
+ goto out_unlock;
+ }
+ if (err < 0)
+ goto out_unlock;
+
+ ubifs_assert(err == 0);
+ key = &znode->zbranch[n].key;
+ if (!key_in_range(c, key, &from_key, &to_key))
+ goto out_unlock;
+
+out_dump:
+ block = key_block(c, key);
+ ubifs_err("inode %lu has size %lld, but there are data at offset %lld",
+ (unsigned long)inode->i_ino, size,
+ ((loff_t)block) << UBIFS_BLOCK_SHIFT);
+ mutex_unlock(&c->tnc_mutex);
+ ubifs_dump_inode(c, inode);
+ dump_stack();
+ return -EINVAL;
+
+out_unlock:
+ mutex_unlock(&c->tnc_mutex);
+ return err;
+}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 8117e65ba2e..3600994f841 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -22,6 +22,7 @@
/* This file implements TNC functions for committing */
+#include <linux/random.h>
#include "ubifs.h"
/**
@@ -53,18 +54,16 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
br->len = cpu_to_le32(zbr->len);
if (!zbr->lnum || !zbr->len) {
ubifs_err("bad ref in znode");
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
if (zbr->znode)
- dbg_dump_znode(c, zbr->znode);
+ ubifs_dump_znode(c, zbr->znode);
}
}
ubifs_prepare_node(c, idx, len, 0);
-#ifdef CONFIG_UBIFS_FS_DEBUG
znode->lnum = lnum;
znode->offs = offs;
znode->len = len;
-#endif
err = insert_old_idx_znode(c, znode);
@@ -87,8 +86,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
atomic_long_dec(&c->dirty_zn_cnt);
ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(ubifs_zn_cow(znode));
+ /*
+ * Note, unlike 'write_index()' we do not add memory barriers here
+ * because this function is called with @c->tnc_mutex locked.
+ */
__clear_bit(DIRTY_ZNODE, &znode->flags);
__clear_bit(COW_ZNODE, &znode->flags);
@@ -245,7 +248,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
* it is more comprehensive and less efficient than is needed for this
* purpose.
*/
- sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
+ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
c->ileb_len = 0;
if (IS_ERR(sleb))
return PTR_ERR(sleb);
@@ -317,8 +320,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
0, 0, 0);
if (err)
return err;
- err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
- UBI_SHORTTERM);
+ err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len);
if (err)
return err;
dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
@@ -372,26 +374,23 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
written = layout_leb_in_gaps(c, p);
if (written < 0) {
err = written;
- if (err == -ENOSPC) {
- if (!dbg_force_in_the_gaps_enabled) {
- /*
- * Do not print scary warnings if the
- * debugging option which forces
- * in-the-gaps is enabled.
- */
- ubifs_err("out of space");
- spin_lock(&c->space_lock);
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
- dbg_dump_lprops(c);
- }
- /* Try to commit anyway */
- err = 0;
- break;
+ if (err != -ENOSPC) {
+ kfree(c->gap_lebs);
+ c->gap_lebs = NULL;
+ return err;
}
- kfree(c->gap_lebs);
- c->gap_lebs = NULL;
- return err;
+ if (!dbg_is_chk_index(c)) {
+ /*
+ * Do not print scary warnings if the debugging
+ * option which forces in-the-gaps is enabled.
+ */
+ ubifs_warn("out of space");
+ ubifs_dump_budg(c, &c->bi);
+ ubifs_dump_lprops(c);
+ }
+ /* Try to commit anyway */
+ err = 0;
+ break;
}
p++;
cnt -= written;
@@ -454,11 +453,9 @@ static int layout_in_empty_space(struct ubifs_info *c)
offs = buf_offs + used;
-#ifdef CONFIG_UBIFS_FS_DEBUG
znode->lnum = lnum;
znode->offs = offs;
znode->len = len;
-#endif
/* Update the parent */
zp = znode->parent;
@@ -494,25 +491,6 @@ static int layout_in_empty_space(struct ubifs_info *c)
else
next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
- if (c->min_io_size == 1) {
- buf_offs += ALIGN(len, 8);
- if (next_len) {
- if (buf_offs + next_len <= c->leb_size)
- continue;
- err = ubifs_update_one_lp(c, lnum, 0,
- c->leb_size - buf_offs, 0, 0);
- if (err)
- return err;
- lnum = -1;
- continue;
- }
- err = ubifs_update_one_lp(c, lnum,
- c->leb_size - buf_offs, 0, 0, 0);
- if (err)
- return err;
- break;
- }
-
/* Update buffer positions */
wlen = used + len;
used += ALIGN(len, 8);
@@ -553,10 +531,8 @@ static int layout_in_empty_space(struct ubifs_info *c)
break;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
- c->new_ihead_lnum = lnum;
- c->new_ihead_offs = buf_offs;
-#endif
+ c->dbg->new_ihead_lnum = lnum;
+ c->dbg->new_ihead_offs = buf_offs;
return 0;
}
@@ -661,7 +637,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
}
cnt += 1;
while (1) {
- ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(!ubifs_zn_cow(znode));
__set_bit(COW_ZNODE, &znode->flags);
znode->alt = 0;
cnext = find_next_dirty(znode);
@@ -707,7 +683,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
c->ilebs[c->ileb_cnt++] = lnum;
dbg_cmt("LEB %d", lnum);
}
- if (dbg_force_in_the_gaps())
+ if (dbg_is_chk_index(c) && !(prandom_u32() & 7))
return -ENOSPC;
return 0;
}
@@ -797,14 +773,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
spin_lock(&c->space_lock);
/*
* Although we have not finished committing yet, update size of the
- * committed index ('c->old_idx_sz') and zero out the index growth
+ * committed index ('c->bi.old_idx_sz') and zero out the index growth
* budget. It is OK to do this now, because we've reserved all the
* space which is needed to commit the index, and it is save for the
* budgeting subsystem to assume the index is already committed,
* even though it is not.
*/
- c->old_idx_sz = c->calc_idx_sz;
- c->budg_uncommitted_idx = 0;
+ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ c->bi.old_idx_sz = c->calc_idx_sz;
+ c->bi.uncommitted_idx = 0;
+ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
spin_unlock(&c->space_lock);
mutex_unlock(&c->tnc_mutex);
@@ -831,7 +809,7 @@ static int write_index(struct ubifs_info *c)
struct ubifs_idx_node *idx;
struct ubifs_znode *znode, *cnext;
int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
- int avail, wlen, err, lnum_pos = 0;
+ int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;
cnext = c->enext;
if (!cnext)
@@ -879,9 +857,9 @@ static int write_index(struct ubifs_info *c)
br->len = cpu_to_le32(zbr->len);
if (!zbr->lnum || !zbr->len) {
ubifs_err("bad ref in znode");
- dbg_dump_znode(c, znode);
+ ubifs_dump_znode(c, znode);
if (zbr->znode)
- dbg_dump_znode(c, zbr->znode);
+ ubifs_dump_znode(c, zbr->znode);
}
}
len = ubifs_idx_node_sz(c, znode->child_cnt);
@@ -896,19 +874,17 @@ static int write_index(struct ubifs_info *c)
}
offs = buf_offs + used;
-#ifdef CONFIG_UBIFS_FS_DEBUG
if (lnum != znode->lnum || offs != znode->offs ||
len != znode->len) {
ubifs_err("inconsistent znode posn");
return -EINVAL;
}
-#endif
/* Grab some stuff from znode while we still can */
cnext = znode->cnext;
ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
+ ubifs_assert(ubifs_zn_cow(znode));
/*
* It is important that other threads should see %DIRTY_ZNODE
@@ -919,9 +895,31 @@ static int write_index(struct ubifs_info *c)
* the reason for the second barrier.
*/
clear_bit(DIRTY_ZNODE, &znode->flags);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(COW_ZNODE, &znode->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
+
+ /*
+ * We have marked the znode as clean but have not updated the
+ * @c->clean_zn_cnt counter. If this znode becomes dirty again
+ * before 'free_obsolete_znodes()' is called, then
+ * @c->clean_zn_cnt will be decremented before it gets
+ * incremented (resulting in 2 decrements for the same znode).
+ * This means that @c->clean_zn_cnt may become negative for a
+ * while.
+ *
+ * Q: why can we not increment @c->clean_zn_cnt?
+ * A: because we do not have the @c->tnc_mutex locked, and the
+ * following code would be racy and buggy:
+ *
+ * if (!ubifs_zn_obsolete(znode)) {
+ * atomic_long_inc(&c->clean_zn_cnt);
+ * atomic_long_inc(&ubifs_clean_zn_cnt);
+ * }
+ *
+ * Thus, we just delay the @c->clean_zn_cnt update until we
+ * have the mutex locked.
+ */
/* Do not access znode from this point on */
@@ -939,75 +937,46 @@ static int write_index(struct ubifs_info *c)
else
next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
- if (c->min_io_size == 1) {
- /*
- * Write the prepared index node immediately if there is
- * no minimum IO size
- */
- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
- wlen, UBI_SHORTTERM);
- if (err)
- return err;
- buf_offs += ALIGN(wlen, 8);
- if (next_len) {
- used = 0;
- avail = buf_len;
- if (buf_offs + next_len > c->leb_size) {
- err = ubifs_update_one_lp(c, lnum,
- LPROPS_NC, 0, 0, LPROPS_TAKEN);
- if (err)
- return err;
- lnum = -1;
- }
+ nxt_offs = buf_offs + used + next_len;
+ if (next_len && nxt_offs <= c->leb_size) {
+ if (avail > 0)
continue;
- }
+ else
+ blen = buf_len;
} else {
- int blen, nxt_offs = buf_offs + used + next_len;
-
- if (next_len && nxt_offs <= c->leb_size) {
- if (avail > 0)
- continue;
- else
- blen = buf_len;
- } else {
- wlen = ALIGN(wlen, 8);
- blen = ALIGN(wlen, c->min_io_size);
- ubifs_pad(c, c->cbuf + wlen, blen - wlen);
- }
- /*
- * The buffer is full or there are no more znodes
- * to do
- */
- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
- blen, UBI_SHORTTERM);
- if (err)
- return err;
- buf_offs += blen;
- if (next_len) {
- if (nxt_offs > c->leb_size) {
- err = ubifs_update_one_lp(c, lnum,
- LPROPS_NC, 0, 0, LPROPS_TAKEN);
- if (err)
- return err;
- lnum = -1;
- }
- used -= blen;
- if (used < 0)
- used = 0;
- avail = buf_len - used;
- memmove(c->cbuf, c->cbuf + blen, used);
- continue;
+ wlen = ALIGN(wlen, 8);
+ blen = ALIGN(wlen, c->min_io_size);
+ ubifs_pad(c, c->cbuf + wlen, blen - wlen);
+ }
+
+ /* The buffer is full or there are no more znodes to do */
+ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen);
+ if (err)
+ return err;
+ buf_offs += blen;
+ if (next_len) {
+ if (nxt_offs > c->leb_size) {
+ err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
+ 0, LPROPS_TAKEN);
+ if (err)
+ return err;
+ lnum = -1;
}
+ used -= blen;
+ if (used < 0)
+ used = 0;
+ avail = buf_len - used;
+ memmove(c->cbuf, c->cbuf + blen, used);
+ continue;
}
break;
}
-#ifdef CONFIG_UBIFS_FS_DEBUG
- if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
+ if (lnum != c->dbg->new_ihead_lnum ||
+ buf_offs != c->dbg->new_ihead_offs) {
ubifs_err("inconsistent ihead");
return -EINVAL;
}
-#endif
c->ihead_lnum = lnum;
c->ihead_offs = buf_offs;
@@ -1029,7 +998,7 @@ static void free_obsolete_znodes(struct ubifs_info *c)
do {
znode = cnext;
cnext = znode->cnext;
- if (test_bit(OBSOLETE_ZNODE, &znode->flags))
+ if (ubifs_zn_obsolete(znode))
kfree(znode);
else {
znode->cnext = NULL;
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index a25c1cc1f8d..f6bf8995c7b 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -293,10 +293,10 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
lnum, offs, znode->level, znode->child_cnt);
if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
- dbg_err("current fanout %d, branch count %d",
- c->fanout, znode->child_cnt);
- dbg_err("max levels %d, znode level %d",
- UBIFS_MAX_LEVELS, znode->level);
+ ubifs_err("current fanout %d, branch count %d",
+ c->fanout, znode->child_cnt);
+ ubifs_err("max levels %d, znode level %d",
+ UBIFS_MAX_LEVELS, znode->level);
err = 1;
goto out_dump;
}
@@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
if (zbr->lnum < c->main_first ||
zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
- dbg_err("bad branch %d", i);
+ ubifs_err("bad branch %d", i);
err = 2;
goto out_dump;
}
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
case UBIFS_XENT_KEY:
break;
default:
- dbg_msg("bad key type at slot %d: %s", i,
- DBGKEY(&zbr->key));
+ ubifs_err("bad key type at slot %d: %d",
+ i, key_type(c, &zbr->key));
err = 3;
goto out_dump;
}
@@ -340,19 +340,19 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
type = key_type(c, &zbr->key);
if (c->ranges[type].max_len == 0) {
if (zbr->len != c->ranges[type].len) {
- dbg_err("bad target node (type %d) length (%d)",
- type, zbr->len);
- dbg_err("have to be %d", c->ranges[type].len);
+ ubifs_err("bad target node (type %d) length (%d)",
+ type, zbr->len);
+ ubifs_err("have to be %d", c->ranges[type].len);
err = 4;
goto out_dump;
}
} else if (zbr->len < c->ranges[type].min_len ||
zbr->len > c->ranges[type].max_len) {
- dbg_err("bad target node (type %d) length (%d)",
- type, zbr->len);
- dbg_err("have to be in range of %d-%d",
- c->ranges[type].min_len,
- c->ranges[type].max_len);
+ ubifs_err("bad target node (type %d) length (%d)",
+ type, zbr->len);
+ ubifs_err("have to be in range of %d-%d",
+ c->ranges[type].min_len,
+ c->ranges[type].max_len);
err = 5;
goto out_dump;
}
@@ -370,13 +370,13 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
cmp = keys_cmp(c, key1, key2);
if (cmp > 0) {
- dbg_err("bad key order (keys %d and %d)", i, i + 1);
+ ubifs_err("bad key order (keys %d and %d)", i, i + 1);
err = 6;
goto out_dump;
} else if (cmp == 0 && !is_hash_key(c, key1)) {
/* These can only be keys with colliding hash */
- dbg_err("keys %d and %d are not hashed but equivalent",
- i, i + 1);
+ ubifs_err("keys %d and %d are not hashed but equivalent",
+ i, i + 1);
err = 7;
goto out_dump;
}
@@ -387,7 +387,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
out_dump:
ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
- dbg_dump_node(c, idx);
+ ubifs_dump_node(c, idx);
kfree(idx);
return -EINVAL;
}
@@ -475,18 +475,18 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
zbr->offs);
if (err) {
- dbg_tnc("key %s", DBGKEY(key));
+ dbg_tnck(key, "key ");
return err;
}
/* Make sure the key of the read node is correct */
- key_read(c, key, &key1);
- if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) {
+ key_read(c, node + UBIFS_KEY_OFFSET, &key1);
+ if (!keys_eq(c, key, &key1)) {
ubifs_err("bad key in node at LEB %d:%d",
zbr->lnum, zbr->offs);
- dbg_tnc("looked for key %s found node's key %s",
- DBGKEY(key), DBGKEY1(&key1));
- dbg_dump_node(c, node);
+ dbg_tnck(key, "looked for key ");
+ dbg_tnck(&key1, "but found node's key ");
+ ubifs_dump_node(c, node);
return -EINVAL;
}
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 0cc7da9bed4..e24380cf46e 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -36,9 +36,31 @@
/* UBIFS node magic number (must not have the padding byte first or last) */
#define UBIFS_NODE_MAGIC 0x06101831
-/* UBIFS on-flash format version */
+/*
+ * UBIFS on-flash format version. This version is increased when the on-flash
+ * format is changing. If this happens, UBIFS will support older versions as
+ * well. But older UBIFS code will not support newer formats. Format changes
+ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add
+ * a new feature.
+ *
+ * UBIFS went into mainline kernel with format version 4. The older formats
+ * were development formats.
+ */
#define UBIFS_FORMAT_VERSION 4
+/*
+ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS
+ * implementations will not be able to mount newer formats in read-write mode.
+ * However, depending on the change, it may be possible to mount newer formats
+ * in R/O mode. This is indicated by the R/O compatibility version which is
+ * stored in the super-block.
+ *
+ * This is needed to support boot-loaders which only need R/O mounting. With
+ * this flag it is possible to do UBIFS format changes without a need to update
+ * boot-loaders.
+ */
+#define UBIFS_RO_COMPAT_VERSION 0
+
/* Minimum logical eraseblock size in bytes */
#define UBIFS_MIN_LEB_SZ (15*1024)
@@ -51,6 +73,13 @@
*/
#define UBIFS_MIN_COMPR_LEN 128
+/*
+ * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
+ * shorter than uncompressed data length, UBIFS prefers to leave this data
+ * node uncompressed, because it'll be read faster.
+ */
+#define UBIFS_MIN_COMPRESS_DIFF 64
+
/* Root inode number */
#define UBIFS_ROOT_INO 1
@@ -75,7 +104,6 @@
*/
#define UBIFS_BLOCK_SIZE 4096
#define UBIFS_BLOCK_SHIFT 12
-#define UBIFS_BLOCK_MASK 0x00000FFF
/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
#define UBIFS_PADDING_BYTE 0xCE
@@ -87,7 +115,7 @@
#define UBIFS_SK_LEN 8
/* Minimum index tree fanout */
-#define UBIFS_MIN_FANOUT 2
+#define UBIFS_MIN_FANOUT 3
/* Maximum number of levels in UBIFS indexing B-tree */
#define UBIFS_MAX_LEVELS 512
@@ -107,6 +135,13 @@
/* The key is always at the same position in all keyed nodes */
#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
+/* Garbage collector journal head number */
+#define UBIFS_GC_HEAD 0
+/* Base journal head number */
+#define UBIFS_BASE_HEAD 1
+/* Data journal head number */
+#define UBIFS_DATA_HEAD 2
+
/*
* LEB Properties Tree node types.
*
@@ -228,10 +263,10 @@ enum {
/* Minimum number of orphan area logical eraseblocks */
#define UBIFS_MIN_ORPH_LEBS 1
/*
- * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
+ * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1
* for GC, 1 for deletions, and at least 1 for committed data).
*/
-#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
+#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6)
/* Minimum number of logical eraseblocks */
#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
@@ -373,9 +408,11 @@ enum {
* Superblock flags.
*
* UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
+ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
*/
enum {
UBIFS_FLG_BIGLPT = 0x02,
+ UBIFS_FLG_SPACE_FIXUP = 0x04,
};
/**
@@ -399,7 +436,7 @@ struct ubifs_ch {
__u8 node_type;
__u8 group_type;
__u8 padding[2];
-} __attribute__ ((packed));
+} __packed;
/**
* union ubifs_dev_desc - device node descriptor.
@@ -413,7 +450,7 @@ struct ubifs_ch {
union ubifs_dev_desc {
__le32 new;
__le64 huge;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_ino_node - inode node.
@@ -474,7 +511,7 @@ struct ubifs_ino_node {
__le16 compr_type;
__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
__u8 data[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_dent_node - directory entry node.
@@ -499,7 +536,7 @@ struct ubifs_dent_node {
__le16 nlen;
__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
__u8 name[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_data_node - data node.
@@ -520,7 +557,7 @@ struct ubifs_data_node {
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_trun_node - truncation node.
@@ -540,7 +577,7 @@ struct ubifs_trun_node {
__u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
__le64 old_size;
__le64 new_size;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_pad_node - padding node.
@@ -551,7 +588,7 @@ struct ubifs_trun_node {
struct ubifs_pad_node {
struct ubifs_ch ch;
__le32 pad_len;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_sb_node - superblock node.
@@ -580,6 +617,7 @@ struct ubifs_pad_node {
* @padding2: reserved for future, zeroes
* @time_gran: time granularity in nanoseconds
* @uuid: UUID generated when the file system image was created
+ * @ro_compat_version: UBIFS R/O compatibility version
*/
struct ubifs_sb_node {
struct ubifs_ch ch;
@@ -606,8 +644,9 @@ struct ubifs_sb_node {
__le64 rp_size;
__le32 time_gran;
__u8 uuid[16];
- __u8 padding2[3972];
-} __attribute__ ((packed));
+ __le32 ro_compat_version;
+ __u8 padding2[3968];
+} __packed;
/**
* struct ubifs_mst_node - master node.
@@ -674,7 +713,7 @@ struct ubifs_mst_node {
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_ref_node - logical eraseblock reference node.
@@ -690,7 +729,7 @@ struct ubifs_ref_node {
__le32 offs;
__le32 jhead;
__u8 padding[28];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_branch - key/reference/length branch
@@ -704,7 +743,7 @@ struct ubifs_branch {
__le32 offs;
__le32 len;
__u8 key[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_idx_node - indexing node.
@@ -718,7 +757,7 @@ struct ubifs_idx_node {
__le16 child_cnt;
__le16 level;
__u8 branches[];
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_cs_node - commit start node.
@@ -728,7 +767,7 @@ struct ubifs_idx_node {
struct ubifs_cs_node {
struct ubifs_ch ch;
__le64 cmt_no;
-} __attribute__ ((packed));
+} __packed;
/**
* struct ubifs_orph_node - orphan node.
@@ -740,6 +779,6 @@ struct ubifs_orph_node {
struct ubifs_ch ch;
__le64 cmt_no;
__le64 inos[];
-} __attribute__ ((packed));
+} __packed;
#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index e4f89f27182..c1f71fe17cc 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -20,8 +20,6 @@
* Adrian Hunter
*/
-/* Implementation version 0.7 */
-
#ifndef __UBIFS_H__
#define __UBIFS_H__
@@ -30,6 +28,7 @@
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
@@ -43,16 +42,24 @@
#define UBIFS_VERSION 1
/* Normal UBIFS messages */
-#define ubifs_msg(fmt, ...) \
- printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
+#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__)
/* UBIFS error messages */
-#define ubifs_err(fmt, ...) \
- printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
+#define ubifs_err(fmt, ...) \
+ pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
__func__, ##__VA_ARGS__)
/* UBIFS warning messages */
-#define ubifs_warn(fmt, ...) \
- printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__)
+#define ubifs_warn(fmt, ...) \
+ pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \
+ current->pid, __func__, ##__VA_ARGS__)
+/*
+ * A variant of 'ubifs_err()' which takes the UBIFS file-system description
+ * object as an argument.
+ */
+#define ubifs_errc(c, fmt, ...) \
+ do { \
+ if (!(c)->probing) \
+ ubifs_err(fmt, ##__VA_ARGS__); \
+ } while (0)
/* UBIFS file system VFS magic number */
#define UBIFS_SUPER_MAGIC 0x24051905
@@ -65,6 +72,14 @@
#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
+/*
+ * Minimum amount of LEBs reserved for the index. At present the index needs at
+ * least 2 LEBs: one for the index head and one for in-the-gaps method (which
+ * currently does not cater for the index head and so excludes it from
+ * consideration).
+ */
+#define MIN_INDEX_LEBS 2
+
/* Minimum amount of data UBIFS writes to the flash */
#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
@@ -77,9 +92,6 @@
#define INUM_WARN_WATERMARK 0xFFF00000
#define INUM_WATERMARK 0xFFFFFF00
-/* Largest key size supported in this implementation */
-#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
-
/* Maximum number of entries in each LPT (LEB category) heap */
#define LPT_HEAP_SZ 256
@@ -89,8 +101,9 @@
*/
#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
-/* Default write-buffer synchronization timeout (5 secs) */
-#define DEFAULT_WBUF_TIMEOUT (5 * HZ)
+/* Write-buffer synchronization timeout interval in seconds */
+#define WBUF_TIMEOUT_SOFTLIMIT 3
+#define WBUF_TIMEOUT_HARDLIMIT 5
/* Maximum possible inode number (only 32-bit inodes are supported now) */
#define MAX_INUM 0xFFFFFFFF
@@ -98,12 +111,10 @@
/* Number of non-data journal heads */
#define NONDATA_JHEADS_CNT 2
-/* Garbage collector head */
-#define GCHD 0
-/* Base journal head number */
-#define BASEHD 1
-/* First "general purpose" journal head */
-#define DATAHD 2
+/* Shorter names for journal head numbers for internal usage */
+#define GCHD UBIFS_GC_HEAD
+#define BASEHD UBIFS_BASE_HEAD
+#define DATAHD UBIFS_DATA_HEAD
/* 'No change' value for 'ubifs_change_lp()' */
#define LPROPS_NC 0x80000001
@@ -113,8 +124,12 @@
* in TNC. However, when replaying, it is handy to introduce fake "truncation"
* keys for truncation nodes because the code becomes simpler. So we define
* %UBIFS_TRUN_KEY type.
+ *
+ * But otherwise, out of the journal replay scope, the truncation keys are
+ * invalid.
*/
-#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
+#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
+#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT
/*
* How much a directory entry/extended attribute entry adds to the parent/host
@@ -141,9 +156,18 @@
*/
#define WORST_COMPR_FACTOR 2
+/*
+ * How much memory is needed for a buffer where we compress a data node.
+ */
+#define COMPRESSED_DATA_NODE_BUF_SZ \
+ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
+
/* Maximum expected tree height for use by bottom_up_buf */
#define BOTTOM_UP_HEIGHT 64
+/* Maximum number of data nodes to bulk-read */
+#define UBIFS_MAX_BULK_READ 32
+
/*
* Lockdep classes for UBIFS inode @ui_mutex.
*/
@@ -211,14 +235,14 @@ enum {
* LPT cnode flag bits.
*
* DIRTY_CNODE: cnode is dirty
- * COW_CNODE: cnode is being committed and must be copied before writing
* OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
- * so it can (and must) be freed when the commit is finished
+ * so it can (and must) be freed when the commit is finished
+ * COW_CNODE: cnode is being committed and must be copied before writing
*/
enum {
DIRTY_CNODE = 0,
- COW_CNODE = 1,
- OBSOLETE_CNODE = 2,
+ OBSOLETE_CNODE = 1,
+ COW_CNODE = 2,
};
/*
@@ -258,10 +282,10 @@ struct ubifs_old_idx {
/* The below union makes it easier to deal with keys */
union ubifs_key {
- uint8_t u8[CUR_MAX_KEY_LEN];
- uint32_t u32[CUR_MAX_KEY_LEN/4];
- uint64_t u64[CUR_MAX_KEY_LEN/8];
- __le32 j32[CUR_MAX_KEY_LEN/4];
+ uint8_t u8[UBIFS_SK_LEN];
+ uint32_t u32[UBIFS_SK_LEN/4];
+ uint64_t u64[UBIFS_SK_LEN/8];
+ __le32 j32[UBIFS_SK_LEN/4];
};
/**
@@ -322,15 +346,18 @@ struct ubifs_gced_idx_leb {
* struct ubifs_inode - UBIFS in-memory inode description.
* @vfs_inode: VFS inode description object
* @creat_sqnum: sequence number at time of creation
+ * @del_cmtno: commit number corresponding to the time the inode was deleted,
+ * protected by @c->commit_sem;
* @xattr_size: summarized size of all extended attributes in bytes
* @xattr_cnt: count of extended attributes this inode has
* @xattr_names: sum of lengths of all extended attribute names belonging to
* this inode
* @dirty: non-zero if the inode is dirty
* @xattr: non-zero if this is an extended attribute inode
+ * @bulk_read: non-zero if bulk-read should be used
* @ui_mutex: serializes inode write-back with the rest of VFS operations,
- * serializes "clean <-> dirty" state changes, protects @dirty,
- * @ui_size, and @xattr_size
+ * serializes "clean <-> dirty" state changes, serializes bulk-read,
+ * protects @dirty, @bulk_read, @ui_size, and @xattr_size
* @ui_lock: protects @synced_i_size
* @synced_i_size: synchronized size of inode, i.e. the value of inode size
* currently stored on the flash; used only for regular file
@@ -338,6 +365,8 @@ struct ubifs_gced_idx_leb {
* @ui_size: inode size used by UBIFS when writing to flash
* @flags: inode flags (@UBIFS_COMPR_FL, etc)
* @compr_type: default compression type used for this inode
+ * @last_page_read: page number of last page read (for bulk read)
+ * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
* @data_len: length of the data attached to the inode
* @data: inode's data
*
@@ -365,25 +394,29 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
- * with 'ubifs_writepage()' (see file.c). All the other inode fields are
- * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
+ * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
+ * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
*/
struct ubifs_inode {
struct inode vfs_inode;
unsigned long long creat_sqnum;
+ unsigned long long del_cmtno;
unsigned int xattr_size;
unsigned int xattr_cnt;
unsigned int xattr_names;
unsigned int dirty:1;
unsigned int xattr:1;
+ unsigned int bulk_read:1;
+ unsigned int compr_type:2;
struct mutex ui_mutex;
spinlock_t ui_lock;
loff_t synced_i_size;
loff_t ui_size;
int flags;
- int compr_type;
+ pgoff_t last_page_read;
+ pgoff_t read_in_a_row;
int data_len;
void *data;
};
@@ -408,9 +441,9 @@ struct ubifs_unclean_leb {
* LEB properties flags.
*
* LPROPS_UNCAT: not categorized
- * LPROPS_DIRTY: dirty > 0, not index
- * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index
- * LPROPS_FREE: free > 0, not empty, not index
+ * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index
+ * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sz and index
+ * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index
* LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
* LPROPS_EMPTY: LEB is empty, not taken
* LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
@@ -463,8 +496,8 @@ struct ubifs_lprops {
struct ubifs_lpt_lprops {
int free;
int dirty;
- unsigned tgc : 1;
- unsigned cmt : 1;
+ unsigned tgc:1;
+ unsigned cmt:1;
};
/**
@@ -472,24 +505,26 @@ struct ubifs_lpt_lprops {
* @empty_lebs: number of empty LEBs
* @taken_empty_lebs: number of taken LEBs
* @idx_lebs: number of indexing LEBs
- * @total_free: total free space in bytes
- * @total_dirty: total dirty space in bytes
- * @total_used: total used space in bytes (includes only data LEBs)
- * @total_dead: total dead space in bytes (includes only data LEBs)
- * @total_dark: total dark space in bytes (includes only data LEBs)
+ * @total_free: total free space in bytes (includes all LEBs)
+ * @total_dirty: total dirty space in bytes (includes all LEBs)
+ * @total_used: total used space in bytes (does not include index LEBs)
+ * @total_dead: total dead space in bytes (does not include index LEBs)
+ * @total_dark: total dark space in bytes (does not include index LEBs)
+ *
+ * The @taken_empty_lebs field counts the LEBs that are in the transient state
+ * of having been "taken" for use but not yet written to. @taken_empty_lebs is
+ * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be
+ * used by itself (in which case 'unused_lebs' would be a better name). In the
+ * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained
+ * by GC, but unlike other empty LEBs that are "taken", it may not be written
+ * straight away (i.e. before the next commit start or unmount), so either
+ * @gc_lnum must be specially accounted for, or the current approach followed
+ * i.e. count it under @taken_empty_lebs.
*
- * N.B. total_dirty and total_used are different to other total_* fields,
- * because they account _all_ LEBs, not just data LEBs.
+ * @empty_lebs includes @taken_empty_lebs.
*
- * 'taken_empty_lebs' counts the LEBs that are in the transient state of having
- * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed
- * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
- * by itself (in which case 'unused_lebs' would be a better name). In the case
- * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
- * but unlike other empty LEBs that are 'taken', it may not be written straight
- * away (i.e. before the next commit start or unmount), so either gc_lnum must
- * be specially accounted for, or the current approach followed i.e. count it
- * under 'taken_empty_lebs'.
+ * @total_used, @total_dead and @total_dark fields do not account indexing
+ * LEBs.
*/
struct ubifs_lp_stats {
int empty_lebs;
@@ -622,17 +657,19 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
* @offs: write-buffer offset in this logical eraseblock
* @avail: number of bytes available in the write-buffer
* @used: number of used bytes in the write-buffer
- * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
- * %UBI_UNKNOWN)
+ * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
* @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
* up by 'mutex_lock_nested()).
* @sync_callback: write-buffer synchronization callback
* @io_mutex: serializes write-buffer I/O
* @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
* fields
+ * @softlimit: soft write-buffer timeout interval
+ * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
+ * and @softlimit + @delta)
* @timer: write-buffer timer
- * @timeout: timer expire interval in jiffies
- * @need_sync: it is set if its timer expired and needs sync
+ * @no_timer: non-zero if this write-buffer does not have a timer
+ * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
* @next_ino: points to the next position of the following inode number
* @inodes: stores the inode numbers of the nodes which are in wbuf
*
@@ -653,14 +690,16 @@ struct ubifs_wbuf {
int offs;
int avail;
int used;
- int dtype;
+ int size;
int jhead;
int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
struct mutex io_mutex;
spinlock_t lock;
- struct timer_list timer;
- int timeout;
- int need_sync;
+ ktime_t softlimit;
+ unsigned long long delta;
+ struct hrtimer timer;
+ unsigned int no_timer:1;
+ unsigned int need_sync:1;
int next_ino;
ino_t *inodes;
};
@@ -685,20 +724,22 @@ struct ubifs_bud {
* struct ubifs_jhead - journal head.
* @wbuf: head's write-buffer
* @buds_list: list of bud LEBs belonging to this journal head
+ * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
*
* Note, the @buds list is protected by the @c->buds_lock.
*/
struct ubifs_jhead {
struct ubifs_wbuf wbuf;
struct list_head buds_list;
+ unsigned int grouped:1;
};
/**
* struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
* @key: key
* @znode: znode address in memory
- * @lnum: LEB number of the indexing node
- * @offs: offset of the indexing node within @lnum
+ * @lnum: LEB number of the target node (indexing node or data node)
+ * @offs: target node offset within @lnum
* @len: target node length
*/
struct ubifs_zbranch {
@@ -726,6 +767,9 @@ struct ubifs_zbranch {
* @offs: offset of the corresponding indexing node
* @len: length of the corresponding indexing node
* @zbranch: array of znode branches (@c->fanout elements)
+ *
+ * Note! The @lnum, @offs, and @len fields are not really needed - we have them
+ * only for internal consistency check. They could be removed to save some RAM.
*/
struct ubifs_znode {
struct ubifs_znode *parent;
@@ -736,13 +780,35 @@ struct ubifs_znode {
int child_cnt;
int iip;
int alt;
-#ifdef CONFIG_UBIFS_FS_DEBUG
- int lnum, offs, len;
-#endif
+ int lnum;
+ int offs;
+ int len;
struct ubifs_zbranch zbranch[];
};
/**
+ * struct bu_info - bulk-read information.
+ * @key: first data node key
+ * @zbranch: zbranches of data nodes to bulk read
+ * @buf: buffer to read into
+ * @buf_len: buffer length
+ * @gc_seq: GC sequence number to detect races with GC
+ * @cnt: number of data nodes for bulk read
+ * @blk_cnt: number of data blocks including holes
+ * @eof: end of file reached
+ */
+struct bu_info {
+ union ubifs_key key;
+ struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
+ void *buf;
+ int buf_len;
+ int gc_seq;
+ int cnt;
+ int blk_cnt;
+ int eof;
+};
+
+/**
* struct ubifs_node_range - node length range description data structure.
* @len: fixed node length
* @min_len: minimum possible node length
@@ -779,7 +845,7 @@ struct ubifs_compressor {
/**
* struct ubifs_budget_req - budget requirements of an operation.
*
- * @fast: non-zero if the budgeting should try to aquire budget quickly and
+ * @fast: non-zero if the budgeting should try to acquire budget quickly and
* should not try to call write-back
* @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
* have to be re-calculated
@@ -805,21 +871,31 @@ struct ubifs_compressor {
* An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
* is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
* dirty by the re-name operation.
+ *
+ * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to
+ * make sure the amount of inode data which contribute to @new_ino_d and
+ * @dirtied_ino_d fields are aligned.
*/
struct ubifs_budget_req {
unsigned int fast:1;
unsigned int recalculate:1;
+#ifndef UBIFS_DEBUG
unsigned int new_page:1;
unsigned int dirtied_page:1;
unsigned int new_dent:1;
unsigned int mod_dent:1;
unsigned int new_ino:1;
unsigned int new_ino_d:13;
-#ifndef UBIFS_DEBUG
unsigned int dirtied_ino:4;
unsigned int dirtied_ino_d:15;
#else
/* Not bit-fields to check for overflows */
+ unsigned int new_page;
+ unsigned int dirtied_page;
+ unsigned int new_dent;
+ unsigned int mod_dent;
+ unsigned int new_ino;
+ unsigned int new_ino_d;
unsigned int dirtied_ino;
unsigned int dirtied_ino_d;
#endif
@@ -837,6 +913,8 @@ struct ubifs_budget_req {
* @dnext: next orphan to delete
* @inum: inode number
* @new: %1 => added since the last commit, otherwise %0
+ * @cmt: %1 => commit pending, otherwise %0
+ * @del: %1 => delete pending, otherwise %0
*/
struct ubifs_orphan {
struct rb_node rb;
@@ -845,29 +923,80 @@ struct ubifs_orphan {
struct ubifs_orphan *cnext;
struct ubifs_orphan *dnext;
ino_t inum;
- int new;
+ unsigned new:1;
+ unsigned cmt:1;
+ unsigned del:1;
};
/**
* struct ubifs_mount_opts - UBIFS-specific mount options information.
* @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
+ * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
+ * @chk_data_crc: enable/disable CRC data checking when reading data nodes
+ * (%0 default, %1 disable, %2 enable)
+ * @override_compr: override default compressor (%0 - do not override and use
+ * superblock compressor, %1 - override and use compressor
+ * specified in @compr_type)
+ * @compr_type: compressor type to override the superblock compressor with
+ * (%UBIFS_COMPR_NONE, etc)
*/
struct ubifs_mount_opts {
unsigned int unmount_mode:2;
+ unsigned int bulk_read:2;
+ unsigned int chk_data_crc:2;
+ unsigned int override_compr:1;
+ unsigned int compr_type:2;
+};
+
+/**
+ * struct ubifs_budg_info - UBIFS budgeting information.
+ * @idx_growth: amount of bytes budgeted for index growth
+ * @data_growth: amount of bytes budgeted for cached data
+ * @dd_growth: amount of bytes budgeted for cached data that will make
+ * other data dirty
+ * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
+ * which still have to be taken into account because the index
+ * has not been committed so far
+ * @old_idx_sz: size of index on flash
+ * @min_idx_lebs: minimum number of LEBs required for the index
+ * @nospace: non-zero if the file-system does not have flash space (used as
+ * optimization)
+ * @nospace_rp: the same as @nospace, but additionally means that even reserved
+ * pool is full
+ * @page_budget: budget for a page (constant, never changed after mount)
+ * @inode_budget: budget for an inode (constant, never changed after mount)
+ * @dent_budget: budget for a directory entry (constant, never changed after
+ * mount)
+ */
+struct ubifs_budg_info {
+ long long idx_growth;
+ long long data_growth;
+ long long dd_growth;
+ long long uncommitted_idx;
+ unsigned long long old_idx_sz;
+ int min_idx_lebs;
+ unsigned int nospace:1;
+ unsigned int nospace_rp:1;
+ int page_budget;
+ int inode_budget;
+ int dent_budget;
};
+struct ubifs_debug_info;
+
/**
* struct ubifs_info - UBIFS file-system description data structure
* (per-superblock).
* @vfs_sb: VFS @struct super_block object
- * @bdi: backing device info object to make VFS happy and disable readahead
+ * @bdi: backing device info object to make VFS happy and disable read-ahead
*
* @highest_inum: highest used inode number
- * @vfs_gen: VFS inode generation counter
* @max_sqnum: current global sequence number
- * @cmt_no: commit number (last successfully completed commit)
- * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
+ * @cmt_no: commit number of the last successfully completed commit, protected
+ * by @commit_sem
+ * @cnt_lock: protects @highest_inum and @max_sqnum counters
* @fmt_version: UBIFS on-flash format version
+ * @ro_compat_version: R/O compatibility version
* @uuid: UUID from super block
*
* @lhead_lnum: log head logical eraseblock number
@@ -894,13 +1023,14 @@ struct ubifs_mount_opts {
* @cmt_state: commit state
* @cs_lock: commit state lock
* @cmt_wq: wait queue to sleep on if the log is full and a commit is running
- * @fast_unmount: do not run journal commit before un-mounting
+ *
* @big_lpt: flag that LPT is too big to write whole during commit
- * @check_lpt_free: flag that indicates LPT GC may be needed
- * @nospace: non-zero if the file-system does not have flash space (used as
- * optimization)
- * @nospace_rp: the same as @nospace, but additionally means that even reserved
- * pool is full
+ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
+ * recovery)
+ * @bulk_read: enable bulk-reads
+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
+ * @rw_incompat: the media is not R/W compatible
*
* @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
* @calc_idx_sz
@@ -918,13 +1048,20 @@ struct ubifs_mount_opts {
* @ileb_nxt: next pre-allocated index LEBs
* @old_idx: tree of index nodes obsoleted since the last commit start
* @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
- * @new_ihead_lnum: used by debugging to check ihead_lnum
- * @new_ihead_offs: used by debugging to check ihead_offs
*
* @mst_node: master node
* @mst_offs: offset of valid master node
* @mst_mutex: protects the master node area, @mst_node, and @mst_offs
*
+ * @max_bu_buf_len: maximum bulk-read buffer length
+ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
+ * @bu: pre-allocated bulk-read information
+ *
+ * @write_reserve_mutex: protects @write_reserve_buf
+ * @write_reserve_buf: on the write path we allocate memory, which might
+ * sometimes be unavailable, in which case we use this
+ * write reserve buffer
+ *
* @log_lebs: number of logical eraseblocks in the log
* @log_bytes: log size in bytes
* @log_last: last LEB of the log
@@ -937,7 +1074,6 @@ struct ubifs_mount_opts {
* @main_lebs: count of LEBs in the main area
* @main_first: first LEB of the main area
* @main_bytes: main area size in bytes
- * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
*
* @key_hash_type: type of the key hash
* @key_hash: direntry key hash function
@@ -947,43 +1083,42 @@ struct ubifs_mount_opts {
*
* @min_io_size: minimal input/output unit size
* @min_io_shift: number of bits in @min_io_size minus one
+ * @max_write_size: maximum amount of bytes the underlying flash can write at a
+ * time (MTD write buffer size)
+ * @max_write_shift: number of bits in @max_write_size minus one
* @leb_size: logical eraseblock size in bytes
+ * @leb_start: starting offset of logical eraseblocks within physical
+ * eraseblocks
* @half_leb_size: half LEB size
+ * @idx_leb_size: how many bytes of an LEB are effectively available when it is
+ * used to store indexing nodes (@leb_size - @max_idx_node_sz)
* @leb_cnt: count of logical eraseblocks
* @max_leb_cnt: maximum count of logical eraseblocks
* @old_leb_cnt: count of logical eraseblocks before re-size
* @ro_media: the underlying UBI volume is read-only
+ * @ro_mount: the file-system was mounted as read-only
+ * @ro_error: UBIFS switched to R/O mode because an error happened
*
* @dirty_pg_cnt: number of dirty pages (not used)
* @dirty_zn_cnt: number of dirty znodes
* @clean_zn_cnt: number of clean znodes
*
- * @budg_idx_growth: amount of bytes budgeted for index growth
- * @budg_data_growth: amount of bytes budgeted for cached data
- * @budg_dd_growth: amount of bytes budgeted for cached data that will make
- * other data dirty
- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
- * but which still have to be taken into account because
- * the index has not been committed so far
- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst;
- * @min_idx_lebs: minimum number of LEBs required for the index
- * @old_idx_sz: size of index on flash
+ * @space_lock: protects @bi and @lst
+ * @lst: lprops statistics
+ * @bi: budgeting information
* @calc_idx_sz: temporary variable which is used to calculate new index size
* (contains accurate new index size at end of TNC commit start)
- * @lst: lprops statistics
- *
- * @page_budget: budget for a page
- * @inode_budget: budget for an inode
- * @dent_budget: budget for a directory entry
*
* @ref_node_alsz: size of the LEB reference node aligned to the min. flash
- * I/O unit
+ * I/O unit
* @mst_node_alsz: master node aligned size
* @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
* @max_inode_sz: maximum possible inode size in bytes
* @max_znode_sz: size of znode in bytes
+ *
+ * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
+ * data nodes of maximum size - used in free space reporting
* @dead_wm: LEB dead space watermark
* @dark_wm: LEB dark space watermark
* @block_cnt: count of 4KiB blocks on the FS
@@ -1017,6 +1152,8 @@ struct ubifs_mount_opts {
* @sbuf: a buffer of LEB size used by GC and replay for scanning
* @idx_gc: list of index LEBs that have been garbage collected
* @idx_gc_cnt: number of elements on the idx_gc list
+ * @gc_seq: incremented for every non-index LEB garbage collected
+ * @gced_lnum: last non-index LEB that was garbage collected
*
* @infos_list: links all 'ubifs_info' objects
* @umount_mutex: serializes shrinker and un-mount
@@ -1045,6 +1182,7 @@ struct ubifs_mount_opts {
* @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
* @dirty_nn_cnt: number of dirty nnodes
* @dirty_pn_cnt: number of dirty pnodes
+ * @check_lpt_free: flag that indicates LPT GC may be needed
* @lpt_sz: LPT size
* @lpt_nod_buf: buffer for an on-flash nnode or pnode
* @lpt_buf: buffer of LEB size used by LPT
@@ -1055,9 +1193,11 @@ struct ubifs_mount_opts {
* previous commit start
* @uncat_list: list of un-categorized LEBs
* @empty_list: list of empty LEBs
- * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
- * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
+ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
+ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ * basically means that they were loaded from the flash
*
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
@@ -1074,40 +1214,35 @@ struct ubifs_mount_opts {
* @rp_uid: reserved pool user ID
* @rp_gid: reserved pool group ID
*
- * @empty: if the UBI device is empty
- * @replay_tree: temporary tree used during journal replay
+ * @empty: %1 if the UBI device is empty
+ * @need_recovery: %1 if the file-system needs recovery
+ * @replaying: %1 during journal replay
+ * @mounting: %1 while mounting
+ * @probing: %1 while attempting to mount if MS_SILENT mount flag is set
+ * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node)
* @replay_sqnum: sequence number of node currently being replayed
- * @need_recovery: file-system needs recovery
- * @replaying: set to %1 during journal replay
- * @unclean_leb_list: LEBs to recover when mounting ro to rw
- * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
+ * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
+ * mode
+ * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
+ * FS to R/W mode
* @size_tree: inode size information for recovery
- * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
* @mount_opts: UBIFS-specific mount options
*
- * @dbg_buf: a buffer of LEB size used for debugging purposes
- * @old_zroot: old index root - used by 'dbg_check_old_index()'
- * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
- * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
- * @failure_mode: failure mode for recovery testing
- * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
- * @fail_timeout: time in jiffies when delay of failure mode expires
- * @fail_cnt: current number of calls to failure mode I/O functions
- * @fail_cnt_max: number of calls by which to delay failure mode
+ * @dbg: debugging-related information
*/
struct ubifs_info {
struct super_block *vfs_sb;
struct backing_dev_info bdi;
ino_t highest_inum;
- unsigned int vfs_gen;
unsigned long long max_sqnum;
unsigned long long cmt_no;
spinlock_t cnt_lock;
int fmt_version;
+ int ro_compat_version;
unsigned char uuid[16];
int lhead_lnum;
@@ -1131,11 +1266,13 @@ struct ubifs_info {
int cmt_state;
spinlock_t cs_lock;
wait_queue_head_t cmt_wq;
- unsigned int fast_unmount:1;
+
unsigned int big_lpt:1;
- unsigned int check_lpt_free:1;
- unsigned int nospace:1;
- unsigned int nospace_rp:1;
+ unsigned int space_fixup:1;
+ unsigned int no_chk_data_crc:1;
+ unsigned int bulk_read:1;
+ unsigned int default_compr:2;
+ unsigned int rw_incompat:1;
struct mutex tnc_mutex;
struct ubifs_zbranch zroot;
@@ -1152,15 +1289,18 @@ struct ubifs_info {
int ileb_nxt;
struct rb_root old_idx;
int *bottom_up_buf;
-#ifdef CONFIG_UBIFS_FS_DEBUG
- int new_ihead_lnum;
- int new_ihead_offs;
-#endif
struct ubifs_mst_node *mst_node;
int mst_offs;
struct mutex mst_mutex;
+ int max_bu_buf_len;
+ struct mutex bu_mutex;
+ struct bu_info bu;
+
+ struct mutex write_reserve_mutex;
+ void *write_reserve_buf;
+
int log_lebs;
long long log_bytes;
int log_last;
@@ -1173,7 +1313,6 @@ struct ubifs_info {
int main_lebs;
int main_first;
long long main_bytes;
- int default_compr;
uint8_t key_hash_type;
uint32_t (*key_hash)(const char *str, int len);
@@ -1183,30 +1322,27 @@ struct ubifs_info {
int min_io_size;
int min_io_shift;
+ int max_write_size;
+ int max_write_shift;
int leb_size;
+ int leb_start;
int half_leb_size;
+ int idx_leb_size;
int leb_cnt;
int max_leb_cnt;
int old_leb_cnt;
- int ro_media;
+ unsigned int ro_media:1;
+ unsigned int ro_mount:1;
+ unsigned int ro_error:1;
atomic_long_t dirty_pg_cnt;
atomic_long_t dirty_zn_cnt;
atomic_long_t clean_zn_cnt;
- long long budg_idx_growth;
- long long budg_data_growth;
- long long budg_dd_growth;
- long long budg_uncommitted_idx;
spinlock_t space_lock;
- int min_idx_lebs;
- unsigned long long old_idx_sz;
- unsigned long long calc_idx_sz;
struct ubifs_lp_stats lst;
-
- int page_budget;
- int inode_budget;
- int dent_budget;
+ struct ubifs_budg_info bi;
+ unsigned long long calc_idx_sz;
int ref_node_alsz;
int mst_node_alsz;
@@ -1214,6 +1350,8 @@ struct ubifs_info {
int max_idx_node_sz;
long long max_inode_sz;
int max_znode_sz;
+
+ int leb_overhead;
int dead_wm;
int dark_wm;
int block_cnt;
@@ -1247,6 +1385,8 @@ struct ubifs_info {
void *sbuf;
struct list_head idx_gc;
int idx_gc_cnt;
+ int gc_seq;
+ int gced_lnum;
struct list_head infos_list;
struct mutex umount_mutex;
@@ -1275,6 +1415,7 @@ struct ubifs_info {
int lpt_drty_flgs;
int dirty_nn_cnt;
int dirty_pn_cnt;
+ int check_lpt_free;
long long lpt_sz;
void *lpt_nod_buf;
void *lpt_buf;
@@ -1287,6 +1428,7 @@ struct ubifs_info {
struct list_head freeable_list;
struct list_head frdi_idx_list;
int freeable_cnt;
+ int in_a_category_cnt;
int ltab_lnum;
int ltab_offs;
@@ -1300,64 +1442,63 @@ struct ubifs_info {
long long rp_size;
long long report_rp_size;
- uid_t rp_uid;
- gid_t rp_gid;
+ kuid_t rp_uid;
+ kgid_t rp_gid;
/* The below fields are used only during mounting and re-mounting */
- int empty;
- struct rb_root replay_tree;
+ unsigned int empty:1;
+ unsigned int need_recovery:1;
+ unsigned int replaying:1;
+ unsigned int mounting:1;
+ unsigned int remounting_rw:1;
+ unsigned int probing:1;
struct list_head replay_list;
struct list_head replay_buds;
unsigned long long cs_sqnum;
unsigned long long replay_sqnum;
- int need_recovery;
- int replaying;
struct list_head unclean_leb_list;
struct ubifs_mst_node *rcvrd_mst_node;
struct rb_root size_tree;
- int remounting_rw;
struct ubifs_mount_opts mount_opts;
-#ifdef CONFIG_UBIFS_FS_DEBUG
- void *dbg_buf;
- struct ubifs_zbranch old_zroot;
- int old_zroot_level;
- unsigned long long old_zroot_sqnum;
- int failure_mode;
- int fail_delay;
- unsigned long fail_timeout;
- unsigned int fail_cnt;
- unsigned int fail_cnt_max;
-#endif
+ struct ubifs_debug_info *dbg;
};
extern struct list_head ubifs_infos;
extern spinlock_t ubifs_infos_lock;
extern atomic_long_t ubifs_clean_zn_cnt;
extern struct kmem_cache *ubifs_inode_slab;
-extern struct super_operations ubifs_super_operations;
-extern struct address_space_operations ubifs_file_address_operations;
-extern struct file_operations ubifs_file_operations;
-extern struct inode_operations ubifs_file_inode_operations;
-extern struct file_operations ubifs_dir_operations;
-extern struct inode_operations ubifs_dir_inode_operations;
-extern struct inode_operations ubifs_symlink_inode_operations;
+extern const struct super_operations ubifs_super_operations;
+extern const struct address_space_operations ubifs_file_address_operations;
+extern const struct file_operations ubifs_file_operations;
+extern const struct inode_operations ubifs_file_inode_operations;
+extern const struct file_operations ubifs_dir_operations;
+extern const struct inode_operations ubifs_dir_inode_operations;
+extern const struct inode_operations ubifs_symlink_inode_operations;
extern struct backing_dev_info ubifs_backing_dev_info;
extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
/* io.c */
+void ubifs_ro_mode(struct ubifs_info *c, int err);
+int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
+ int len, int even_ebadmsg);
+int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
+ int len);
+int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len);
+int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
+int ubifs_leb_map(struct ubifs_info *c, int lnum);
+int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
-int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
- int dtype);
+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs);
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf);
int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
int lnum, int offs);
int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
int lnum, int offs);
int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
- int offs, int dtype);
+ int offs);
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
- int offs, int quiet);
+ int offs, int quiet, int must_chk_crc);
void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
int ubifs_io_init(struct ubifs_info *c);
@@ -1369,7 +1510,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
/* scan.c */
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
- int offs, void *sbuf);
+ int offs, void *sbuf, int quiet);
void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
int offs, int quiet);
@@ -1399,8 +1540,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
int deletion, int xent);
int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
const union ubifs_key *key, const void *buf, int len);
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
- int last_reference);
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
const struct dentry *old_dentry,
const struct inode *new_dir,
@@ -1423,13 +1564,15 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
-long long ubifs_budg_get_free_space(struct ubifs_info *c);
+long long ubifs_get_free_space(struct ubifs_info *c);
+long long ubifs_get_free_space_nolock(struct ubifs_info *c);
int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
void ubifs_convert_page_budget(struct ubifs_info *c);
+long long ubifs_reported_space(const struct ubifs_info *c, long long free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/* find.c */
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze);
int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
@@ -1440,8 +1583,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
/* tnc.c */
int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode **zn, int *n);
-int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
- void *node);
int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
void *node, const struct qstr *nm);
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
@@ -1471,6 +1612,8 @@ void destroy_old_idx(struct ubifs_info *c);
int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
int lnum, int offs);
int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
/* tnc_misc.c */
struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
@@ -1492,7 +1635,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+unsigned long ubifs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+unsigned long ubifs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
/* commit.c */
int ubifs_bg_thread(void *info);
@@ -1511,6 +1657,7 @@ int ubifs_write_master(struct ubifs_info *c);
int ubifs_read_superblock(struct ubifs_info *c);
struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
+int ubifs_fixup_free_space(struct ubifs_info *c);
/* replay.c */
int ubifs_validate_entry(struct ubifs_info *c,
@@ -1531,6 +1678,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
int ubifs_orphan_start_commit(struct ubifs_info *c);
int ubifs_orphan_end_commit(struct ubifs_info *c);
int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
+int ubifs_clear_orphans(struct ubifs_info *c);
/* lpt.c */
int ubifs_calc_lpt_geom(struct ubifs_info *c);
@@ -1559,6 +1707,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
+/* Needed only in debugging code in lpt_commit.c */
+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
+ struct ubifs_nnode *nnode);
/* lpt_commit.c */
int ubifs_lpt_start_commit(struct ubifs_info *c);
@@ -1567,13 +1718,11 @@ int ubifs_lpt_post_commit(struct ubifs_info *c);
void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
/* lprops.c */
-void ubifs_get_lprops(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
const struct ubifs_lprops *lp,
int free, int dirty, int flags,
int idx_gc_cnt);
-void ubifs_release_lprops(struct ubifs_info *c);
-void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst);
void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
int cat);
void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
@@ -1590,14 +1739,15 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
+int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
- int mode);
+ umode_t mode);
int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
@@ -1616,11 +1766,11 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
int ubifs_recover_master_node(struct ubifs_info *c);
int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
- int offs, void *sbuf, int grouped);
+ int offs, void *sbuf, int jhead);
struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf);
-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
+int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
+int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
int ubifs_rcvry_gc_commit(struct ubifs_info *c);
int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
int deletion, loff_t new_size);
@@ -1636,7 +1786,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* compressor.c */
int __init ubifs_compressors_init(void);
-void __exit ubifs_compressors_exit(void);
+void ubifs_compressors_exit(void);
void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
int *compr_type);
int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 1388a078e1a..5e0a63b1b0d 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,13 +55,15 @@
* ACL support is not implemented.
*/
+#include "ubifs.h"
+#include <linux/fs.h>
+#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
/*
* Limit the number of extended attributes per inode so that the total size
- * (xattr_size) is guaranteeded to fit in an 'unsigned int'.
+ * (@xattr_size) is guaranteed to fit in an 'unsigned int'.
*/
#define MAX_XATTRS_PER_INODE 65535
@@ -78,9 +80,8 @@ enum {
SECURITY_XATTR,
};
-static struct inode_operations none_inode_operations;
-static struct address_space_operations none_address_operations;
-static struct file_operations none_file_operations;
+static const struct inode_operations empty_iops;
+static const struct file_operations empty_fops;
/**
* create_xattr - create an extended attribute.
@@ -103,14 +104,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
struct inode *inode;
struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = size, .dirtied_ino = 1,
- .dirtied_ino_d = host_ui->data_len};
+ .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
return -ENOSPC;
/*
* Linux limits the maximum size of the extended attribute names list
- * to %XATTR_LIST_MAX. This means we should not allow creating more*
+ * to %XATTR_LIST_MAX. This means we should not allow creating more
* extended attributes if the name list becomes larger. This limitation
* is artificial for UBIFS, though.
*/
@@ -128,36 +129,30 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
goto out_budg;
}
- mutex_lock(&host_ui->ui_mutex);
/* Re-define all operations to be "nothing" */
- inode->i_mapping->a_ops = &none_address_operations;
- inode->i_op = &none_inode_operations;
- inode->i_fop = &none_file_operations;
+ inode->i_mapping->a_ops = &empty_aops;
+ inode->i_op = &empty_iops;
+ inode->i_fop = &empty_fops;
inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
ui = ubifs_inode(inode);
ui->xattr = 1;
ui->flags |= UBIFS_XATTR_FL;
- ui->data = kmalloc(size, GFP_NOFS);
+ ui->data = kmemdup(value, size, GFP_NOFS);
if (!ui->data) {
err = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
+ inode->i_size = ui->ui_size = size;
+ ui->data_len = size;
- memcpy(ui->data, value, size);
+ mutex_lock(&host_ui->ui_mutex);
host->i_ctime = ubifs_current_time(host);
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
host_ui->xattr_size += CALC_XATTR_BYTES(size);
host_ui->xattr_names += nm->len;
- /*
- * We do not use i_size_write() because nobody can race with us as we
- * are holding host @host->i_mutex - every xattr operation for this
- * inode is serialized by it.
- */
- inode->i_size = ui->ui_size = size;
- ui->data_len = size;
err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
if (err)
goto out_cancel;
@@ -172,8 +167,8 @@ out_cancel:
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
-out_unlock:
mutex_unlock(&host_ui->ui_mutex);
+out_free:
make_bad_inode(inode);
iput(inode);
out_budg:
@@ -200,29 +195,27 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_budget_req req = { .dirtied_ino = 2,
- .dirtied_ino_d = size + host_ui->data_len };
+ .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
ubifs_assert(ui->data_len == inode->i_size);
err = ubifs_budget_space(c, &req);
if (err)
return err;
- mutex_lock(&host_ui->ui_mutex);
- host->i_ctime = ubifs_current_time(host);
- host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
- host_ui->xattr_size += CALC_XATTR_BYTES(size);
-
kfree(ui->data);
- ui->data = kmalloc(size, GFP_NOFS);
+ ui->data = kmemdup(value, size, GFP_NOFS);
if (!ui->data) {
err = -ENOMEM;
- goto out_unlock;
+ goto out_free;
}
-
- memcpy(ui->data, value, size);
inode->i_size = ui->ui_size = size;
ui->data_len = size;
+ mutex_lock(&host_ui->ui_mutex);
+ host->i_ctime = ubifs_current_time(host);
+ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
+ host_ui->xattr_size += CALC_XATTR_BYTES(size);
+
/*
* It is important to write the host inode after the xattr inode
* because if the host inode gets synchronized (via 'fsync()'), then
@@ -240,9 +233,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
out_cancel:
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
- make_bad_inode(inode);
-out_unlock:
mutex_unlock(&host_ui->ui_mutex);
+ make_bad_inode(inode);
+out_free:
ubifs_release_budget(c, &req);
return err;
}
@@ -305,13 +298,14 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err, type;
- dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
- host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+ dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name,
+ host->i_ino, dentry, size);
+ ubifs_assert(mutex_is_locked(&host->i_mutex));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -367,14 +361,14 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_inode *ui;
struct ubifs_dent_node *xent;
union ubifs_key key;
int err;
- dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
- host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
+ dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
+ host->i_ino, dentry, size);
err = check_namespace(&nm);
if (err < 0)
@@ -384,7 +378,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
if (!xent)
return -ENOMEM;
- mutex_lock(&host->i_mutex);
xent_key_init(c, &key, host->i_ino, &nm);
err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
if (err) {
@@ -406,8 +399,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
if (buf) {
/* If @buf is %NULL we are supposed to return the length */
if (ui->data_len > size) {
- dbg_err("buffer size %zd, xattr len %d",
- size, ui->data_len);
+ ubifs_err("buffer size %zd, xattr len %d",
+ size, ui->data_len);
err = -ERANGE;
goto out_iput;
}
@@ -419,7 +412,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
out_iput:
iput(inode);
out_unlock:
- mutex_unlock(&host->i_mutex);
kfree(xent);
return err;
}
@@ -434,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
int err, len, written = 0;
struct qstr nm = { .name = NULL };
- dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
- dentry->d_name.len, dentry->d_name.name, size);
+ dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino,
+ dentry, size);
len = host_ui->xattr_names + host_ui->xattr_cnt;
if (!buffer)
@@ -449,13 +441,11 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
return -ERANGE;
lowest_xent_key(c, &key, host->i_ino);
-
- mutex_lock(&host->i_mutex);
while (1) {
int type;
xent = ubifs_tnc_next_ent(c, &key, &nm);
- if (unlikely(IS_ERR(xent))) {
+ if (IS_ERR(xent)) {
err = PTR_ERR(xent);
break;
}
@@ -479,7 +469,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
pxent = xent;
key_read(c, &xent->key, &key);
}
- mutex_unlock(&host->i_mutex);
kfree(pxent);
if (err != -ENOENT) {
@@ -497,8 +486,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
int err;
struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_inode *ui = ubifs_inode(inode);
- struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
- .dirtied_ino_d = host_ui->data_len };
+ struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
ubifs_assert(ui->data_len == inode->i_size);
@@ -535,13 +524,13 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err;
- dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
- host->i_ino, dentry->d_name.len, dentry->d_name.name);
+ dbg_gen("xattr '%s', ino %lu ('%pd')", name,
+ host->i_ino, dentry);
ubifs_assert(mutex_is_locked(&host->i_mutex));
err = check_namespace(&nm);
@@ -567,10 +556,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
}
ubifs_assert(inode->i_nlink == 1);
- inode->i_nlink = 0;
+ clear_nlink(inode);
err = remove_xattr(c, host, inode, &nm);
if (err)
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
/* If @i_nlink is 0, 'iput()' will delete the inode */
iput(inode);