Diffstat (limited to 'fs/ubifs')
-rw-r--r--  fs/ubifs/Kconfig        |   35
-rw-r--r--  fs/ubifs/Makefile       |    5
-rw-r--r--  fs/ubifs/budget.c       |  116
-rw-r--r--  fs/ubifs/commit.c       |   90
-rw-r--r--  fs/ubifs/compress.c     |    7
-rw-r--r--  fs/ubifs/debug.c        | 1813
-rw-r--r--  fs/ubifs/debug.h        |  521
-rw-r--r--  fs/ubifs/dir.c          |  201
-rw-r--r--  fs/ubifs/file.c         |  127
-rw-r--r--  fs/ubifs/find.c         |   26
-rw-r--r--  fs/ubifs/gc.c           |   82
-rw-r--r--  fs/ubifs/io.c           |  418
-rw-r--r--  fs/ubifs/ioctl.c        |    8
-rw-r--r--  fs/ubifs/journal.c      |   83
-rw-r--r--  fs/ubifs/log.c          |  105
-rw-r--r--  fs/ubifs/lprops.c       |  209
-rw-r--r--  fs/ubifs/lpt.c          |  112
-rw-r--r--  fs/ubifs/lpt_commit.c   |  275
-rw-r--r--  fs/ubifs/master.c       |   16
-rw-r--r--  fs/ubifs/misc.h         |  120
-rw-r--r--  fs/ubifs/orphan.c       |   84
-rw-r--r--  fs/ubifs/recovery.c     |  561
-rw-r--r--  fs/ubifs/replay.c       |  514
-rw-r--r--  fs/ubifs/sb.c           |  217
-rw-r--r--  fs/ubifs/scan.c         |   33
-rw-r--r--  fs/ubifs/shrinker.c     |   25
-rw-r--r--  fs/ubifs/super.c        |  535
-rw-r--r--  fs/ubifs/tnc.c          |  164
-rw-r--r--  fs/ubifs/tnc_commit.c   |  187
-rw-r--r--  fs/ubifs/tnc_misc.c     |   46
-rw-r--r--  fs/ubifs/ubifs-media.h  |   30
-rw-r--r--  fs/ubifs/ubifs.h        |  231
-rw-r--r--  fs/ubifs/xattr.c        |   48
33 files changed, 3772 insertions(+), 3272 deletions(-)
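
The bulk of the changes below build the UBIFS debugging and xattr code in unconditionally (the UBIFS_FS_DEBUG and UBIFS_FS_XATTR Kconfig options are removed), rename the dbg_dump_*() helpers to ubifs_dump_*(), and consolidate the per-filesystem budgeting fields (c->budg_idx_growth, c->budg_data_growth, c->budg_dd_growth, c->min_idx_lebs, c->nospace, the per-object budgets, ...) into a single sub-structure accessed as c->bi. As an orientation aid only, the following is a sketch of that structure reconstructed from the c->bi.* accesses and the printf formats in the hunks below; the field list and types are inferred from this diff, not copied from ubifs.h:

/*
 * Sketch, not the verbatim kernel definition: the budgeting state that this
 * diff moves from 'struct ubifs_info' into the embedded 'c->bi'.
 */
struct ubifs_budg_info {
	long long idx_growth;		/* was c->budg_idx_growth */
	long long data_growth;		/* was c->budg_data_growth */
	long long dd_growth;		/* was c->budg_dd_growth */
	long long uncommitted_idx;	/* was c->budg_uncommitted_idx */
	unsigned long long old_idx_sz;	/* was c->old_idx_sz */
	int min_idx_lebs;		/* was c->min_idx_lebs */
	unsigned int nospace:1;		/* was c->nospace */
	unsigned int nospace_rp:1;	/* was c->nospace_rp */
	int page_budget;		/* was c->page_budget */
	int inode_budget;		/* was c->inode_budget */
	int dent_budget;		/* was c->dent_budget */
};
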
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 830e3f76f44..ba66d508006 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -11,12 +11,6 @@ config UBIFS_FS help UBIFS is a file system for flash devices which works on top of UBI. -config UBIFS_FS_XATTR - bool "Extended attributes support" - depends on UBIFS_FS - help - This option enables support of extended attributes. - config UBIFS_FS_ADVANCED_COMPR bool "Advanced compression options" depends on UBIFS_FS @@ -41,32 +35,3 @@ config UBIFS_FS_ZLIB default y help Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. - -# Debugging-related stuff -config UBIFS_FS_DEBUG - bool "Enable debugging" - depends on UBIFS_FS - select DEBUG_FS - select KALLSYMS_ALL - help - This option enables UBIFS debugging. - -config UBIFS_FS_DEBUG_MSG_LVL - int "Default message level (0 = no extra messages, 3 = lots)" - depends on UBIFS_FS_DEBUG - default "0" - help - This controls the amount of debugging messages produced by UBIFS. - If reporting bugs, please try to have available a full dump of the - messages at level 1 while the misbehaviour was occurring. Level 2 - may become necessary if level 1 messages were not enough to find the - bug. Generally Level 3 should be avoided. - -config UBIFS_FS_DEBUG_CHKS - bool "Enable extra checks" - depends on UBIFS_FS_DEBUG - help - If extra checks are enabled UBIFS will check the consistency of its - internal data structures during operation. However, UBIFS performance - is dramatically slower when this option is selected especially if the - file system is large. diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile index 80e93c35e49..2c6f0cb816b 100644 --- a/fs/ubifs/Makefile +++ b/fs/ubifs/Makefile @@ -3,7 +3,4 @@ obj-$(CONFIG_UBIFS_FS) += ubifs.o ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o -ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o - -ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o -ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o +ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index c8ff0d1ae5d..eb997e9c4ab 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -63,7 +63,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { down_read(&c->vfs_sb->s_umount); - writeback_inodes_sb(c->vfs_sb); + writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE); up_read(&c->vfs_sb->s_umount); } @@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) long long liab; spin_lock(&c->space_lock); - liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; + liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; spin_unlock(&c->space_lock); return liab; } @@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c) if (liab2 < liab1) return -EAGAIN; - dbg_budg("new liability %lld (not shrinked)", liab2); + dbg_budg("new liability %lld (not shrunk)", liab2); /* Liability did not shrink again, try GC */ dbg_budg("Run GC"); @@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) int idx_lebs; long long idx_size; - idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; /* And make sure we have thrice the index size of space reserved */ idx_size += idx_size << 1; /* @@ -272,8 +272,8 @@ long long ubifs_calc_available(const 
struct ubifs_info *c, int min_idx_lebs) */ static int can_use_rp(struct ubifs_info *c) { - if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) || - (c->rp_gid != 0 && in_group_p(c->rp_gid))) + if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) || + (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid))) return 1; return 0; } @@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) * budgeted index space to the size of the current index, multiplies this by 3, * and makes sure this does not exceed the amount of free LEBs. * - * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: + * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might * be large, because UBIFS does not do any index consolidation as long as * there is free space. IOW, the index may take a lot of LEBs, but the LEBs * will contain a lot of dirt. - * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, - * the index may be consolidated to take up to @c->min_idx_lebs LEBs. + * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. * * This function returns zero in case of success, and %-ENOSPC in case of * failure. @@ -342,14 +342,13 @@ static int do_budget_space(struct ubifs_info *c) lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; if (unlikely(rsvd_idx_lebs > lebs)) { - dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " - "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, - rsvd_idx_lebs); + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d", + min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs); return -ENOSPC; } available = ubifs_calc_available(c, min_idx_lebs); - outstanding = c->budg_data_growth + c->budg_dd_growth; + outstanding = c->bi.data_growth + c->bi.dd_growth; if (unlikely(available < outstanding)) { dbg_budg("out of data space: available %lld, outstanding %lld", @@ -360,7 +359,7 @@ static int do_budget_space(struct ubifs_info *c) if (available - outstanding <= c->rp_size && !can_use_rp(c)) return -ENOSPC; - c->min_idx_lebs = min_idx_lebs; + c->bi.min_idx_lebs = min_idx_lebs; return 0; } @@ -393,11 +392,11 @@ static int calc_data_growth(const struct ubifs_info *c, { int data_growth; - data_growth = req->new_ino ? c->inode_budget : 0; + data_growth = req->new_ino ? c->bi.inode_budget : 0; if (req->new_page) - data_growth += c->page_budget; + data_growth += c->bi.page_budget; if (req->new_dent) - data_growth += c->dent_budget; + data_growth += c->bi.dent_budget; data_growth += req->new_ino_d; return data_growth; } @@ -413,12 +412,12 @@ static int calc_dd_growth(const struct ubifs_info *c, { int dd_growth; - dd_growth = req->dirtied_page ? c->page_budget : 0; + dd_growth = req->dirtied_page ? 
c->bi.page_budget : 0; if (req->dirtied_ino) - dd_growth += c->inode_budget << (req->dirtied_ino - 1); + dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); if (req->mod_dent) - dd_growth += c->dent_budget; + dd_growth += c->bi.dent_budget; dd_growth += req->dirtied_ino_d; return dd_growth; } @@ -438,7 +437,6 @@ static int calc_dd_growth(const struct ubifs_info *c, */ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) { - int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); int err, idx_growth, data_growth, dd_growth, retried = 0; ubifs_assert(req->new_page <= 1); @@ -460,19 +458,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) again: spin_lock(&c->space_lock); - ubifs_assert(c->budg_idx_growth >= 0); - ubifs_assert(c->budg_data_growth >= 0); - ubifs_assert(c->budg_dd_growth >= 0); + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); - if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { + if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { dbg_budg("no space"); spin_unlock(&c->space_lock); return -ENOSPC; } - c->budg_idx_growth += idx_growth; - c->budg_data_growth += data_growth; - c->budg_dd_growth += dd_growth; + c->bi.idx_growth += idx_growth; + c->bi.data_growth += data_growth; + c->bi.dd_growth += dd_growth; err = do_budget_space(c); if (likely(!err)) { @@ -484,9 +482,9 @@ again: } /* Restore the old values */ - c->budg_idx_growth -= idx_growth; - c->budg_data_growth -= data_growth; - c->budg_dd_growth -= dd_growth; + c->bi.idx_growth -= idx_growth; + c->bi.data_growth -= data_growth; + c->bi.dd_growth -= dd_growth; spin_unlock(&c->space_lock); if (req->fast) { @@ -506,9 +504,9 @@ again: goto again; } dbg_budg("FS is full, -ENOSPC"); - c->nospace = 1; + c->bi.nospace = 1; if (can_use_rp(c) || c->rp_size == 0) - c->nospace_rp = 1; + c->bi.nospace_rp = 1; smp_wmb(); } else ubifs_err("cannot budget space, error %d", err); @@ -523,8 +521,8 @@ again: * This function releases the space budgeted by 'ubifs_budget_space()'. Note, * since the index changes (which were budgeted for in @req->idx_growth) will * only be written to the media on commit, this function moves the index budget - * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be - * zeroed by the commit operation. + * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed + * by the commit operation. 
*/ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) { @@ -553,23 +551,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) if (!req->data_growth && !req->dd_growth) return; - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); spin_lock(&c->space_lock); - c->budg_idx_growth -= req->idx_growth; - c->budg_uncommitted_idx += req->idx_growth; - c->budg_data_growth -= req->data_growth; - c->budg_dd_growth -= req->dd_growth; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); - - ubifs_assert(c->budg_idx_growth >= 0); - ubifs_assert(c->budg_data_growth >= 0); - ubifs_assert(c->budg_dd_growth >= 0); - ubifs_assert(c->min_idx_lebs < c->main_lebs); - ubifs_assert(!(c->budg_idx_growth & 7)); - ubifs_assert(!(c->budg_data_growth & 7)); - ubifs_assert(!(c->budg_dd_growth & 7)); + c->bi.idx_growth -= req->idx_growth; + c->bi.uncommitted_idx += req->idx_growth; + c->bi.data_growth -= req->data_growth; + c->bi.dd_growth -= req->dd_growth; + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); + ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); + ubifs_assert(!(c->bi.idx_growth & 7)); + ubifs_assert(!(c->bi.data_growth & 7)); + ubifs_assert(!(c->bi.dd_growth & 7)); spin_unlock(&c->space_lock); } @@ -586,13 +584,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) { spin_lock(&c->space_lock); /* Release the index growth reservation */ - c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; /* Release the data growth reservation */ - c->budg_data_growth -= c->page_budget; + c->bi.data_growth -= c->bi.page_budget; /* Increase the dirty data growth reservation instead */ - c->budg_dd_growth += c->page_budget; + c->bi.dd_growth += c->bi.page_budget; /* And re-calculate the indexing space reservation */ - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); } @@ -612,7 +610,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, memset(&req, 0, sizeof(struct ubifs_budget_req)); /* The "no space" flags will be cleared because dd_growth is > 0 */ - req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); + req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } @@ -682,9 +680,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) int rsvd_idx_lebs, lebs; long long available, outstanding, free; - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); - outstanding = c->budg_data_growth + c->budg_dd_growth; - available = ubifs_calc_available(c, c->min_idx_lebs); + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + outstanding = c->bi.data_growth + c->bi.dd_growth; + available = ubifs_calc_available(c, c->bi.min_idx_lebs); /* * When reporting free space to user-space, UBIFS guarantees that it is @@ -697,8 +695,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) * Note, the calculations below are similar to what we have in * 'do_budget_space()', so refer there for comments. 
*/ - if (c->min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 02429d81ca3..ff8229340cd 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -48,6 +48,56 @@ #include <linux/slab.h> #include "ubifs.h" +/* + * nothing_to_commit - check if there is nothing to commit. + * @c: UBIFS file-system description object + * + * This is a helper function which checks if there is anything to commit. It is + * used as an optimization to avoid starting the commit if it is not really + * necessary. Indeed, the commit operation always assumes flash I/O (e.g., + * writing the commit start node to the log), and it is better to avoid doing + * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is + * nothing to commit, it is more optimal to avoid any flash I/O. + * + * This function has to be called with @c->commit_sem locked for writing - + * this function does not take LPT/TNC locks because the @c->commit_sem + * guarantees that we have exclusive access to the TNC and LPT data structures. + * + * This function returns %1 if there is nothing to commit and %0 otherwise. + */ +static int nothing_to_commit(struct ubifs_info *c) +{ + /* + * During mounting or remounting from R/O mode to R/W mode we may + * commit for various recovery-related reasons. + */ + if (c->mounting || c->remounting_rw) + return 0; + + /* + * If the root TNC node is dirty, we definitely have something to + * commit. + */ + if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) + return 0; + + /* + * Even though the TNC is clean, the LPT tree may have dirty nodes. For + * example, this may happen if the budgeting subsystem invoked GC to + * make some free space, and the GC found an LEB with only dirty and + * free space. In this case GC would just change the lprops of this + * LEB (by turning all space into free space) and unmap it. + */ + if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) + return 0; + + ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); + ubifs_assert(c->dirty_pn_cnt == 0); + ubifs_assert(c->dirty_nn_cnt == 0); + + return 1; +} + /** * do_commit - commit the journal. 
* @c: UBIFS file-system description object @@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c) goto out_up; } + if (nothing_to_commit(c)) { + up_write(&c->commit_sem); + err = 0; + goto out_cancel; + } + /* Sync all write buffers (necessary for recovery) */ for (i = 0; i < c->jhead_cnt; i++) { err = ubifs_wbuf_sync(&c->jheads[i].wbuf); @@ -126,7 +182,7 @@ static int do_commit(struct ubifs_info *c) c->mst_node->root_len = cpu_to_le32(zroot.len); c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); - c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); + c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); @@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c) if (err) goto out; +out_cancel: spin_lock(&c->cs_lock); c->cmt_state = COMMIT_RESTING; wake_up(&c->cmt_wq); dbg_cmt("commit end"); spin_unlock(&c->cs_lock); - return 0; out_up: @@ -237,8 +293,8 @@ int ubifs_bg_thread(void *info) int err; struct ubifs_info *c = info; - dbg_msg("background thread \"%s\" started, PID %d", - c->bgt_name, current->pid); + ubifs_msg("background thread \"%s\" started, PID %d", + c->bgt_name, current->pid); set_freezable(); while (1) { @@ -272,7 +328,7 @@ int ubifs_bg_thread(void *info) cond_resched(); } - dbg_msg("background thread \"%s\" stops", c->bgt_name); + ubifs_msg("background thread \"%s\" stops", c->bgt_name); return 0; } @@ -362,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) spin_lock(&c->cs_lock); if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out; } @@ -388,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) * re-check it. */ if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out_cmt_unlock; } @@ -440,7 +496,9 @@ int ubifs_gc_should_commit(struct ubifs_info *c) return ret; } -#ifdef CONFIG_UBIFS_FS_DEBUG +/* + * Everything below is related to debugging. + */ /** * struct idx_node - hold index nodes during index tree traversal. 
@@ -456,7 +514,7 @@ struct idx_node { struct list_head list; int iip; union ubifs_key upper_key; - struct ubifs_idx_node idx __attribute__((aligned(8))); + struct ubifs_idx_node idx __aligned(8); }; /** @@ -520,8 +578,8 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) struct idx_node *i; size_t sz; - if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) - goto out; + if (!dbg_is_chk_index(c)) + return 0; INIT_LIST_HEAD(&list); @@ -658,14 +716,14 @@ out: return 0; out_dump: - dbg_err("dumping index node (iip=%d)", i->iip); - dbg_dump_node(c, idx); + ubifs_err("dumping index node (iip=%d)", i->iip); + ubifs_dump_node(c, idx); list_del(&i->list); kfree(i); if (!list_empty(&list)) { i = list_entry(list.prev, struct idx_node, list); - dbg_err("dumping parent index node"); - dbg_dump_node(c, &i->idx); + ubifs_err("dumping parent index node"); + ubifs_dump_node(c, &i->idx); } out_free: while (!list_empty(&list)) { @@ -678,5 +736,3 @@ out_free: err = -EINVAL; return err; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 11e4132f314..2bfa0953335 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -112,8 +112,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, if (compr->comp_mutex) mutex_unlock(compr->comp_mutex); if (unlikely(err)) { - ubifs_warn("cannot compress %d bytes, compressor %s, " - "error %d, leave data uncompressed", + ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", in_len, compr->name, err); goto no_compr; } @@ -176,8 +175,8 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, if (compr->decomp_mutex) mutex_unlock(compr->decomp_mutex); if (err) - ubifs_err("cannot decompress %d bytes, compressor %s, " - "error %d", in_len, compr->name, err); + ubifs_err("cannot decompress %d bytes, compressor %s, error %d", + in_len, compr->name, err); return err; } diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 0bee4dbffc3..177b0152fef 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -27,33 +27,14 @@ * various local functions of those subsystems. 
*/ -#define UBIFS_DBG_PRESERVE_UBI - -#include "ubifs.h" #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/math64.h> -#include <linux/slab.h> - -#ifdef CONFIG_UBIFS_FS_DEBUG - -DEFINE_SPINLOCK(dbg_lock); - -static char dbg_key_buf0[128]; -static char dbg_key_buf1[128]; - -unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; -unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; -unsigned int ubifs_tst_flags; - -module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); +#include <linux/uaccess.h> +#include <linux/random.h> +#include "ubifs.h" -MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); -MODULE_PARM_DESC(debug_chks, "Debug check flags"); -MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); +static DEFINE_SPINLOCK(dbg_lock); static const char *get_key_fmt(int fmt) { @@ -95,8 +76,30 @@ static const char *get_key_type(int type) } } -static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, - char *buffer) +static const char *get_dent_type(int type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return "file"; + case UBIFS_ITYPE_DIR: + return "dir"; + case UBIFS_ITYPE_LNK: + return "symlink"; + case UBIFS_ITYPE_BLK: + return "blkdev"; + case UBIFS_ITYPE_CHR: + return "char dev"; + case UBIFS_ITYPE_FIFO: + return "fifo"; + case UBIFS_ITYPE_SOCK: + return "socket"; + default: + return "unknown/invalid type"; + } +} + +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len) { char *p = buffer; int type = key_type(c, key); @@ -104,45 +107,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { switch (type) { case UBIFS_INO_KEY: - sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; case UBIFS_DENT_KEY: case UBIFS_XENT_KEY: - sprintf(p, "(%lu, %s, %#08x)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_hash(c, key)); + len -= snprintf(p, len, "(%lu, %s, %#08x)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_hash(c, key)); break; case UBIFS_DATA_KEY: - sprintf(p, "(%lu, %s, %u)", - (unsigned long)key_inum(c, key), - get_key_type(type), key_block(c, key)); + len -= snprintf(p, len, "(%lu, %s, %u)", + (unsigned long)key_inum(c, key), + get_key_type(type), key_block(c, key)); break; case UBIFS_TRUN_KEY: - sprintf(p, "(%lu, %s)", - (unsigned long)key_inum(c, key), - get_key_type(type)); + len -= snprintf(p, len, "(%lu, %s)", + (unsigned long)key_inum(c, key), + get_key_type(type)); break; default: - sprintf(p, "(bad key type: %#08x, %#08x)", - key->u32[0], key->u32[1]); + len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", + key->u32[0], key->u32[1]); } } else - sprintf(p, "bad key format %d", c->key_fmt); -} - -const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) -{ - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf0); - return dbg_key_buf0; -} - -const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) -{ - /* dbg_lock must be held */ - sprintf_key(c, key, dbg_key_buf1); - return dbg_key_buf1; + len -= snprintf(p, len, "bad key format %d", c->key_fmt); + ubifs_assert(len > 0); + return p; } 
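
A note on the key-printing change above: the removed sprintf_key()/dbg_key_str0()/dbg_key_str1() helpers formatted keys into two static buffers and so only worked while dbg_lock was held, with at most two keys per message. The new dbg_snprintf_key() writes into a caller-supplied buffer of DBG_KEY_BUF_LEN bytes and returns it, so each call site owns its own storage. A minimal illustration of the resulting call pattern (the same pattern the ubifs_dump_node() and ubifs_dump_znode() hunks below adopt); the wrapper function here is hypothetical:

/* Illustration only -- mirrors the call sites added later in this diff. */
static void dump_one_key(const struct ubifs_info *c,
			 const union ubifs_key *key)
{
	char key_buf[DBG_KEY_BUF_LEN];

	pr_err("\tkey %s\n",
	       dbg_snprintf_key(c, key, key_buf, DBG_KEY_BUF_LEN));
}
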
const char *dbg_ntype(int type) @@ -227,68 +219,96 @@ const char *dbg_jhead(int jhead) static void dump_ch(const struct ubifs_ch *ch) { - printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); - printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); - printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, + pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic)); + pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc)); + pr_err("\tnode_type %d (%s)\n", ch->node_type, dbg_ntype(ch->node_type)); - printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, + pr_err("\tgroup_type %d (%s)\n", ch->group_type, dbg_gtype(ch->group_type)); - printk(KERN_DEBUG "\tsqnum %llu\n", + pr_err("\tsqnum %llu\n", (unsigned long long)le64_to_cpu(ch->sqnum)); - printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); + pr_err("\tlen %u\n", le32_to_cpu(ch->len)); } -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) +void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) { const struct ubifs_inode *ui = ubifs_inode(inode); + struct qstr nm = { .name = NULL }; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + int count = 2; - printk(KERN_DEBUG "Dump in-memory inode:"); - printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); - printk(KERN_DEBUG "\tsize %llu\n", + pr_err("Dump in-memory inode:"); + pr_err("\tinode %lu\n", inode->i_ino); + pr_err("\tsize %llu\n", (unsigned long long)i_size_read(inode)); - printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); - printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); - printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); - printk(KERN_DEBUG "\tatime %u.%u\n", + pr_err("\tnlink %u\n", inode->i_nlink); + pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode)); + pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode)); + pr_err("\tatime %u.%u\n", (unsigned int)inode->i_atime.tv_sec, (unsigned int)inode->i_atime.tv_nsec); - printk(KERN_DEBUG "\tmtime %u.%u\n", + pr_err("\tmtime %u.%u\n", (unsigned int)inode->i_mtime.tv_sec, (unsigned int)inode->i_mtime.tv_nsec); - printk(KERN_DEBUG "\tctime %u.%u\n", + pr_err("\tctime %u.%u\n", (unsigned int)inode->i_ctime.tv_sec, (unsigned int)inode->i_ctime.tv_nsec); - printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); - printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); - printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); - printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); - printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); - printk(KERN_DEBUG "\txattr %u\n", ui->xattr); - printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); - printk(KERN_DEBUG "\tsynced_i_size %llu\n", + pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum); + pr_err("\txattr_size %u\n", ui->xattr_size); + pr_err("\txattr_cnt %u\n", ui->xattr_cnt); + pr_err("\txattr_names %u\n", ui->xattr_names); + pr_err("\tdirty %u\n", ui->dirty); + pr_err("\txattr %u\n", ui->xattr); + pr_err("\tbulk_read %u\n", ui->xattr); + pr_err("\tsynced_i_size %llu\n", (unsigned long long)ui->synced_i_size); - printk(KERN_DEBUG "\tui_size %llu\n", + pr_err("\tui_size %llu\n", (unsigned long long)ui->ui_size); - printk(KERN_DEBUG "\tflags %d\n", ui->flags); - printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); - printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); - printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); - printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); + pr_err("\tflags %d\n", ui->flags); + pr_err("\tcompr_type %d\n", ui->compr_type); + pr_err("\tlast_page_read %lu\n", 
ui->last_page_read); + pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row); + pr_err("\tdata_len %d\n", ui->data_len); + + if (!S_ISDIR(inode->i_mode)) + return; + + pr_err("List of directory entries:\n"); + ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); + + lowest_dent_key(c, &key, inode->i_ino); + while (1) { + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + if (PTR_ERR(dent) != -ENOENT) + pr_err("error %ld\n", PTR_ERR(dent)); + break; + } + + pr_err("\t%d: %s (%s)\n", + count++, dent->name, get_dent_type(dent->type)); + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); } -void dbg_dump_node(const struct ubifs_info *c, const void *node) +void ubifs_dump_node(const struct ubifs_info *c, const void *node) { int i, n; union ubifs_key key; const struct ubifs_ch *ch = node; - - if (dbg_failure_mode) - return; + char key_buf[DBG_KEY_BUF_LEN]; /* If the magic is incorrect, just hexdump the first bytes */ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { - printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, (void *)node, UBIFS_CH_SZ, 1); return; } @@ -301,8 +321,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_pad_node *pad = node; - printk(KERN_DEBUG "\tpad_len %u\n", - le32_to_cpu(pad->pad_len)); + pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len)); break; } case UBIFS_SB_NODE: @@ -310,110 +329,77 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_sb_node *sup = node; unsigned int sup_flags = le32_to_cpu(sup->flags); - printk(KERN_DEBUG "\tkey_hash %d (%s)\n", + pr_err("\tkey_hash %d (%s)\n", (int)sup->key_hash, get_key_hash(sup->key_hash)); - printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", + pr_err("\tkey_fmt %d (%s)\n", (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); - printk(KERN_DEBUG "\tflags %#x\n", sup_flags); - printk(KERN_DEBUG "\t big_lpt %u\n", + pr_err("\tflags %#x\n", sup_flags); + pr_err("\t big_lpt %u\n", !!(sup_flags & UBIFS_FLG_BIGLPT)); - printk(KERN_DEBUG "\tmin_io_size %u\n", - le32_to_cpu(sup->min_io_size)); - printk(KERN_DEBUG "\tleb_size %u\n", - le32_to_cpu(sup->leb_size)); - printk(KERN_DEBUG "\tleb_cnt %u\n", - le32_to_cpu(sup->leb_cnt)); - printk(KERN_DEBUG "\tmax_leb_cnt %u\n", - le32_to_cpu(sup->max_leb_cnt)); - printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", + pr_err("\t space_fixup %u\n", + !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); + pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); + pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt)); + pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt)); + pr_err("\tmax_bud_bytes %llu\n", (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); - printk(KERN_DEBUG "\tlog_lebs %u\n", - le32_to_cpu(sup->log_lebs)); - printk(KERN_DEBUG "\tlpt_lebs %u\n", - le32_to_cpu(sup->lpt_lebs)); - printk(KERN_DEBUG "\torph_lebs %u\n", - le32_to_cpu(sup->orph_lebs)); - printk(KERN_DEBUG "\tjhead_cnt %u\n", - le32_to_cpu(sup->jhead_cnt)); - printk(KERN_DEBUG "\tfanout %u\n", - le32_to_cpu(sup->fanout)); - printk(KERN_DEBUG "\tlsave_cnt %u\n", - le32_to_cpu(sup->lsave_cnt)); - printk(KERN_DEBUG "\tdefault_compr %u\n", + pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs)); + pr_err("\tlpt_lebs %u\n", 
le32_to_cpu(sup->lpt_lebs)); + pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs)); + pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt)); + pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout)); + pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt)); + pr_err("\tdefault_compr %u\n", (int)le16_to_cpu(sup->default_compr)); - printk(KERN_DEBUG "\trp_size %llu\n", + pr_err("\trp_size %llu\n", (unsigned long long)le64_to_cpu(sup->rp_size)); - printk(KERN_DEBUG "\trp_uid %u\n", - le32_to_cpu(sup->rp_uid)); - printk(KERN_DEBUG "\trp_gid %u\n", - le32_to_cpu(sup->rp_gid)); - printk(KERN_DEBUG "\tfmt_version %u\n", - le32_to_cpu(sup->fmt_version)); - printk(KERN_DEBUG "\ttime_gran %u\n", - le32_to_cpu(sup->time_gran)); - printk(KERN_DEBUG "\tUUID %pUB\n", - sup->uuid); + pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid)); + pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid)); + pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version)); + pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); + pr_err("\tUUID %pUB\n", sup->uuid); break; } case UBIFS_MST_NODE: { const struct ubifs_mst_node *mst = node; - printk(KERN_DEBUG "\thighest_inum %llu\n", + pr_err("\thighest_inum %llu\n", (unsigned long long)le64_to_cpu(mst->highest_inum)); - printk(KERN_DEBUG "\tcommit number %llu\n", + pr_err("\tcommit number %llu\n", (unsigned long long)le64_to_cpu(mst->cmt_no)); - printk(KERN_DEBUG "\tflags %#x\n", - le32_to_cpu(mst->flags)); - printk(KERN_DEBUG "\tlog_lnum %u\n", - le32_to_cpu(mst->log_lnum)); - printk(KERN_DEBUG "\troot_lnum %u\n", - le32_to_cpu(mst->root_lnum)); - printk(KERN_DEBUG "\troot_offs %u\n", - le32_to_cpu(mst->root_offs)); - printk(KERN_DEBUG "\troot_len %u\n", - le32_to_cpu(mst->root_len)); - printk(KERN_DEBUG "\tgc_lnum %u\n", - le32_to_cpu(mst->gc_lnum)); - printk(KERN_DEBUG "\tihead_lnum %u\n", - le32_to_cpu(mst->ihead_lnum)); - printk(KERN_DEBUG "\tihead_offs %u\n", - le32_to_cpu(mst->ihead_offs)); - printk(KERN_DEBUG "\tindex_size %llu\n", + pr_err("\tflags %#x\n", le32_to_cpu(mst->flags)); + pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum)); + pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum)); + pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs)); + pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len)); + pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum)); + pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum)); + pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs)); + pr_err("\tindex_size %llu\n", (unsigned long long)le64_to_cpu(mst->index_size)); - printk(KERN_DEBUG "\tlpt_lnum %u\n", - le32_to_cpu(mst->lpt_lnum)); - printk(KERN_DEBUG "\tlpt_offs %u\n", - le32_to_cpu(mst->lpt_offs)); - printk(KERN_DEBUG "\tnhead_lnum %u\n", - le32_to_cpu(mst->nhead_lnum)); - printk(KERN_DEBUG "\tnhead_offs %u\n", - le32_to_cpu(mst->nhead_offs)); - printk(KERN_DEBUG "\tltab_lnum %u\n", - le32_to_cpu(mst->ltab_lnum)); - printk(KERN_DEBUG "\tltab_offs %u\n", - le32_to_cpu(mst->ltab_offs)); - printk(KERN_DEBUG "\tlsave_lnum %u\n", - le32_to_cpu(mst->lsave_lnum)); - printk(KERN_DEBUG "\tlsave_offs %u\n", - le32_to_cpu(mst->lsave_offs)); - printk(KERN_DEBUG "\tlscan_lnum %u\n", - le32_to_cpu(mst->lscan_lnum)); - printk(KERN_DEBUG "\tleb_cnt %u\n", - le32_to_cpu(mst->leb_cnt)); - printk(KERN_DEBUG "\tempty_lebs %u\n", - le32_to_cpu(mst->empty_lebs)); - printk(KERN_DEBUG "\tidx_lebs %u\n", - le32_to_cpu(mst->idx_lebs)); - printk(KERN_DEBUG "\ttotal_free %llu\n", + pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum)); + pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs)); + 
pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum)); + pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs)); + pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum)); + pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs)); + pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum)); + pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs)); + pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum)); + pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt)); + pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs)); + pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs)); + pr_err("\ttotal_free %llu\n", (unsigned long long)le64_to_cpu(mst->total_free)); - printk(KERN_DEBUG "\ttotal_dirty %llu\n", + pr_err("\ttotal_dirty %llu\n", (unsigned long long)le64_to_cpu(mst->total_dirty)); - printk(KERN_DEBUG "\ttotal_used %llu\n", + pr_err("\ttotal_used %llu\n", (unsigned long long)le64_to_cpu(mst->total_used)); - printk(KERN_DEBUG "\ttotal_dead %llu\n", + pr_err("\ttotal_dead %llu\n", (unsigned long long)le64_to_cpu(mst->total_dead)); - printk(KERN_DEBUG "\ttotal_dark %llu\n", + pr_err("\ttotal_dark %llu\n", (unsigned long long)le64_to_cpu(mst->total_dark)); break; } @@ -421,12 +407,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_ref_node *ref = node; - printk(KERN_DEBUG "\tlnum %u\n", - le32_to_cpu(ref->lnum)); - printk(KERN_DEBUG "\toffs %u\n", - le32_to_cpu(ref->offs)); - printk(KERN_DEBUG "\tjhead %u\n", - le32_to_cpu(ref->jhead)); + pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum)); + pr_err("\toffs %u\n", le32_to_cpu(ref->offs)); + pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead)); break; } case UBIFS_INO_NODE: @@ -434,40 +417,32 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_ino_node *ino = node; key_read(c, &ino->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); - printk(KERN_DEBUG "\tcreat_sqnum %llu\n", + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tcreat_sqnum %llu\n", (unsigned long long)le64_to_cpu(ino->creat_sqnum)); - printk(KERN_DEBUG "\tsize %llu\n", + pr_err("\tsize %llu\n", (unsigned long long)le64_to_cpu(ino->size)); - printk(KERN_DEBUG "\tnlink %u\n", - le32_to_cpu(ino->nlink)); - printk(KERN_DEBUG "\tatime %lld.%u\n", + pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink)); + pr_err("\tatime %lld.%u\n", (long long)le64_to_cpu(ino->atime_sec), le32_to_cpu(ino->atime_nsec)); - printk(KERN_DEBUG "\tmtime %lld.%u\n", + pr_err("\tmtime %lld.%u\n", (long long)le64_to_cpu(ino->mtime_sec), le32_to_cpu(ino->mtime_nsec)); - printk(KERN_DEBUG "\tctime %lld.%u\n", + pr_err("\tctime %lld.%u\n", (long long)le64_to_cpu(ino->ctime_sec), le32_to_cpu(ino->ctime_nsec)); - printk(KERN_DEBUG "\tuid %u\n", - le32_to_cpu(ino->uid)); - printk(KERN_DEBUG "\tgid %u\n", - le32_to_cpu(ino->gid)); - printk(KERN_DEBUG "\tmode %u\n", - le32_to_cpu(ino->mode)); - printk(KERN_DEBUG "\tflags %#x\n", - le32_to_cpu(ino->flags)); - printk(KERN_DEBUG "\txattr_cnt %u\n", - le32_to_cpu(ino->xattr_cnt)); - printk(KERN_DEBUG "\txattr_size %u\n", - le32_to_cpu(ino->xattr_size)); - printk(KERN_DEBUG "\txattr_names %u\n", - le32_to_cpu(ino->xattr_names)); - printk(KERN_DEBUG "\tcompr_type %#x\n", + pr_err("\tuid %u\n", le32_to_cpu(ino->uid)); + pr_err("\tgid %u\n", le32_to_cpu(ino->gid)); + pr_err("\tmode %u\n", le32_to_cpu(ino->mode)); + pr_err("\tflags %#x\n", le32_to_cpu(ino->flags)); + pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt)); + pr_err("\txattr_size %u\n", 
le32_to_cpu(ino->xattr_size)); + pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names)); + pr_err("\tcompr_type %#x\n", (int)le16_to_cpu(ino->compr_type)); - printk(KERN_DEBUG "\tdata len %u\n", - le32_to_cpu(ino->data_len)); + pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len)); break; } case UBIFS_DENT_NODE: @@ -477,21 +452,21 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int nlen = le16_to_cpu(dent->nlen); key_read(c, &dent->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); - printk(KERN_DEBUG "\tinum %llu\n", + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tinum %llu\n", (unsigned long long)le64_to_cpu(dent->inum)); - printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); - printk(KERN_DEBUG "\tnlen %d\n", nlen); - printk(KERN_DEBUG "\tname "); + pr_err("\ttype %d\n", (int)dent->type); + pr_err("\tnlen %d\n", nlen); + pr_err("\tname "); if (nlen > UBIFS_MAX_NLEN) - printk(KERN_DEBUG "(bad name length, not printing, " - "bad or corrupted node)"); + pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - printk(KERN_CONT "%c", dent->name[i]); + pr_cont("%c", dent->name[i]); } - printk(KERN_CONT "\n"); + pr_cont("\n"); break; } @@ -501,15 +476,14 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); - printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); - printk(KERN_DEBUG "\tsize %u\n", - le32_to_cpu(dn->size)); - printk(KERN_DEBUG "\tcompr_typ %d\n", + pr_err("\tkey %s\n", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + pr_err("\tsize %u\n", le32_to_cpu(dn->size)); + pr_err("\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - printk(KERN_DEBUG "\tdata size %d\n", - dlen); - printk(KERN_DEBUG "\tdata:\n"); - print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, + pr_err("\tdata size %d\n", dlen); + pr_err("\tdata:\n"); + print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, (void *)&dn->data, dlen, 0); break; } @@ -517,11 +491,10 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct ubifs_trun_node *trun = node; - printk(KERN_DEBUG "\tinum %u\n", - le32_to_cpu(trun->inum)); - printk(KERN_DEBUG "\told_size %llu\n", + pr_err("\tinum %u\n", le32_to_cpu(trun->inum)); + pr_err("\told_size %llu\n", (unsigned long long)le64_to_cpu(trun->old_size)); - printk(KERN_DEBUG "\tnew_size %llu\n", + pr_err("\tnew_size %llu\n", (unsigned long long)le64_to_cpu(trun->new_size)); break; } @@ -530,19 +503,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) const struct ubifs_idx_node *idx = node; n = le16_to_cpu(idx->child_cnt); - printk(KERN_DEBUG "\tchild_cnt %d\n", n); - printk(KERN_DEBUG "\tlevel %d\n", - (int)le16_to_cpu(idx->level)); - printk(KERN_DEBUG "\tBranches:\n"); + pr_err("\tchild_cnt %d\n", n); + pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); + pr_err("\tBranches:\n"); for (i = 0; i < n && i < c->fanout - 1; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); key_read(c, &br->key, &key); - printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", + pr_err("\t%d: LEB %d:%d len %d key %s\n", i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), - le32_to_cpu(br->len), DBGKEY(&key)); + le32_to_cpu(br->len), + dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); } break; } @@ -552,57 +526,55 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) { const struct 
ubifs_orph_node *orph = node; - printk(KERN_DEBUG "\tcommit number %llu\n", + pr_err("\tcommit number %llu\n", (unsigned long long) le64_to_cpu(orph->cmt_no) & LLONG_MAX); - printk(KERN_DEBUG "\tlast node flag %llu\n", + pr_err("\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; - printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); + pr_err("\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) - printk(KERN_DEBUG "\t ino %llu\n", + pr_err("\t ino %llu\n", (unsigned long long)le64_to_cpu(orph->inos[i])); break; } default: - printk(KERN_DEBUG "node type %d was not recognized\n", + pr_err("node type %d was not recognized\n", (int)ch->node_type); } spin_unlock(&dbg_lock); } -void dbg_dump_budget_req(const struct ubifs_budget_req *req) +void ubifs_dump_budget_req(const struct ubifs_budget_req *req) { spin_lock(&dbg_lock); - printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", + pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n", req->new_ino, req->dirtied_ino); - printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", + pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n", req->new_ino_d, req->dirtied_ino_d); - printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", + pr_err("\tnew_page %d, dirtied_page %d\n", req->new_page, req->dirtied_page); - printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", + pr_err("\tnew_dent %d, mod_dent %d\n", req->new_dent, req->mod_dent); - printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); - printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", + pr_err("\tidx_growth %d\n", req->idx_growth); + pr_err("\tdata_growth %d dd_growth %d\n", req->data_growth, req->dd_growth); spin_unlock(&dbg_lock); } -void dbg_dump_lstats(const struct ubifs_lp_stats *lst) +void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " - "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); - printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " - "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, - lst->total_dirty); - printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " - "total_dead %lld\n", lst->total_used, lst->total_dark, - lst->total_dead); + pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n", + current->pid, lst->empty_lebs, lst->idx_lebs); + pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n", + lst->taken_empty_lebs, lst->total_free, lst->total_dirty); + pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n", + lst->total_used, lst->total_dark, lst->total_dead); spin_unlock(&dbg_lock); } -void dbg_dump_budg(struct ubifs_info *c) +void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) { int i; struct rb_node *rb; @@ -610,54 +582,67 @@ void dbg_dump_budg(struct ubifs_info *c) struct ubifs_gced_idx_leb *idx_gc; long long available, outstanding, free; - ubifs_assert(spin_is_locked(&c->space_lock)); + spin_lock(&c->space_lock); spin_lock(&dbg_lock); - printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " - "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, - c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); - printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " - "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, - c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, - c->freeable_cnt); - printk(KERN_DEBUG "\tmin_idx_lebs %d, 
old_idx_sz %lld, " - "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, - c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); - printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " - "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n", + current->pid, bi->data_growth + bi->dd_growth, + bi->data_growth + bi->dd_growth + bi->idx_growth); + pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n", + bi->data_growth, bi->dd_growth, bi->idx_growth); + pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n", + bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx); + pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n", + bi->page_budget, bi->inode_budget, bi->dent_budget); + pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp); + pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + c->dark_wm, c->dead_wm, c->max_idx_node_sz); + + if (bi != &c->bi) + /* + * If we are dumping saved budgeting data, do not print + * additional information which is about the current state, not + * the old one which corresponded to the saved budgeting data. + */ + goto out_unlock; + + pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", + c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); + pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n", + atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), atomic_long_read(&c->clean_zn_cnt)); - printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", - c->dark_wm, c->dead_wm, c->max_idx_node_sz); - printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", - c->gc_lnum, c->ihead_lnum); + pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); + /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", + pr_err("\tjhead %s\t LEB %d\n", dbg_jhead(c->jheads[i].wbuf.jhead), c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); - printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); + pr_err("\tbud LEB %d\n", bud->lnum); } list_for_each_entry(bud, &c->old_buds, list) - printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); + pr_err("\told bud LEB %d\n", bud->lnum); list_for_each_entry(idx_gc, &c->idx_gc, list) - printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", + pr_err("\tGC'ed idx LEB %d unmap %d\n", idx_gc->lnum, idx_gc->unmap); - printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + pr_err("\tcommit state %d\n", c->cmt_state); /* Print budgeting predictions */ - available = ubifs_calc_available(c, c->min_idx_lebs); - outstanding = c->budg_data_growth + c->budg_dd_growth; + available = ubifs_calc_available(c, c->bi.min_idx_lebs); + outstanding = c->bi.data_growth + c->bi.dd_growth; free = ubifs_get_free_space_nolock(c); - printk(KERN_DEBUG "Budgeting predictions:\n"); - printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", + pr_err("Budgeting predictions:\n"); + pr_err("\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); +out_unlock: spin_unlock(&dbg_lock); + spin_unlock(&c->space_lock); } -void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) { int i, spc, dark = 0, dead = 0; struct rb_node *rb; @@ -670,21 +655,19 @@ void dbg_dump_lprop(const struct ubifs_info *c, 
const struct ubifs_lprops *lp) dark = ubifs_calc_dark(c, spc); if (lp->flags & LPROPS_INDEX) - printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " - "free + dirty %-8d flags %#x (", lp->lnum, lp->free, - lp->dirty, c->leb_size - spc, spc, lp->flags); + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + lp->flags); else - printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " - "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " - "flags %#-4x (", lp->lnum, lp->free, lp->dirty, - c->leb_size - spc, spc, dark, dead, - (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); + pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (", + lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc, + dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); if (lp->flags & LPROPS_TAKEN) { if (lp->flags & LPROPS_INDEX) - printk(KERN_CONT "index, taken"); + pr_cont("index, taken"); else - printk(KERN_CONT "taken"); + pr_cont("taken"); } else { const char *s; @@ -721,7 +704,7 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) break; } } - printk(KERN_CONT "%s", s); + pr_cont("%s", s); } for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { @@ -729,120 +712,147 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) if (bud->lnum == lp->lnum) { int head = 0; for (i = 0; i < c->jhead_cnt; i++) { - if (lp->lnum == c->jheads[i].wbuf.lnum) { - printk(KERN_CONT ", jhead %s", - dbg_jhead(i)); + /* + * Note, if we are in R/O mode or in the middle + * of mounting/re-mounting, the write-buffers do + * not exist. + */ + if (c->jheads && + lp->lnum == c->jheads[i].wbuf.lnum) { + pr_cont(", jhead %s", dbg_jhead(i)); head = 1; } } if (!head) - printk(KERN_CONT ", bud of jhead %s", + pr_cont(", bud of jhead %s", dbg_jhead(bud->jhead)); } } if (lp->lnum == c->gc_lnum) - printk(KERN_CONT ", GC LEB"); - printk(KERN_CONT ")\n"); + pr_cont(", GC LEB"); + pr_cont(")\n"); } -void dbg_dump_lprops(struct ubifs_info *c) +void ubifs_dump_lprops(struct ubifs_info *c) { int lnum, err; struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", - current->pid); + pr_err("(pid %d) start dumping LEB properties\n", current->pid); ubifs_get_lp_stats(c, &lst); - dbg_dump_lstats(&lst); + ubifs_dump_lstats(&lst); for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { err = ubifs_read_one_lp(c, lnum, &lp); - if (err) + if (err) { ubifs_err("cannot read lprops for LEB %d", lnum); + continue; + } - dbg_dump_lprop(c, &lp); + ubifs_dump_lprop(c, &lp); } - printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", - current->pid); + pr_err("(pid %d) finish dumping LEB properties\n", current->pid); } -void dbg_dump_lpt_info(struct ubifs_info *c) +void ubifs_dump_lpt_info(struct ubifs_info *c) { int i; spin_lock(&dbg_lock); - printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); - printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); - printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); - printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); - printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); - printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); - printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); - printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); - printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); - printk(KERN_DEBUG "\tnnode_cnt: %d\n", 
c->nnode_cnt); - printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); - printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); - printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); - printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); - printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); - printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); - printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); - printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); - printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); - printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); - printk(KERN_DEBUG "\tLPT head is at %d:%d\n", + pr_err("(pid %d) dumping LPT information\n", current->pid); + pr_err("\tlpt_sz: %lld\n", c->lpt_sz); + pr_err("\tpnode_sz: %d\n", c->pnode_sz); + pr_err("\tnnode_sz: %d\n", c->nnode_sz); + pr_err("\tltab_sz: %d\n", c->ltab_sz); + pr_err("\tlsave_sz: %d\n", c->lsave_sz); + pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tlpt_hght: %d\n", c->lpt_hght); + pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); + pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); + pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); + pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); + pr_err("\tlsave_cnt: %d\n", c->lsave_cnt); + pr_err("\tspace_bits: %d\n", c->space_bits); + pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); + pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits); + pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits); + pr_err("\tpcnt_bits: %d\n", c->pcnt_bits); + pr_err("\tlnum_bits: %d\n", c->lnum_bits); + pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); + pr_err("\tLPT head is at %d:%d\n", c->nhead_lnum, c->nhead_offs); - printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", - c->ltab_lnum, c->ltab_offs); + pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); if (c->big_lpt) - printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", + pr_err("\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); for (i = 0; i < c->lpt_lebs; i++) - printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " - "cmt %d\n", i + c->lpt_first, c->ltab[i].free, - c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); + pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n", + i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty, + c->ltab[i].tgc, c->ltab[i].cmt); spin_unlock(&dbg_lock); } -void dbg_dump_leb(const struct ubifs_info *c, int lnum) +void ubifs_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs) +{ + struct ubifs_scan_node *snod; + + pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", + current->pid, sleb->lnum, offs); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + pr_err("Dumping node at LEB %d:%d len %d\n", + sleb->lnum, snod->offs, snod->len); + ubifs_dump_node(c, snod->node); + } +} + +void ubifs_dump_leb(const struct ubifs_info *c, int lnum) { struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; + void *buf; + + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); - if (dbg_failure_mode) + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory for dumping LEB %d", lnum); return; + } - printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", - current->pid, lnum); - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); + sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { ubifs_err("scan error %d", (int)PTR_ERR(sleb)); - return; + goto out; } - printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, + pr_err("LEB %d has %d 
nodes ending at %d\n", lnum, sleb->nodes_cnt, sleb->endpt); list_for_each_entry(snod, &sleb->nodes, list) { cond_resched(); - printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, + pr_err("Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); } - printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); ubifs_scan_destroy(sleb); + +out: + vfree(buf); return; } -void dbg_dump_znode(const struct ubifs_info *c, - const struct ubifs_znode *znode) +void ubifs_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode) { int n; const struct ubifs_zbranch *zbr; + char key_buf[DBG_KEY_BUF_LEN]; spin_lock(&dbg_lock); if (znode->parent) @@ -850,103 +860,102 @@ void dbg_dump_znode(const struct ubifs_info *c, else zbr = &c->zroot; - printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" - " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, - zbr->len, znode->parent, znode->iip, znode->level, - znode->child_cnt, znode->flags); + pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n", + znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip, + znode->level, znode->child_cnt, znode->flags); if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { spin_unlock(&dbg_lock); return; } - printk(KERN_DEBUG "zbranches:\n"); + pr_err("zbranches:\n"); for (n = 0; n < znode->child_cnt; n++) { zbr = &znode->zbranch[n]; if (znode->level > 0) - printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " - "%s\n", n, zbr->znode, zbr->lnum, - zbr->offs, zbr->len, - DBGKEY(&zbr->key)); + pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); else - printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " - "%s\n", n, zbr->znode, zbr->lnum, - zbr->offs, zbr->len, - DBGKEY(&zbr->key)); + pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n", + n, zbr->znode, zbr->lnum, zbr->offs, zbr->len, + dbg_snprintf_key(c, &zbr->key, key_buf, + DBG_KEY_BUF_LEN)); } spin_unlock(&dbg_lock); } -void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) +void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", + pr_err("(pid %d) start dumping heap cat %d (%d elements)\n", current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; - printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " - "flags %d\n", i, lprops->lnum, lprops->hpos, - lprops->free, lprops->dirty, lprops->flags); + pr_err("\t%d. 
LEB %d hpos %d free %d dirty %d flags %d\n", + i, lprops->lnum, lprops->hpos, lprops->free, + lprops->dirty, lprops->flags); } - printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); + pr_err("(pid %d) finish dumping heap\n", current->pid); } -void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, - struct ubifs_nnode *parent, int iip) +void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip) { int i; - printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); - printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", + pr_err("(pid %d) dumping pnode:\n", current->pid); + pr_err("\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); - printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", + pr_err("\tflags %lu iip %d level %d num %d\n", pnode->flags, iip, pnode->level, pnode->num); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { struct ubifs_lprops *lp = &pnode->lprops[i]; - printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", + pr_err("\t%d: free %d dirty %d flags %d lnum %d\n", i, lp->free, lp->dirty, lp->flags, lp->lnum); } } -void dbg_dump_tnc(struct ubifs_info *c) +void ubifs_dump_tnc(struct ubifs_info *c) { struct ubifs_znode *znode; int level; - printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); + pr_err("\n"); + pr_err("(pid %d) start dumping TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; - printk(KERN_DEBUG "== Level %d ==\n", level); + pr_err("== Level %d ==\n", level); while (znode) { if (level != znode->level) { level = znode->level; - printk(KERN_DEBUG "== Level %d ==\n", level); + pr_err("== Level %d ==\n", level); } - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); } - printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); + pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) { - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); return 0; } /** - * dbg_dump_index - dump the on-flash index. + * ubifs_dump_index - dump the on-flash index. * @c: UBIFS file-system description object * - * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()' + * This function dumps whole UBIFS indexing B-tree, unlike 'ubifs_dump_tnc()' * which dumps only in-memory znodes and does not read znodes which from flash. */ -void dbg_dump_index(struct ubifs_info *c) +void ubifs_dump_index(struct ubifs_info *c) { dbg_walk_index(c, NULL, dump_znode, NULL); } @@ -961,11 +970,41 @@ void dbg_dump_index(struct ubifs_info *c) void dbg_save_space_info(struct ubifs_info *c) { struct ubifs_debug_info *d = c->dbg; - - ubifs_get_lp_stats(c, &d->saved_lst); + int freeable_cnt; spin_lock(&c->space_lock); + memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); + d->saved_idx_gc_cnt = c->idx_gc_cnt; + + /* + * We use a dirty hack here and zero out @c->freeable_cnt, because it + * affects the free space calculations, and UBIFS might not know about + * all freeable eraseblocks. Indeed, we know about freeable eraseblocks + * only when we read their lprops, and we do this only lazily, upon the + * need. So at any given point of time @c->freeable_cnt might be not + * exactly accurate. 
+ * + * Just one example about the issue we hit when we did not zero + * @c->freeable_cnt. + * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the + * amount of free space in @d->saved_free + * 2. We re-mount R/W, which makes UBIFS to read the "lsave" + * information from flash, where we cache LEBs from various + * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' + * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' + * -> 'ubifs_get_pnode()' -> 'update_cats()' + * -> 'ubifs_add_to_cat()'). + * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt + * becomes %1. + * 4. We calculate the amount of free space when the re-mount is + * finished in 'dbg_check_space_info()' and it does not match + * @d->saved_free. + */ + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; d->saved_free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; spin_unlock(&c->space_lock); } @@ -982,12 +1021,15 @@ int dbg_check_space_info(struct ubifs_info *c) { struct ubifs_debug_info *d = c->dbg; struct ubifs_lp_stats lst; - long long avail, free; + long long free; + int freeable_cnt; spin_lock(&c->space_lock); - avail = ubifs_calc_available(c, c->min_idx_lebs); + freeable_cnt = c->freeable_cnt; + c->freeable_cnt = 0; + free = ubifs_get_free_space_nolock(c); + c->freeable_cnt = freeable_cnt; spin_unlock(&c->space_lock); - free = ubifs_get_free_space(c); if (free != d->saved_free) { ubifs_err("free space changed from %lld to %lld", @@ -999,21 +1041,22 @@ int dbg_check_space_info(struct ubifs_info *c) out: ubifs_msg("saved lprops statistics dump"); - dbg_dump_lstats(&d->saved_lst); - ubifs_get_lp_stats(c, &lst); - + ubifs_dump_lstats(&d->saved_lst); + ubifs_msg("saved budgeting info dump"); + ubifs_dump_budg(c, &d->saved_bi); + ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); ubifs_msg("current lprops statistics dump"); - dbg_dump_lstats(&lst); - - spin_lock(&c->space_lock); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); + ubifs_get_lp_stats(c, &lst); + ubifs_dump_lstats(&lst); + ubifs_msg("current budgeting info dump"); + ubifs_dump_budg(c, &c->bi); dump_stack(); return -EINVAL; } /** * dbg_check_synced_i_size - check synchronized inode size. + * @c: UBIFS file-system description object * @inode: inode to check * * If inode is clean, synchronized inode size has to be equivalent to current @@ -1021,12 +1064,12 @@ out: * has to be locked). Returns %0 if synchronized inode size if correct, and * %-EINVAL if not. */ -int dbg_check_synced_i_size(struct inode *inode) +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) { int err = 0; struct ubifs_inode *ui = ubifs_inode(inode); - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISREG(inode->i_mode)) return 0; @@ -1034,11 +1077,11 @@ int dbg_check_synced_i_size(struct inode *inode) mutex_lock(&ui->ui_mutex); spin_lock(&ui->ui_lock); if (ui->ui_size != ui->synced_i_size && !ui->dirty) { - ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " - "is clean", ui->ui_size, ui->synced_i_size); + ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", + ui->ui_size, ui->synced_i_size); ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, inode->i_mode, i_size_read(inode)); - dbg_dump_stack(); + dump_stack(); err = -EINVAL; } spin_unlock(&ui->ui_lock); @@ -1059,7 +1102,7 @@ int dbg_check_synced_i_size(struct inode *inode) * Note, it is good idea to make sure the @dir->i_mutex is locked before * calling this function. 
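A minimal sketch (not part of the patch) of how the dbg_save_space_info()/dbg_check_space_info() pair above is meant to bracket an operation that must not change the amount of free space; the comment above uses remount as its example, so the sketch does too. do_remount_rw() is a hypothetical stand-in for the operation under test:

        /*
         * Illustrative only: snapshot the space accounting, run the operation,
         * then recompute free space (with @c->freeable_cnt zeroed, as above)
         * and compare against the snapshot.
         */
        static int remount_checked(struct ubifs_info *c)
        {
                int err;

                dbg_save_space_info(c);   /* save lprops stats, budgeting info, free space */
                err = do_remount_rw(c);   /* hypothetical operation under test */
                if (err)
                        return err;
                return dbg_check_space_info(c); /* -EINVAL + dumps if free space moved */
        }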
*/ -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) { unsigned int nlink = 2; union ubifs_key key; @@ -1067,7 +1110,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) struct qstr nm = { .name = NULL }; loff_t size = UBIFS_INO_NODE_SZ; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISDIR(dir->i_mode)) @@ -1097,16 +1140,17 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) kfree(pdent); if (i_size_read(dir) != size) { - ubifs_err("directory inode %lu has size %llu, " - "but calculated size is %llu", dir->i_ino, - (unsigned long long)i_size_read(dir), + ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", + dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); + ubifs_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { - ubifs_err("directory inode %lu has nlink %u, but calculated " - "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", + dir->i_ino, dir->i_nlink, nlink); + ubifs_dump_inode(c, dir); dump_stack(); return -EINVAL; } @@ -1133,6 +1177,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, int err, nlen1, nlen2, cmp; struct ubifs_dent_node *dent1, *dent2; union ubifs_key key; + char key_buf[DBG_KEY_BUF_LEN]; ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); @@ -1162,21 +1207,25 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, err = 1; key_read(c, &dent1->key, &key); if (keys_cmp(c, &zbr1->key, &key)) { - dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, - zbr1->offs, DBGKEY(&key)); - dbg_err("but it should have key %s according to tnc", - DBGKEY(&zbr1->key)); - dbg_dump_node(c, dent1); + ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_err("but it should have key %s according to tnc", + dbg_snprintf_key(c, &zbr1->key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_dump_node(c, dent1); goto out_free; } key_read(c, &dent2->key, &key); if (keys_cmp(c, &zbr2->key, &key)) { - dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, - zbr1->offs, DBGKEY(&key)); - dbg_err("but it should have key %s according to tnc", - DBGKEY(&zbr2->key)); - dbg_dump_node(c, dent2); + ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum, + zbr1->offs, dbg_snprintf_key(c, &key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_err("but it should have key %s according to tnc", + dbg_snprintf_key(c, &zbr2->key, key_buf, + DBG_KEY_BUF_LEN)); + ubifs_dump_node(c, dent2); goto out_free; } @@ -1189,15 +1238,15 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, goto out_free; } if (cmp == 0 && nlen1 == nlen2) - dbg_err("2 xent/dent nodes with the same name"); + ubifs_err("2 xent/dent nodes with the same name"); else - dbg_err("bad order of colliding key %s", - DBGKEY(&key)); + ubifs_err("bad order of colliding key %s", + dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); - dbg_dump_node(c, dent1); + ubifs_dump_node(c, dent1); ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); - dbg_dump_node(c, dent2); + ubifs_dump_node(c, dent2); out_free: kfree(dent2); @@ -1400,10 +1449,10 @@ static int dbg_check_znode(struct 
ubifs_info *c, struct ubifs_zbranch *zbr) out: ubifs_err("failed, error %d", err); ubifs_msg("dump of the znode"); - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); if (zp) { ubifs_msg("dump of the parent znode"); - dbg_dump_znode(c, zp); + ubifs_dump_znode(c, zp); } dump_stack(); return -EINVAL; @@ -1423,7 +1472,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) long clean_cnt = 0, dirty_cnt = 0; int err, last; - if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) + if (!dbg_is_chk_index(c)) return 0; ubifs_assert(mutex_is_locked(&c->tnc_mutex)); @@ -1470,9 +1519,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) return err; if (err) { ubifs_msg("first znode"); - dbg_dump_znode(c, prev); + ubifs_dump_znode(c, prev); ubifs_msg("second znode"); - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); return -EINVAL; } } @@ -1559,9 +1608,9 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, if (znode_cb) { err = znode_cb(c, znode, priv); if (err) { - ubifs_err("znode checking function returned " - "error %d", err); - dbg_dump_znode(c, znode); + ubifs_err("znode checking function returned error %d", + err); + ubifs_dump_znode(c, znode); goto out_dump; } } @@ -1570,9 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, zbr = &znode->zbranch[idx]; err = leaf_cb(c, zbr, priv); if (err) { - ubifs_err("leaf checking function " - "returned error %d, for leaf " - "at LEB %d:%d", + ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", err, zbr->lnum, zbr->offs); goto out_dump; } @@ -1629,7 +1676,7 @@ out_dump: else zbr = &c->zroot; ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); out_unlock: mutex_unlock(&c->tnc_mutex); return err; @@ -1670,7 +1717,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) int err; long long calc = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) + if (!dbg_is_chk_index(c)) return 0; err = dbg_walk_index(c, NULL, add_size, &calc); @@ -1680,8 +1727,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) } if (calc != idx_size) { - ubifs_err("index size check failed: calculated size is %lld, " - "should be %lld", calc, idx_size); + ubifs_err("index size check failed: calculated size is %lld, should be %lld", + calc, idx_size); dump_stack(); return -EINVAL; } @@ -1751,6 +1798,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, struct rb_node **p, *parent = NULL; struct fsck_inode *fscki; ino_t inum = key_inum_flash(c, &ino->key); + struct inode *inode; + struct ubifs_inode *ui; p = &fsckd->inodes.rb_node; while (*p) { @@ -1774,19 +1823,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, if (!fscki) return ERR_PTR(-ENOMEM); + inode = ilookup(c->vfs_sb, inum); + fscki->inum = inum; - fscki->nlink = le32_to_cpu(ino->nlink); - fscki->size = le64_to_cpu(ino->size); - fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); - fscki->xattr_sz = le32_to_cpu(ino->xattr_size); - fscki->xattr_nms = le32_to_cpu(ino->xattr_names); - fscki->mode = le32_to_cpu(ino->mode); + /* + * If the inode is present in the VFS inode cache, use it instead of + * the on-flash inode which might be out-of-date. E.g., the size might + * be out-of-date. If we do not do this, the following may happen, for + * example: + * 1. A power cut happens + * 2. We mount the file-system R/O, the replay process fixes up the + * inode size in the VFS cache, but on on-flash. + * 3. 
'check_leaf()' fails because it hits a data node beyond inode + * size. + */ + if (!inode) { + fscki->nlink = le32_to_cpu(ino->nlink); + fscki->size = le64_to_cpu(ino->size); + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); + fscki->mode = le32_to_cpu(ino->mode); + } else { + ui = ubifs_inode(inode); + fscki->nlink = inode->i_nlink; + fscki->size = inode->i_size; + fscki->xattr_cnt = ui->xattr_cnt; + fscki->xattr_sz = ui->xattr_size; + fscki->xattr_nms = ui->xattr_names; + fscki->mode = inode->i_mode; + iput(inode); + } + if (S_ISDIR(fscki->mode)) { fscki->calc_sz = UBIFS_INO_NODE_SZ; fscki->calc_cnt = 2; } + rb_link_node(&fscki->rb, parent, p); rb_insert_color(&fscki->rb, &fsckd->inodes); + return fscki; } @@ -1964,8 +2040,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing data node and " - "trying to find inode node %lu", + ubifs_err("error %d while processing data node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -1975,9 +2050,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, blk_offs <<= UBIFS_BLOCK_SHIFT; blk_offs += le32_to_cpu(dn->size); if (blk_offs > fscki->size) { - ubifs_err("data node at LEB %d:%d is not within inode " - "size %lld", zbr->lnum, zbr->offs, - fscki->size); + ubifs_err("data node at LEB %d:%d is not within inode size %lld", + zbr->lnum, zbr->offs, fscki->size); err = -EINVAL; goto out_dump; } @@ -1998,8 +2072,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki = read_add_inode(c, priv, inum); if (IS_ERR(fscki)) { err = PTR_ERR(fscki); - ubifs_err("error %d while processing entry node and " - "trying to find inode node %lu", + ubifs_err("error %d while processing entry node and trying to find inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2011,8 +2084,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, fscki1 = read_add_inode(c, priv, inum); if (IS_ERR(fscki1)) { err = PTR_ERR(fscki1); - ubifs_err("error %d while processing entry node and " - "trying to find parent inode node %lu", + ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", err, (unsigned long)inum); goto out_dump; } @@ -2036,7 +2108,7 @@ out: out_dump: ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_dump_node(c, node); + ubifs_dump_node(c, node); out_free: kfree(node); return err; @@ -2048,26 +2120,10 @@ out_free: */ static void free_inodes(struct fsck_data *fsckd) { - struct rb_node *this = fsckd->inodes.rb_node; - struct fsck_inode *fscki; + struct fsck_inode *fscki, *n; - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - fscki = rb_entry(this, struct fsck_inode, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &fscki->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - kfree(fscki); - } - } + rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb) + kfree(fscki); } /** @@ -2102,61 +2158,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) */ if (fscki->inum != UBIFS_ROOT_INO && fscki->references != 1) { - ubifs_err("directory inode %lu has %d " - "direntries which refer it, but " - "should be 1", + ubifs_err("directory inode 
%lu has %d direntries which refer it, but should be 1", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->inum == UBIFS_ROOT_INO && fscki->references != 0) { - ubifs_err("root inode %lu has non-zero (%d) " - "direntries which refer it", + ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", (unsigned long)fscki->inum, fscki->references); goto out_dump; } if (fscki->calc_sz != fscki->size) { - ubifs_err("directory inode %lu size is %lld, " - "but calculated size is %lld", + ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", (unsigned long)fscki->inum, fscki->size, fscki->calc_sz); goto out_dump; } if (fscki->calc_cnt != fscki->nlink) { - ubifs_err("directory inode %lu nlink is %d, " - "but calculated nlink is %d", + ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->calc_cnt); goto out_dump; } } else { if (fscki->references != fscki->nlink) { - ubifs_err("inode %lu nlink is %d, but " - "calculated nlink is %d", + ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", (unsigned long)fscki->inum, fscki->nlink, fscki->references); goto out_dump; } } if (fscki->xattr_sz != fscki->calc_xsz) { - ubifs_err("inode %lu has xattr size %u, but " - "calculated size is %lld", + ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", (unsigned long)fscki->inum, fscki->xattr_sz, fscki->calc_xsz); goto out_dump; } if (fscki->xattr_cnt != fscki->calc_xcnt) { - ubifs_err("inode %lu has %u xattrs, but " - "calculated count is %lld", + ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", (unsigned long)fscki->inum, fscki->xattr_cnt, fscki->calc_xcnt); goto out_dump; } if (fscki->xattr_nms != fscki->calc_xnms) { - ubifs_err("inode %lu has xattr names' size %u, but " - "calculated names' size is %lld", + ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", (unsigned long)fscki->inum, fscki->xattr_nms, fscki->calc_xnms); goto out_dump; @@ -2194,7 +2241,7 @@ out_dump: ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", (unsigned long)fscki->inum, zbr->lnum, zbr->offs); - dbg_dump_node(c, ino); + ubifs_dump_node(c, ino); kfree(ino); return -EINVAL; } @@ -2217,7 +2264,7 @@ int dbg_check_filesystem(struct ubifs_info *c) int err; struct fsck_data fsckd; - if (!(ubifs_chk_flags & UBIFS_CHK_FS)) + if (!dbg_is_chk_fs(c)) return 0; fsckd.inodes = RB_ROOT; @@ -2252,7 +2299,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2265,12 +2312,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_DATA_NODE) { ubifs_err("bad node type %d", sa->type); - dbg_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node); return -EINVAL; } if (sb->type != UBIFS_DATA_NODE) { ubifs_err("bad node type %d", sb->type); - dbg_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node); return -EINVAL; } @@ -2301,8 +2348,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; error_dump: - dbg_dump_node(c, sa->node); - dbg_dump_node(c, sb->node); + ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sb->node); return -EINVAL; } @@ -2319,7 +2366,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) struct 
list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2333,13 +2380,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { ubifs_err("bad node type %d", sa->type); - dbg_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node); return -EINVAL; } if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { ubifs_err("bad node type %d", sb->type); - dbg_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node); return -EINVAL; } @@ -2379,7 +2426,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) hashb = key_block(c, &sb->key); if (hasha > hashb) { - ubifs_err("larger hash %u goes before %u", hasha, hashb); + ubifs_err("larger hash %u goes before %u", + hasha, hashb); goto error_dump; } } @@ -2388,400 +2436,363 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) error_dump: ubifs_msg("dumping first node"); - dbg_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node); ubifs_msg("dumping second node"); - dbg_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node); return -EINVAL; return 0; } -static int invocation_cnt; - -int dbg_force_in_the_gaps(void) -{ - if (!dbg_force_in_the_gaps_enabled) - return 0; - /* Force in-the-gaps every 8th commit */ - return !((invocation_cnt++) & 0x7); -} - -/* Failure mode for recovery testing */ - -#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) - -struct failure_mode_info { - struct list_head list; - struct ubifs_info *c; -}; - -static LIST_HEAD(fmi_list); -static DEFINE_SPINLOCK(fmi_lock); - -static unsigned int next; - -static int simple_rand(void) -{ - if (next == 0) - next = current->pid; - next = next * 1103515245 + 12345; - return (next >> 16) & 32767; -} - -static void failure_mode_init(struct ubifs_info *c) +static inline int chance(unsigned int n, unsigned int out_of) { - struct failure_mode_info *fmi; + return !!((prandom_u32() % out_of) + 1 <= n); - fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); - if (!fmi) { - ubifs_err("Failed to register failure mode - no memory"); - return; - } - fmi->c = c; - spin_lock(&fmi_lock); - list_add_tail(&fmi->list, &fmi_list); - spin_unlock(&fmi_lock); } -static void failure_mode_exit(struct ubifs_info *c) +static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) { - struct failure_mode_info *fmi, *tmp; - - spin_lock(&fmi_lock); - list_for_each_entry_safe(fmi, tmp, &fmi_list, list) - if (fmi->c == c) { - list_del(&fmi->list); - kfree(fmi); - } - spin_unlock(&fmi_lock); -} - -static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) -{ - struct failure_mode_info *fmi; - - spin_lock(&fmi_lock); - list_for_each_entry(fmi, &fmi_list, list) - if (fmi->c->ubi == desc) { - struct ubifs_info *c = fmi->c; - - spin_unlock(&fmi_lock); - return c; - } - spin_unlock(&fmi_lock); - return NULL; -} - -static int in_failure_mode(struct ubi_volume_desc *desc) -{ - struct ubifs_info *c = dbg_find_info(desc); - - if (c && dbg_failure_mode) - return c->dbg->failure_mode; - return 0; -} + struct ubifs_debug_info *d = c->dbg; -static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) -{ - struct ubifs_info *c = dbg_find_info(desc); - struct ubifs_debug_info *d; + ubifs_assert(dbg_is_tst_rcvry(c)); - if (!c || 
!dbg_failure_mode) - return 0; - d = c->dbg; - if (d->failure_mode) - return 1; - if (!d->fail_cnt) { - /* First call - decide delay to failure */ + if (!d->pc_cnt) { + /* First call - decide delay to the power cut */ if (chance(1, 2)) { - unsigned int delay = 1 << (simple_rand() >> 11); + unsigned long delay; if (chance(1, 2)) { - d->fail_delay = 1; - d->fail_timeout = jiffies + - msecs_to_jiffies(delay); - dbg_rcvry("failing after %ums", delay); + d->pc_delay = 1; + /* Fail withing 1 minute */ + delay = prandom_u32() % 60000; + d->pc_timeout = jiffies; + d->pc_timeout += msecs_to_jiffies(delay); + ubifs_warn("failing after %lums", delay); } else { - d->fail_delay = 2; - d->fail_cnt_max = delay; - dbg_rcvry("failing after %u calls", delay); + d->pc_delay = 2; + delay = prandom_u32() % 10000; + /* Fail within 10000 operations */ + d->pc_cnt_max = delay; + ubifs_warn("failing after %lu calls", delay); } } - d->fail_cnt += 1; + + d->pc_cnt += 1; } + /* Determine if failure delay has expired */ - if (d->fail_delay == 1) { - if (time_before(jiffies, d->fail_timeout)) + if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) return 0; - } else if (d->fail_delay == 2) - if (d->fail_cnt++ < d->fail_cnt_max) + if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) return 0; + if (lnum == UBIFS_SB_LNUM) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(19, 20)) + if (write && chance(1, 2)) return 0; - dbg_rcvry("failing in super block LEB %d", lnum); + if (chance(19, 20)) + return 0; + ubifs_warn("failing in super block LEB %d", lnum); } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in master LEB %d", lnum); + ubifs_warn("failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { - if (write) { - if (chance(99, 100)) - return 0; - } else if (chance(399, 400)) + if (write && chance(99, 100)) + return 0; + if (chance(399, 400)) return 0; - dbg_rcvry("failing in log LEB %d", lnum); + ubifs_warn("failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { - if (write) { - if (chance(7, 8)) - return 0; - } else if (chance(19, 20)) + if (write && chance(7, 8)) + return 0; + if (chance(19, 20)) return 0; - dbg_rcvry("failing in LPT LEB %d", lnum); + ubifs_warn("failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(9, 10)) + if (write && chance(1, 2)) return 0; - dbg_rcvry("failing in orphan LEB %d", lnum); + if (chance(9, 10)) + return 0; + ubifs_warn("failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - dbg_rcvry("failing in index head LEB %d", lnum); + ubifs_warn("failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - dbg_rcvry("failing in GC head LEB %d", lnum); + ubifs_warn("failing in GC head LEB %d", lnum); } else if (write && !RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in non-bud LEB %d", lnum); + ubifs_warn("failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - dbg_rcvry("failing in bud LEB %d commit running", lnum); + ubifs_warn("failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - dbg_rcvry("failing in 
bud LEB %d commit not running", lnum); + ubifs_warn("failing in bud LEB %d commit not running", lnum); } - ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); - d->failure_mode = 1; + + d->pc_happened = 1; + ubifs_warn("========== Power cut emulated =========="); dump_stack(); return 1; } -static void cut_data(const void *buf, int len) +static int corrupt_data(const struct ubifs_info *c, const void *buf, + unsigned int len) { - int flen, i; + unsigned int from, to, ffs = chance(1, 2); unsigned char *p = (void *)buf; - flen = (len * (long long)simple_rand()) >> 15; - for (i = flen; i < len; i++) - p[i] = 0xff; -} + from = prandom_u32() % len; + /* Corruption span max to end of write unit */ + to = min(len, ALIGN(from + 1, c->max_write_size)); -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check) -{ - if (in_failure_mode(desc)) - return -EIO; - return ubi_leb_read(desc, lnum, buf, offset, len, check); + ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ffs ? "0xFFs" : "random data"); + + if (ffs) + memset(p + from, 0xFF, to - from); + else + prandom_bytes(p + from, to - from); + + return to; } -int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype) +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, + int offs, int len) { int err, failing; - if (in_failure_mode(desc)) - return -EIO; - failing = do_fail(desc, lnum, 1); - if (failing) - cut_data(buf, len); - err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); + if (c->dbg->pc_happened) + return -EROFS; + + failing = power_cut_emulated(c, lnum, 1); + if (failing) { + len = corrupt_data(c, buf, len); + ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", + len, lnum, offs); + } + err = ubi_leb_write(c->ubi, lnum, buf, offs, len); if (err) return err; if (failing) - return -EIO; + return -EROFS; return 0; } -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype) +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, + int len) { int err; - if (do_fail(desc, lnum, 1)) - return -EIO; - err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len); if (err) return err; - if (do_fail(desc, lnum, 1)) - return -EIO; + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; return 0; } -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (do_fail(desc, lnum, 0)) - return -EIO; - err = ubi_leb_erase(desc, lnum); + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); if (err) return err; - if (do_fail(desc, lnum, 0)) - return -EIO; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; return 0; } -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_map(struct ubifs_info *c, int lnum) { int err; - if (do_fail(desc, lnum, 0)) - return -EIO; - err = ubi_leb_unmap(desc, lnum); + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_map(c->ubi, lnum); if (err) return err; - if (do_fail(desc, lnum, 0)) - return -EIO; + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; return 0; } -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) -{ - if (in_failure_mode(desc)) - return 
-EIO; - return ubi_is_mapped(desc, lnum); -} +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +static int dfs_file_open(struct inode *inode, struct file *file) { - int err; - - if (do_fail(desc, lnum, 0)) - return -EIO; - err = ubi_leb_map(desc, lnum, dtype); - if (err) - return err; - if (do_fail(desc, lnum, 0)) - return -EIO; - return 0; + file->private_data = inode->i_private; + return nonseekable_open(inode, file); } /** - * ubifs_debugging_init - initialize UBIFS debugging. - * @c: UBIFS file-system description object + * provide_user_output - provide output to the user reading a debugfs file. + * @val: boolean value for the answer + * @u: the buffer to store the answer at + * @count: size of the buffer + * @ppos: position in the @u output buffer * - * This function initializes debugging-related data for the file system. - * Returns zero in case of success and a negative error code in case of + * This is a simple helper function which stores @val boolean value in the user + * buffer when the user reads one of UBIFS debugfs files. Returns amount of + * bytes written to @u in case of success and a negative error code in case of * failure. */ -int ubifs_debugging_init(struct ubifs_info *c) +static int provide_user_output(int val, char __user *u, size_t count, + loff_t *ppos) { - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) - return -ENOMEM; - - c->dbg->buf = vmalloc(c->leb_size); - if (!c->dbg->buf) - goto out; + char buf[3]; - failure_mode_init(c); - return 0; + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; -out: - kfree(c->dbg); - return -ENOMEM; + return simple_read_from_buffer(u, count, ppos, buf, 2); } -/** - * ubifs_debugging_exit - free debugging data. - * @c: UBIFS file-system description object - */ -void ubifs_debugging_exit(struct ubifs_info *c) +static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) { - failure_mode_exit(c); - vfree(c->dbg->buf); - kfree(c->dbg); -} + struct dentry *dent = file->f_path.dentry; + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + int val; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_index) + val = d->chk_index; + else if (dent == d->dfs_chk_orph) + val = d->chk_orph; + else if (dent == d->dfs_chk_lprops) + val = d->chk_lprops; + else if (dent == d->dfs_chk_fs) + val = d->chk_fs; + else if (dent == d->dfs_tst_rcvry) + val = d->tst_rcvry; + else if (dent == d->dfs_ro_error) + val = c->ro_error; + else + return -EINVAL; -/* - * Root directory for UBIFS stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular file-system mounts. - */ -static struct dentry *dfs_rootdir; + return provide_user_output(val, u, count, ppos); +} /** - * dbg_debugfs_init - initialize debugfs file-system. + * interpret_user_input - interpret user debugfs file input. + * @u: user-provided buffer with the input + * @count: buffer size * - * UBIFS uses debugfs file-system to expose various debugging knobs to - * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. 
+ * This is a helper function which interpret user input to a boolean UBIFS + * debugfs file. Returns %0 or %1 in case of success and a negative error code + * in case of failure. */ -int dbg_debugfs_init(void) +static int interpret_user_input(const char __user *u, size_t count) { - dfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(dfs_rootdir)) { - int err = PTR_ERR(dfs_rootdir); - ubifs_err("cannot create \"ubifs\" debugfs directory, " - "error %d\n", err); - return err; - } + size_t buf_size; + char buf[8]; - return 0; -} + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, u, buf_size)) + return -EFAULT; -/** - * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. - */ -void dbg_debugfs_exit(void) -{ - debugfs_remove(dfs_rootdir); -} + if (buf[0] == '1') + return 1; + else if (buf[0] == '0') + return 0; -static int open_debugfs_file(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; + return -EINVAL; } -static ssize_t write_debugfs_file(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t dfs_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) { struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; + struct dentry *dent = file->f_path.dentry; + int val; - if (file->f_path.dentry == d->dfs_dump_lprops) - dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dfs_dump_budg) { - spin_lock(&c->space_lock); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); - } else if (file->f_path.dentry == d->dfs_dump_tnc) { + /* + * TODO: this is racy - the file-system might have already been + * unmounted and we'd oops in this case. The plan is to fix it with + * help of 'iterate_supers_type()' which we should have in v3.0: when + * a debugfs opened, we rember FS's UUID in file->private_data. Then + * whenever we access the FS via a debugfs file, we iterate all UBIFS + * superblocks and fine the one with the same UUID, and take the + * locking right. + * + * The other way to go suggested by Al Viro is to create a separate + * 'ubifs-debug' file-system instead. 
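As a usage illustration (not part of the patch): interpret_user_input() above only looks at the first byte, so the knobs are flipped with a one-byte write of '0' or '1'. A rough user-space sketch, assuming debugfs is mounted at the conventional /sys/kernel/debug and the volume is ubi0_0 (both are assumptions, not taken from the patch):

        /* Hypothetical user-space helper; only the single-character write matters. */
        #include <fcntl.h>
        #include <unistd.h>

        static int set_knob(const char *path, int on)
        {
                int fd = open(path, O_WRONLY);

                if (fd < 0)
                        return -1;
                /* interpret_user_input() inspects only the first byte */
                if (write(fd, on ? "1\n" : "0\n", 2) != 2) {
                        close(fd);
                        return -1;
                }
                return close(fd);
        }

        /* e.g. set_knob("/sys/kernel/debug/ubifs/ubi0_0/chk_general", 1); */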
+ */ + if (file->f_path.dentry == d->dfs_dump_lprops) { + ubifs_dump_lprops(c); + return count; + } + if (file->f_path.dentry == d->dfs_dump_budg) { + ubifs_dump_budg(c, &c->bi); + return count; + } + if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); - dbg_dump_tnc(c); + ubifs_dump_tnc(c); mutex_unlock(&c->tnc_mutex); - } else + return count; + } + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_index) + d->chk_index = val; + else if (dent == d->dfs_chk_orph) + d->chk_orph = val; + else if (dent == d->dfs_chk_lprops) + d->chk_lprops = val; + else if (dent == d->dfs_chk_fs) + d->chk_fs = val; + else if (dent == d->dfs_tst_rcvry) + d->tst_rcvry = val; + else if (dent == d->dfs_ro_error) + c->ro_error = !!val; + else return -EINVAL; - *ppos += count; return count; } static const struct file_operations dfs_fops = { - .open = open_debugfs_file, - .write = write_debugfs_file, + .open = dfs_file_open, + .read = dfs_file_read, + .write = dfs_file_write, .owner = THIS_MODULE, - .llseek = default_llseek, + .llseek = no_llseek, }; /** @@ -2798,46 +2809,104 @@ static const struct file_operations dfs_fops = { */ int dbg_debugfs_init_fs(struct ubifs_info *c) { - int err; + int err, n; const char *fname; struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); - if (IS_ERR(d->dfs_dir)) { - err = PTR_ERR(d->dfs_dir); - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - d->dfs_dir_name, err); + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + if (n == UBIFS_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBIFS_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); goto out; } + fname = d->dfs_dir_name; + dent = debugfs_create_dir(fname, dfs_rootdir); + if (IS_ERR_OR_NULL(dent)) + goto out; + d->dfs_dir = dent; + fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_lprops = dent; fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_budg = dent; fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); - if (IS_ERR(dent)) + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); + if (IS_ERR_OR_NULL(dent)) goto out_remove; d->dfs_dump_tnc = dent; + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if 
(IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_tst_rcvry = dent; + + fname = "ro_error"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_ro_error = dent; + return 0; out_remove: - err = PTR_ERR(dent); - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - fname, err); debugfs_remove_recursive(d->dfs_dir); out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); return err; } @@ -2847,7 +2916,185 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->dfs_dir); + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(c->dbg->dfs_dir); +} + +struct ubifs_global_debug_info ubifs_dbg; + +static struct dentry *dfs_chk_gen; +static struct dentry *dfs_chk_index; +static struct dentry *dfs_chk_orph; +static struct dentry *dfs_chk_lprops; +static struct dentry *dfs_chk_fs; +static struct dentry *dfs_tst_rcvry; + +static ssize_t dfs_global_file_read(struct file *file, char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + if (dent == dfs_chk_gen) + val = ubifs_dbg.chk_gen; + else if (dent == dfs_chk_index) + val = ubifs_dbg.chk_index; + else if (dent == dfs_chk_orph) + val = ubifs_dbg.chk_orph; + else if (dent == dfs_chk_lprops) + val = ubifs_dbg.chk_lprops; + else if (dent == dfs_chk_fs) + val = ubifs_dbg.chk_fs; + else if (dent == dfs_tst_rcvry) + val = ubifs_dbg.tst_rcvry; + else + return -EINVAL; + + return provide_user_output(val, u, count, ppos); } -#endif /* CONFIG_UBIFS_FS_DEBUG */ +static ssize_t dfs_global_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == dfs_chk_gen) + ubifs_dbg.chk_gen = val; + else if (dent == dfs_chk_index) + ubifs_dbg.chk_index = val; + else if (dent == dfs_chk_orph) + ubifs_dbg.chk_orph = val; + else if (dent == dfs_chk_lprops) + ubifs_dbg.chk_lprops = val; + else if (dent == dfs_chk_fs) + ubifs_dbg.chk_fs = val; + else if (dent == dfs_tst_rcvry) + ubifs_dbg.tst_rcvry = val; + else + return -EINVAL; + + return count; +} + +static const struct file_operations dfs_global_fops = { + .read = dfs_global_file_read, + .write = dfs_global_file_write, + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. 
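The global knobs created by dbg_debugfs_init() below mirror the per-mount ones, and debug.h (later in this patch) folds the two together in helpers such as dbg_is_chk_gen(). A sketch of the resulting call pattern at a check site; dbg_check_something() is made up for illustration, the helper is real:

        static int dbg_check_something(struct ubifs_info *c)
        {
                /* Enabled if either the global knob or this mount's knob is set */
                if (!dbg_is_chk_gen(c))
                        return 0;

                /* ... expensive consistency checks would go here ... */
                return 0;
        }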
+ */ +int dbg_debugfs_init(void) +{ + int err; + const char *fname; + struct dentry *dent; + + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + + fname = "ubifs"; + dent = debugfs_create_dir(fname, NULL); + if (IS_ERR_OR_NULL(dent)) + goto out; + dfs_rootdir = dent; + + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_tst_rcvry = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(dfs_rootdir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + debugfs_remove_recursive(dfs_rootdir); +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + return 0; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + kfree(c->dbg); +} diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 69ebe472915..e03d5179769 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -23,19 +23,31 @@ #ifndef __UBIFS_DEBUG_H__ #define __UBIFS_DEBUG_H__ -#ifdef CONFIG_UBIFS_FS_DEBUG +/* Checking helper functions */ +typedef int (*dbg_leaf_callback)(struct ubifs_info *c, + struct ubifs_zbranch *zbr, void *priv); +typedef int (*dbg_znode_callback)(struct ubifs_info *c, + struct ubifs_znode *znode, void *priv); + +/* + * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" + * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + */ +#define UBIFS_DFS_DIR_NAME "ubi%d_%d" +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) /** * ubifs_debug_info - per-FS debugging information. 
- * @buf: a buffer of LEB size, used for various purposes * @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * + * @pc_happened: non-zero if an emulated power cut happened + * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @pc_timeout: time in jiffies when delay of failure mode expires + * @pc_cnt: current number of calls to failure mode I/O functions + * @pc_cnt_max: number of calls by which to delay failure mode + * * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -45,24 +57,44 @@ * @new_ihead_offs: used by debugging to check @c->ihead_offs * * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') - * @saved_free: saved free space (used by 'dbg_save_space_info()') + * @saved_bi: saved budgeting information + * @saved_free: saved amount of free space + * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled * - * dfs_dir_name: name of debugfs directory containing this file-system's files - * dfs_dir: direntry object of the file-system debugfs directory - * dfs_dump_lprops: "dump lprops" debugfs knob - * dfs_dump_budg: "dump budgeting information" debugfs knob - * dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_dir_name: name of debugfs directory containing this file-system's files + * @dfs_dir: direntry object of the file-system debugfs directory + * @dfs_dump_lprops: "dump lprops" debugfs knob + * @dfs_dump_budg: "dump budgeting information" debugfs knob + * @dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks + * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks + * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks + * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks + * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks + * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing + * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to + * re-mounting to R/O mode because it does not flush any buffers + * and UBIFS just starts returning -EROFS on all write + * operations) */ struct ubifs_debug_info { - void *buf; struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; + + int pc_happened; + int pc_delay; + unsigned long pc_timeout; + unsigned int pc_cnt; + unsigned int pc_cnt_max; + long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -72,198 +104,141 @@ struct ubifs_debug_info { int new_ihead_offs; struct ubifs_lp_stats saved_lst; + struct 
ubifs_budg_info saved_bi; long long saved_free; + int saved_idx_gc_cnt; + + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; - char dfs_dir_name[100]; + char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_index; + struct dentry *dfs_chk_orph; + struct dentry *dfs_chk_lprops; + struct dentry *dfs_chk_fs; + struct dentry *dfs_tst_rcvry; + struct dentry *dfs_ro_error; +}; + +/** + * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + */ +struct ubifs_global_debug_info { + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; }; #define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ - printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ - dbg_dump_stack(); \ + dump_stack(); \ } \ } while (0) #define ubifs_assert_cmt_locked(c) do { \ if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ up_write(&(c)->commit_sem); \ - printk(KERN_CRIT "commit lock is not locked!\n"); \ + pr_crit("commit lock is not locked!\n"); \ ubifs_assert(0); \ } \ } while (0) -#define dbg_dump_stack() do { \ - if (!dbg_failure_mode) \ - dump_stack(); \ -} while (0) - -/* Generic debugging messages */ -#define dbg_msg(fmt, ...) do { \ - spin_lock(&dbg_lock); \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ - __func__, ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ -} while (0) +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \ + ##__VA_ARGS__) -#define dbg_do_msg(typ, fmt, ...) do { \ - if (ubifs_msg_flags & typ) \ - dbg_msg(fmt, ##__VA_ARGS__); \ +#define DBG_KEY_BUF_LEN 48 +#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ + char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ + pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \ + ##__VA_ARGS__, \ + dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ } while (0) -#define dbg_err(fmt, ...) do { \ - spin_lock(&dbg_lock); \ - ubifs_err(fmt, ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ -} while (0) - -const char *dbg_key_str0(const struct ubifs_info *c, - const union ubifs_key *key); -const char *dbg_key_str1(const struct ubifs_info *c, - const union ubifs_key *key); - -/* - * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message - * macros. - */ -#define DBGKEY(key) dbg_key_str0(c, (key)) -#define DBGKEY1(key) dbg_key_str1(c, (key)) - /* General messages */ -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) - +#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) - +#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) +#define dbg_jnlk(key, fmt, ...) 
\ + ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) /* Additional TNC messages */ -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) - +#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) +#define dbg_tnck(key, fmt, ...) \ + ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) /* Additional lprops messages */ -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) - +#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) /* Additional LEB find messages */ -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) - +#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) /* Additional mount messages */ -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) - +#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) +#define dbg_mntk(key, fmt, ...) \ + ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) /* Additional I/O messages */ -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) - +#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) /* Additional commit messages */ -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) - +#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) /* Additional budgeting messages */ -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) - +#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) /* Additional log messages */ -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) - +#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) /* Additional gc messages */ -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) - +#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) /* Additional scan messages */ -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) - +#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) /* Additional recovery messages */ -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) - -/* - * Debugging message type flags (must match msg_type_names in debug.c). - * - * UBIFS_MSG_GEN: general messages - * UBIFS_MSG_JNL: journal messages - * UBIFS_MSG_MNT: mount messages - * UBIFS_MSG_CMT: commit messages - * UBIFS_MSG_FIND: LEB find messages - * UBIFS_MSG_BUDG: budgeting messages - * UBIFS_MSG_GC: garbage collection messages - * UBIFS_MSG_TNC: TNC messages - * UBIFS_MSG_LP: lprops messages - * UBIFS_MSG_IO: I/O messages - * UBIFS_MSG_LOG: log messages - * UBIFS_MSG_SCAN: scan messages - * UBIFS_MSG_RCVRY: recovery messages - */ -enum { - UBIFS_MSG_GEN = 0x1, - UBIFS_MSG_JNL = 0x2, - UBIFS_MSG_MNT = 0x4, - UBIFS_MSG_CMT = 0x8, - UBIFS_MSG_FIND = 0x10, - UBIFS_MSG_BUDG = 0x20, - UBIFS_MSG_GC = 0x40, - UBIFS_MSG_TNC = 0x80, - UBIFS_MSG_LP = 0x100, - UBIFS_MSG_IO = 0x200, - UBIFS_MSG_LOG = 0x400, - UBIFS_MSG_SCAN = 0x800, - UBIFS_MSG_RCVRY = 0x1000, -}; - -/* Debugging message type flags for each default debug message level */ -#define UBIFS_MSG_LVL_0 0 -#define UBIFS_MSG_LVL_1 0x1 -#define UBIFS_MSG_LVL_2 0x7f -#define UBIFS_MSG_LVL_3 0xffff - -/* - * Debugging check flags (must match chk_names in debug.c). 
- * - * UBIFS_CHK_GEN: general checks - * UBIFS_CHK_TNC: check TNC - * UBIFS_CHK_IDX_SZ: check index size - * UBIFS_CHK_ORPH: check orphans - * UBIFS_CHK_OLD_IDX: check the old index - * UBIFS_CHK_LPROPS: check lprops - * UBIFS_CHK_FS: check the file-system - */ -enum { - UBIFS_CHK_GEN = 0x1, - UBIFS_CHK_TNC = 0x2, - UBIFS_CHK_IDX_SZ = 0x4, - UBIFS_CHK_ORPH = 0x8, - UBIFS_CHK_OLD_IDX = 0x10, - UBIFS_CHK_LPROPS = 0x20, - UBIFS_CHK_FS = 0x40, -}; - -/* - * Special testing flags (must match tst_names in debug.c). - * - * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method - * UBIFS_TST_RCVRY: failure mode for recovery testing - */ -enum { - UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, - UBIFS_TST_RCVRY = 0x4, -}; - -#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 -#else -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 -#endif - -#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS -#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff -#else -#define UBIFS_CHK_FLAGS_DEFAULT 0 -#endif +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) -extern spinlock_t dbg_lock; +extern struct ubifs_global_debug_info ubifs_dbg; -extern unsigned int ubifs_msg_flags; -extern unsigned int ubifs_chk_flags; -extern unsigned int ubifs_tst_flags; +static inline int dbg_is_chk_gen(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); +} +static inline int dbg_is_chk_index(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_index || c->dbg->chk_index); +} +static inline int dbg_is_chk_orph(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); +} +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); +} +static inline int dbg_is_chk_fs(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); +} +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); +} +static inline int dbg_is_power_cut(const struct ubifs_info *c) +{ + return !!c->dbg->pc_happened; +} int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); @@ -274,31 +249,30 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); -void dbg_dump_node(const struct ubifs_info *c, const void *node); -void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, - int offs); -void dbg_dump_budget_req(const struct ubifs_budget_req *req); -void dbg_dump_lstats(const struct ubifs_lp_stats *lst); -void dbg_dump_budg(struct ubifs_info *c); -void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); -void dbg_dump_lprops(struct ubifs_info *c); -void dbg_dump_lpt_info(struct ubifs_info *c); -void dbg_dump_leb(const struct ubifs_info *c, int lnum); -void dbg_dump_znode(const struct ubifs_info *c, - const struct ubifs_znode *znode); -void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); -void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, - struct ubifs_nnode *parent, int iip); -void dbg_dump_tnc(struct ubifs_info *c); -void dbg_dump_index(struct ubifs_info 
*c); -void dbg_dump_lpt_lebs(const struct ubifs_info *c); +const char *dbg_snprintf_key(const struct ubifs_info *c, + const union ubifs_key *key, char *buffer, int len); +void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode); +void ubifs_dump_node(const struct ubifs_info *c, const void *node); +void ubifs_dump_budget_req(const struct ubifs_budget_req *req); +void ubifs_dump_lstats(const struct ubifs_lp_stats *lst); +void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); +void ubifs_dump_lprop(const struct ubifs_info *c, + const struct ubifs_lprops *lp); +void ubifs_dump_lprops(struct ubifs_info *c); +void ubifs_dump_lpt_info(struct ubifs_info *c); +void ubifs_dump_leb(const struct ubifs_info *c, int lnum); +void ubifs_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs); +void ubifs_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); +void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, + int cat); +void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + struct ubifs_nnode *parent, int iip); +void ubifs_dump_tnc(struct ubifs_info *c); +void ubifs_dump_index(struct ubifs_info *c); +void ubifs_dump_lpt_lebs(const struct ubifs_info *c); -/* Checking helper functions */ -typedef int (*dbg_leaf_callback)(struct ubifs_info *c, - struct ubifs_zbranch *zbr, void *priv); -typedef int (*dbg_znode_callback)(struct ubifs_info *c, - struct ubifs_znode *znode, void *priv); int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); @@ -312,14 +286,13 @@ int dbg_check_cats(struct ubifs_info *c); int dbg_check_ltab(struct ubifs_info *c); int dbg_chk_lpt_free_spc(struct ubifs_info *c); int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -int dbg_check_synced_i_size(struct inode *inode); -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); int dbg_check_tnc(struct ubifs_info *c, int extra); int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int add_pos); -int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, @@ -327,57 +300,11 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); -/* Force the use of in-the-gaps method for testing */ - -#define dbg_force_in_the_gaps_enabled \ - (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) - -int dbg_force_in_the_gaps(void); - -/* Failure mode for recovery testing */ - -#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) - -#ifndef UBIFS_DBG_PRESERVE_UBI - -#define ubi_leb_read dbg_leb_read -#define ubi_leb_write dbg_leb_write -#define ubi_leb_change dbg_leb_change -#define ubi_leb_erase dbg_leb_erase -#define ubi_leb_unmap dbg_leb_unmap -#define ubi_is_mapped dbg_is_mapped -#define ubi_leb_map dbg_leb_map - -#endif - -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check); -int 
dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype); -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype); -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); - -static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, - int offset, int len) -{ - return dbg_leb_read(desc, lnum, buf, offset, len, 0); -} - -static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, - const void *buf, int offset, int len) -{ - return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); -} - -static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, - const void *buf, int len) -{ - return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); -} +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len); +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len); +int dbg_leb_unmap(struct ubifs_info *c, int lnum); +int dbg_leb_map(struct ubifs_info *c, int lnum); /* Debugfs-related stuff */ int dbg_debugfs_init(void); @@ -385,98 +312,4 @@ void dbg_debugfs_exit(void); int dbg_debugfs_init_fs(struct ubifs_info *c); void dbg_debugfs_exit_fs(struct ubifs_info *c); -#else /* !CONFIG_UBIFS_FS_DEBUG */ - -/* Use "if (0)" to make compiler check arguments even if debugging is off */ -#define ubifs_assert(expr) do { \ - if (0 && (expr)) \ - printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ - __func__, __LINE__, current->pid); \ -} while (0) - -#define dbg_err(fmt, ...) do { \ - if (0) \ - ubifs_err(fmt, ##__VA_ARGS__); \ -} while (0) - -#define dbg_msg(fmt, ...) do { \ - if (0) \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__); \ -} while (0) - -#define dbg_dump_stack() -#define ubifs_assert_cmt_locked(c) - -#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_rcvry(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) - -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - -#define ubifs_debugging_init(c) 0 -#define ubifs_debugging_exit(c) ({}) - -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_jhead(jhead) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) -#define dbg_dump_lpt_lebs(c) ({}) - -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 -#define dbg_old_index_check_init(c, zroot) 0 -#define dbg_save_space_info(c) ({}) -#define dbg_check_space_info(c) 0 -#define dbg_check_old_index(c, zroot) 0 -#define dbg_check_cats(c) 0 -#define dbg_check_ltab(c) 0 -#define dbg_chk_lpt_free_spc(c) 0 -#define dbg_chk_lpt_sz(c, action, len) 0 -#define dbg_check_synced_i_size(inode) 0 -#define dbg_check_dir_size(c, dir) 0 -#define dbg_check_tnc(c, x) 0 -#define dbg_check_idx_size(c, idx_size) 0 -#define dbg_check_filesystem(c) 0 -#define dbg_check_heap(c, heap, cat, add_pos) ({}) -#define dbg_check_lprops(c) 0 -#define dbg_check_lpt_nodes(c, cnode, row, col) 0 -#define dbg_check_inode_size(c, inode, size) 0 -#define dbg_check_data_nodes_order(c, head) 0 -#define dbg_check_nondata_nodes_order(c, head) 0 -#define dbg_force_in_the_gaps_enabled 0 -#define dbg_force_in_the_gaps() 0 -#define dbg_failure_mode 0 - -#define dbg_debugfs_init() 0 -#define dbg_debugfs_exit() -#define dbg_debugfs_init_fs(c) 0 -#define dbg_debugfs_exit_fs(c) 0 - -#endif /* !CONFIG_UBIFS_FS_DEBUG */ #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 14f64b689d7..ea41649e4ca 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -56,7 +56,7 @@ * * This function returns the inherited flags. */ -static int inherit_flags(const struct inode *dir, int mode) +static int inherit_flags(const struct inode *dir, umode_t mode) { int flags; const struct ubifs_inode *ui = ubifs_inode(dir); @@ -86,7 +86,7 @@ static int inherit_flags(const struct inode *dir, int mode) * case of failure. */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, - int mode) + umode_t mode) { struct inode *inode; struct ubifs_inode *ui; @@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, * UBIFS has to fully control "clean <-> dirty" transitions of inodes * to make budgeting work. 
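The debug.h hunks above replace the old compile-time UBIFS_CHK_* bitmask with run-time knobs: a module-wide ubifs_global_debug_info plus per-filesystem bits, ORed together by the dbg_is_chk_*() helpers. The following stand-alone sketch (user-space C, with made-up demo_* names) shows that gating pattern.

/*
 * User-space sketch of the run-time check gating used by the new
 * dbg_is_chk_*() helpers: a check runs if either the module-wide knob or
 * the per-filesystem knob is set.  All names here are illustrative.
 */
#include <stdio.h>

struct demo_debug_info { unsigned int chk_gen:1; };

/* Module-wide knobs (the kernel exposes these via debugfs). */
static struct demo_debug_info demo_global_dbg;

struct demo_fs {
	const char *name;
	struct demo_debug_info dbg;	/* per-filesystem knobs */
};

static int demo_is_chk_gen(const struct demo_fs *c)
{
	return !!(demo_global_dbg.chk_gen || c->dbg.chk_gen);
}

static int demo_check_filesystem(const struct demo_fs *c)
{
	if (!demo_is_chk_gen(c))
		return 0;	/* checks disabled - do nothing */
	printf("running expensive consistency checks on %s\n", c->name);
	return 0;
}

int main(void)
{
	struct demo_fs a = { .name = "ubi0:rootfs" };

	demo_check_filesystem(&a);	/* skipped */
	a.dbg.chk_gen = 1;		/* enable only for this FS */
	demo_check_filesystem(&a);	/* runs */
	return 0;
}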
*/ - inode->i_flags |= (S_NOCMTIME); + inode->i_flags |= S_NOCMTIME; inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = @@ -170,11 +170,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, return inode; } -#ifdef CONFIG_UBIFS_FS_DEBUG - -static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) +static int dbg_check_name(const struct ubifs_info *c, + const struct ubifs_dent_node *dent, + const struct qstr *nm) { - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (le16_to_cpu(dent->nlen) != nm->len) return -EINVAL; @@ -183,14 +183,8 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) return 0; } -#else - -#define dbg_check_name(dent, nm) 0 - -#endif - static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int err; union ubifs_key key; @@ -198,8 +192,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, struct ubifs_dent_node *dent; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("'%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); if (dentry->d_name.len > UBIFS_MAX_NLEN) return ERR_PTR(-ENAMETOOLONG); @@ -219,7 +212,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if (dbg_check_name(dent, &dentry->d_name)) { + if (dbg_check_name(c, dent, &dentry->d_name)) { err = -EINVAL; goto out; } @@ -231,8 +224,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, * checking. */ err = PTR_ERR(inode); - ubifs_err("dead directory entry '%.*s', error %d", - dentry->d_name.len, dentry->d_name.name, err); + ubifs_err("dead directory entry '%pd', error %d", + dentry, err); ubifs_ro_mode(c, err); goto out; } @@ -251,8 +244,8 @@ out: return ERR_PTR(err); } -static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) +static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; @@ -266,8 +259,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, * parent directory inode. */ - dbg_gen("dent '%.*s', mode %#x in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -352,38 +345,46 @@ static unsigned int vfs_dent_type(uint8_t type) * This means that UBIFS cannot support NFS which requires full * 'seekdir()'/'telldir()' support. */ -static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) +static int ubifs_readdir(struct file *file, struct dir_context *ctx) { - int err, over = 0; + int err; struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; - struct inode *dir = file->f_path.dentry->d_inode; + struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); - if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2) /* * The directory was seek'ed to a senseless position or there * are no more entries. */ return 0; - /* File positions 0 and 1 correspond to "." and ".." 
*/ - if (file->f_pos == 0) { - ubifs_assert(!file->private_data); - over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); - if (over) - return 0; - file->f_pos = 1; + if (file->f_version == 0) { + /* + * The file was seek'ed, which means that @file->private_data + * is now invalid. This may also be just the first + * 'ubifs_readdir()' invocation, in which case + * @file->private_data is NULL, and the below code is + * basically a no-op. + */ + kfree(file->private_data); + file->private_data = NULL; } - if (file->f_pos == 1) { + /* + * 'generic_file_llseek()' unconditionally sets @file->f_version to + * zero, and we use this for detecting whether the file was seek'ed. + */ + file->f_version = 1; + + /* File positions 0 and 1 correspond to "." and ".." */ + if (ctx->pos < 2) { ubifs_assert(!file->private_data); - over = filldir(dirent, "..", 2, 1, - parent_ino(file->f_path.dentry), DT_DIR); - if (over) + if (!dir_emit_dots(file, ctx)) return 0; /* Find the first entry in TNC and save it */ @@ -395,7 +396,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + ctx->pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -403,17 +404,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. - * Find the entry corresponding to @file->f_pos or the - * closest one. + * Find the entry corresponding to @ctx->pos or the closest one. */ - dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + dent_key_init_hash(c, &key, dir->i_ino, ctx->pos); nm.name = NULL; dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { err = PTR_ERR(dent); goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + ctx->pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -425,10 +425,9 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) ubifs_inode(dir)->creat_sqnum); nm.len = le16_to_cpu(dent->nlen); - over = filldir(dirent, dent->name, nm.len, file->f_pos, + if (!dir_emit(ctx, dent->name, nm.len, le64_to_cpu(dent->inum), - vfs_dent_type(dent->type)); - if (over) + vfs_dent_type(dent->type))) return 0; /* Switch to the next entry */ @@ -441,7 +440,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) } kfree(file->private_data); - file->f_pos = key_hash_flash(c, &dent->key); + ctx->pos = key_hash_flash(c, &dent->key); file->private_data = dent; cond_resched(); } @@ -454,18 +453,11 @@ out: kfree(file->private_data); file->private_data = NULL; - file->f_pos = 2; + /* 2 is a special value indicating that there are no more direntries */ + ctx->pos = 2; return 0; } -/* If a directory is seeked, we have to free saved readdir() state */ -static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) -{ - kfree(file->private_data); - file->private_data = NULL; - return generic_file_llseek(file, offset, origin); -} - /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { @@ -516,31 +508,13 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, * changing the parent inode. 
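The reworked ubifs_readdir() above keeps its TNC cursor in file->private_data and uses file->f_version to notice an intervening llseek(), which zeroes f_version, because stale private_data must not be reused after a seek. A minimal user-space sketch of that idea follows; the structures are simplified stand-ins, not the kernel ones.

/*
 * Seek-detection trick: "llseek" zeroes a version counter and "readdir"
 * sets it back to one, so a zero value on entry means the directory was
 * seek'ed and any cached lookup state must be thrown away.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_file {
	unsigned long f_version;	/* zeroed by "llseek" */
	void *private_data;		/* cached directory entry */
	long long pos;
};

static void demo_llseek(struct demo_file *file, long long pos)
{
	file->pos = pos;
	file->f_version = 0;		/* mimics generic_file_llseek() */
}

static void demo_readdir(struct demo_file *file)
{
	if (file->f_version == 0) {
		/* Seek'ed (or first call): cached state is stale. */
		free(file->private_data);
		file->private_data = NULL;
	}
	file->f_version = 1;

	if (!file->private_data)
		file->private_data = malloc(16);	/* "look up" an entry */
	printf("emitting entries starting at position %lld\n", file->pos);
}

int main(void)
{
	struct demo_file f = { 0 };

	demo_readdir(&f);		/* first call, no cached state */
	demo_readdir(&f);		/* reuses cached entry */
	demo_llseek(&f, 7);		/* invalidates the cache */
	demo_readdir(&f);
	free(f.private_data);
	return 0;
}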
*/ - dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - /* - * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing - * otherwise has the potential to corrupt the orphan inode list. - * - * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and - * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not - * lock 'dirA->i_mutex', so this is possible. Both of the functions - * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes - * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this - * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA' - * to the list of orphans. After this, 'vfs_link()' will link - * 'dirB/fileB' to 'inodeA'. This is a problem because, for example, - * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode - * to the list of orphans. - */ - if (inode->i_nlink == 0) - return -ENOENT; - - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -582,6 +556,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) int sz_change = CALC_DENT_SIZE(dentry->d_name.len); int err, budgeted = 1; struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; + unsigned int saved_nlink = inode->i_nlink; /* * Budget request settings: deletion direntry, deletion inode (+1 for @@ -590,12 +565,12 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) * deletions. */ - dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, inode->i_ino, + dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu", + dentry, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -621,7 +596,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) ubifs_release_budget(c, &req); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return 0; @@ -629,7 +604,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) out_cancel: dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; - inc_nlink(inode); + set_nlink(inode, saved_nlink); unlock_2_inodes(dir, inode); if (budgeted) ubifs_release_budget(c, &req); @@ -680,8 +655,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) * because we have extra space reserved for deletions. 
*/ - dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, inode->i_ino, dir->i_ino); + dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry, + inode->i_ino, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); @@ -711,7 +686,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) ubifs_release_budget(c, &req); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return 0; @@ -720,15 +695,14 @@ out_cancel: dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; inc_nlink(dir); - inc_nlink(inode); - inc_nlink(inode); + set_nlink(inode, 2); unlock_2_inodes(dir, inode); if (budgeted) ubifs_release_budget(c, &req); return err; } -static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; struct ubifs_inode *dir_ui = ubifs_inode(dir); @@ -741,8 +715,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) * directory inode. */ - dbg_gen("dent '%.*s', mode %#x in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", + dentry, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -785,7 +759,7 @@ out_budg: } static int ubifs_mknod(struct inode *dir, struct dentry *dentry, - int mode, dev_t rdev) + umode_t mode, dev_t rdev) { struct inode *inode; struct ubifs_inode *ui; @@ -803,8 +777,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s' in dir ino %lu", - dentry->d_name.len, dentry->d_name.name, dir->i_ino); + dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); if (!new_valid_dev(rdev)) return -EINVAL; @@ -878,8 +851,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, - dentry->d_name.name, symname, dir->i_ino); + dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, + symname, dir->i_ino); if (len > UBIFS_MAX_INO_DATA) return -ENAMETOOLONG; @@ -993,6 +966,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; + unsigned int uninitialized_var(saved_nlink); /* * Budget request settings: deletion direntry, new direntry, removing @@ -1003,10 +977,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, * separately. */ - dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " - "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, - old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_inode->i_ino, old_dir->i_ino, + new_dentry, new_dir->i_ino); ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); if (unlink) @@ -1075,13 +1048,14 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlink) { /* * Directories cannot have hard-links, so if this is a - * directory, decrement its @i_nlink twice because an empty - * directory has @i_nlink 2. 
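The unlink, rmdir and rename hunks above switch from paired inc_nlink()/drop_nlink() calls to remembering the original link count and restoring it with set_nlink() on the error path; that puts back the exact pre-operation value (2 for an empty directory, whatever it was for a rename victim) in one step. Below is a small stand-alone illustration of the rollback pattern, with invented demo_* names.

/*
 * Remember the link count before touching it and restore it with a single
 * "set" on the cancel path, instead of re-incrementing.
 */
#include <assert.h>
#include <stdio.h>

struct demo_inode { unsigned int i_nlink; };

static void demo_set_nlink(struct demo_inode *inode, unsigned int nlink)
{
	inode->i_nlink = nlink;
}

static int demo_unlink(struct demo_inode *inode, int journal_fails)
{
	unsigned int saved_nlink = inode->i_nlink;

	inode->i_nlink--;			/* optimistic update */
	if (journal_fails) {
		/* out_cancel: roll back exactly to what we had */
		demo_set_nlink(inode, saved_nlink);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct demo_inode victim = { .i_nlink = 1 };

	assert(demo_unlink(&victim, 1) == -1);	/* failed, count restored */
	assert(victim.i_nlink == 1);
	assert(demo_unlink(&victim, 0) == 0);
	assert(victim.i_nlink == 0);
	printf("nlink rollback behaves as expected\n");
	return 0;
}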
+ * directory, just clear @i_nlink. */ + saved_nlink = new_inode->i_nlink; if (is_dir) + clear_nlink(new_inode); + else drop_nlink(new_inode); new_inode->i_ctime = time; - drop_nlink(new_inode); } else { new_dir->i_size += new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; @@ -1118,9 +1092,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, out_cancel: if (unlink) { - if (is_dir) - inc_nlink(new_inode); - inc_nlink(new_inode); + set_nlink(new_inode, saved_nlink); } else { new_dir->i_size -= new_sz; ubifs_inode(new_dir)->ui_size = new_dir->i_size; @@ -1151,16 +1123,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - stat->dev = inode->i_sb->s_dev; - stat->ino = inode->i_ino; - stat->mode = inode->i_mode; - stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; + generic_fillattr(inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; @@ -1203,19 +1166,17 @@ const struct inode_operations ubifs_dir_inode_operations = { .rename = ubifs_rename, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .setxattr = ubifs_setxattr, .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, -#endif }; const struct file_operations ubifs_dir_operations = { - .llseek = ubifs_dir_llseek, + .llseek = generic_file_llseek, .release = ubifs_dir_release, .read = generic_read_dir, - .readdir = ubifs_readdir, + .iterate = ubifs_readdir, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index d77db7e3648..b5b593c4527 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -37,11 +37,11 @@ * * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we * implement. However, this is not true for 'ubifs_writepage()', which may be - * called with @i_mutex unlocked. For example, when pdflush is doing background - * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" - * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the - * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' - * we are only guaranteed that the page is locked. + * called with @i_mutex unlocked. For example, when flusher thread is doing + * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. + * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. + * in the "sys_write -> alloc_pages -> direct reclaim path". So, in + * 'ubifs_writepage()' we are only guaranteed that the page is locked. 
* * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> @@ -50,6 +50,7 @@ */ #include "ubifs.h" +#include <linux/aio.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/slab.h> @@ -97,7 +98,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dump: ubifs_err("bad data node (block %u, inode %lu)", block, inode->i_ino); - dbg_dump_node(c, dn); + ubifs_dump_node(c, dn); return -EINVAL; } @@ -212,7 +213,7 @@ static void release_new_page_budget(struct ubifs_info *c) */ static void release_existing_page_budget(struct ubifs_info *c) { - struct ubifs_budget_req req = { .dd_growth = c->page_budget}; + struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; ubifs_release_budget(c, &req); } @@ -448,10 +449,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { /* * We change whole page so no need to load it. But we - * have to set the @PG_checked flag to make the further - * code know that the page is new. This might be not - * true, but it is better to budget more than to read - * the page from the media. + * do not know whether this page exists on the media or + * not, so we assume the latter because it requires + * larger budget. The assumption is that it is better + * to budget a bit more than to read the page from the + * media. Thus, we are setting the @PG_checked flag + * here. */ SetPageChecked(page); skipped_read = 1; @@ -559,6 +562,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, dbg_gen("copied %d instead of %d, read page and repeat", copied, len); cancel_budget(c, page, ui, appending); + ClearPageChecked(page); /* * Return 0 to force VFS to repeat the whole operation, or the @@ -899,8 +903,9 @@ static int do_writepage(struct page *page, int len) struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG + struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT); spin_unlock(&ui->ui_lock); #endif @@ -968,11 +973,11 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'truncate_setsize()', which first changes @inode->i_size, then - * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with - * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This - * means that @inode->i_size is changed while @ui_mutex is unlocked. + * we have to call 'truncate_setsize()', which first changes @inode->i_size, + * then drops the truncated pages. And while dropping the pages, it takes the + * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' + * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. + * This means that @inode->i_size is changed while @ui_mutex is unlocked. * * XXX(truncate): with the new truncate sequence this is not true anymore, * and the calls to truncate_setsize can be move around freely. 
They should @@ -1039,10 +1044,10 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); @@ -1186,7 +1191,7 @@ out_budg: if (budgeted) ubifs_release_budget(c, &req); else { - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return err; @@ -1260,7 +1265,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -1273,13 +1278,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static void ubifs_invalidatepage(struct page *page, unsigned long offset) +static void ubifs_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { struct inode *inode = page->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; ubifs_assert(PagePrivate(page)); - if (offset) + if (offset || length < PAGE_CACHE_SIZE) /* Partial page remains dirty */ return; @@ -1301,7 +1307,7 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, int datasync) +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1309,14 +1315,23 @@ int ubifs_fsync(struct file *file, int datasync) dbg_gen("syncing inode %lu", inode->i_ino); - /* - * VFS has already synchronized dirty pages for this inode. Synchronize - * the inode unless this is a 'datasync()' call. - */ + if (c->ro_mount) + /* + * For some really strange reasons VFS does not filter out + * 'fsync()' for R/O mounted file-systems as per 2.6.39. + */ + return 0; + + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + /* Synchronize the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - return err; + goto out; } /* @@ -1324,10 +1339,9 @@ int ubifs_fsync(struct file *file, int datasync) * them. */ err = ubifs_sync_wbufs_by_inode(c, inode); - if (err) - return err; - - return 0; +out: + mutex_unlock(&inode->i_mutex); + return err; } /** @@ -1350,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode, /** * update_ctime - update mtime and ctime of an inode. - * @c: UBIFS file-system description object * @inode: inode to update * * This function updates mtime and ctime of the inode if it is not equivalent to * current time. Returns zero in case of success and a negative error code in * case of failure. 
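The new ubifs_fsync() above bails out on R/O mounts, flushes the requested data range with filemap_write_and_wait_range(), and only then, under i_mutex, writes the inode (skipped for a datasync call) and syncs the write-buffers. That ordering is sketched below in plain user-space C; the helpers are stubs standing in for the kernel calls.

/*
 * Simplified fsync ordering: nothing to do on an R/O mount, flush dirty
 * data for the range first, then write inode metadata unless datasync,
 * and finally push the write-buffers out.
 */
#include <stdio.h>

struct demo_ctx { int ro_mount; int inode_meta_dirty; };

static int flush_data_range(long long start, long long end)
{
	printf("flush data [%lld, %lld]\n", start, end);
	return 0;
}

static int write_inode(void)  { printf("write inode\n");  return 0; }
static int sync_wbufs(void)   { printf("sync wbufs\n");   return 0; }

static int demo_fsync(struct demo_ctx *c, long long start, long long end,
		      int datasync)
{
	int err;

	if (c->ro_mount)
		return 0;	/* R/O mount: nothing can be dirty */

	err = flush_data_range(start, end);
	if (err)
		return err;

	/* the inode lock would be taken here */
	if (!datasync && c->inode_meta_dirty) {
		err = write_inode();
		if (err)
			goto out;
	}
	err = sync_wbufs();
out:
	/* the inode lock would be released here */
	return err;
}

int main(void)
{
	struct demo_ctx c = { .ro_mount = 0, .inode_meta_dirty = 1 };

	return demo_fsync(&c, 0, 4095, 0);
}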
*/ -static int update_mctime(struct ubifs_info *c, struct inode *inode) +static int update_mctime(struct inode *inode) { struct timespec now = ubifs_current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; if (mctime_update_needed(inode, &now)) { int err, release; @@ -1383,18 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) return 0; } -static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int err; - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct ubifs_info *c = inode->i_sb->s_fs_info; - - err = update_mctime(c, inode); + int err = update_mctime(file_inode(iocb->ki_filp)); if (err) return err; - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } static int ubifs_set_page_dirty(struct page *page) @@ -1426,13 +1435,14 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) } /* - * mmap()d file has taken write protection fault and is being made - * writable. UBIFS must ensure page is budgeted for. + * mmap()d file has taken write protection fault and is being made writable. + * UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; @@ -1474,8 +1484,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm err = ubifs_budget_space(c, &req); if (unlikely(err)) { if (err == -ENOSPC) - ubifs_warn("out of space for mmapped file " - "(inode number %lu)", inode->i_ino); + ubifs_warn("out of space for mmapped file (inode number %lu)", + inode->i_ino); return VM_FAULT_SIGBUS; } @@ -1510,8 +1520,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm ubifs_release_dirty_inode_budget(c, ui); } - unlock_page(page); - return 0; + wait_for_stable_page(page); + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); @@ -1523,14 +1533,15 @@ out_unlock: static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = ubifs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) { int err; - /* 'generic_file_mmap()' takes care of NOMMU case */ err = generic_file_mmap(file, vma); if (err) return err; @@ -1551,12 +1562,10 @@ const struct address_space_operations ubifs_file_address_operations = { const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .setxattr = ubifs_setxattr, .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, -#endif }; const struct inode_operations ubifs_symlink_inode_operations = { @@ -1568,15 +1577,15 @@ const struct inode_operations ubifs_symlink_inode_operations = { const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - 
.aio_read = generic_file_aio_read, - .aio_write = ubifs_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = ubifs_write_iter, .mmap = ubifs_file_mmap, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 1d54383d126..2dcf3d473fe 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, * But if the index takes fewer LEBs than it is reserved for it, * this function must avoid picking those reserved LEBs. */ - if (c->min_idx_lebs >= c->lst.idx_lebs) { - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; exclude_index = 1; } spin_unlock(&c->space_lock); @@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, pick_free = 0; } else { spin_lock(&c->space_lock); - exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); + exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); spin_unlock(&c->space_lock); } @@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, /* Check if there are enough empty LEBs for commit */ spin_lock(&c->space_lock); - if (c->min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c) if (!lprops) { lprops = ubifs_fast_find_freeable(c); if (!lprops) { - ubifs_assert(c->freeable_cnt == 0); - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + /* + * The first condition means the following: go scan the + * LPT if there are uncategorized lprops, which means + * there may be freeable LEBs there (UBIFS does not + * store the information about freeable LEBs in the + * master node). + */ + if (c->in_a_category_cnt != c->main_lebs || + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + ubifs_assert(c->freeable_cnt == 0); lprops = scan_for_leb_for_idx(c); if (IS_ERR(lprops)) { err = PTR_ERR(lprops); @@ -939,8 +947,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c) } dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, lp->free, lp->flags); - ubifs_assert(lp->flags | LPROPS_TAKEN); - ubifs_assert(lp->flags | LPROPS_INDEX); + ubifs_assert(lp->flags & LPROPS_TAKEN); + ubifs_assert(lp->flags & LPROPS_INDEX); return lnum; } diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 151f1088282..9718da86ad0 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -100,12 +100,16 @@ static int switch_gc_head(struct ubifs_info *c) if (err) return err; + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + return err; + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); if (err) return err; c->gc_lnum = -1; - err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM); + err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0); return err; } @@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c) * This function compares data nodes @a and @b. Returns %1 if @a has greater * inode or block number, and %-1 otherwise. 
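One of the find.c changes above fixes assertions that used a bitwise OR instead of AND: "lp->flags | LPROPS_TAKEN" is non-zero no matter what the flags are, so the old asserts could never trip. A tiny compilable demonstration follows; the flag value is arbitrary and for illustration only.

/*
 * Only "flags & FLAG" actually tests the bit; "flags | FLAG" is always
 * truthy, which made the old assertions useless.
 */
#include <assert.h>
#include <stdio.h>

#define DEMO_LPROPS_TAKEN 0x04

int main(void)
{
	int flags = 0;				/* bit is NOT set */

	assert(flags | DEMO_LPROPS_TAKEN);	/* always passes - useless */
	assert(!(flags & DEMO_LPROPS_TAKEN));	/* & really checks the bit */

	flags |= DEMO_LPROPS_TAKEN;
	assert(flags & DEMO_LPROPS_TAKEN);
	printf("only the & form detects a missing flag\n");
	return 0;
}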
*/ -int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; @@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) * first and sorted by length in descending order. Directory entry nodes go * after inode nodes and are sorted in ascending hash valuer order. */ -int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +static int nondata_nodes_cmp(void *priv, struct list_head *a, + struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; @@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) ubifs_assert(c->gc_lnum != lnum); ubifs_assert(wbuf->lnum != lnum); + if (lp->free + lp->dirty == c->leb_size) { + /* Special case - a free LEB */ + dbg_gc("LEB %d is free, return it", lp->lnum); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + + if (lp->free != c->leb_size) { + /* + * Write buffers must be sync'd before unmapping + * freeable LEBs, because one of them may contain data + * which obsoletes something in 'lp->pnum'. + */ + err = gc_sync_wbufs(c); + if (err) + return err; + err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, + 0, 0, 0, 0); + if (err) + return err; + } + err = ubifs_leb_unmap(c, lp->lnum); + if (err) + return err; + + if (c->gc_lnum == -1) { + c->gc_lnum = lnum; + return LEB_RETAINED; + } + + return LEB_FREED; + } + /* * We scan the entire LEB even though we only really need to scan up to * (c->leb_size - lp->free). @@ -632,8 +668,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) ubifs_assert(!wbuf->used); for (i = 0; ; i++) { - int space_before = c->leb_size - wbuf->offs - wbuf->used; - int space_after; + int space_before, space_after; cond_resched(); @@ -678,40 +713,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) break; } - dbg_gc("found LEB %d: free %d, dirty %d, sum %d " - "(min. space %d)", lp.lnum, lp.free, lp.dirty, - lp.free + lp.dirty, min_space); - - if (lp.free + lp.dirty == c->leb_size) { - /* An empty LEB was returned */ - dbg_gc("LEB %d is free, return it", lp.lnum); - /* - * ubifs_find_dirty_leb() doesn't return freeable index - * LEBs. - */ - ubifs_assert(!(lp.flags & LPROPS_INDEX)); - if (lp.free != c->leb_size) { - /* - * Write buffers must be sync'd before - * unmapping freeable LEBs, because one of them - * may contain data which obsoletes something - * in 'lp.pnum'. - */ - ret = gc_sync_wbufs(c); - if (ret) - goto out; - ret = ubifs_change_one_lp(c, lp.lnum, - c->leb_size, 0, 0, 0, - 0); - if (ret) - goto out; - } - ret = ubifs_leb_unmap(c, lp.lnum); - if (ret) - goto out; - ret = lp.lnum; - break; - } + dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)", + lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty, + min_space); space_before = c->leb_size - wbuf->offs - wbuf->used; if (wbuf->lnum == -1) diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index d82173182ee..2290d586672 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -31,6 +31,26 @@ * buffer is full or when it is not used for some time (by timer). This is * similar to the mechanism is used by JFFS2. * + * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum + * write size (@c->max_write_size). The latter is the maximum amount of bytes + * the underlying flash is able to program at a time, and writing in + * @c->max_write_size units should presumably be faster. 
Obviously, + * @c->min_io_size <= @c->max_write_size. Write-buffers are of + * @c->max_write_size bytes in size for maximum performance. However, when a + * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size + * boundary) which contains data is written, not the whole write-buffer, + * because this is more space-efficient. + * + * This optimization adds few complications to the code. Indeed, on the one + * hand, we want to write in optimal @c->max_write_size bytes chunks, which + * also means aligning writes at the @c->max_write_size bytes offsets. On the + * other hand, we do not want to waste space when synchronizing the write + * buffer, so during synchronization we writes in smaller chunks. And this makes + * the next write offset to be not aligned to @c->max_write_size bytes. So the + * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned + * to @c->max_write_size bytes again. We do this by temporarily shrinking + * write-buffer size (@wbuf->size). + * * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * mutexes defined inside these objects. Since sometimes upper-level code * has to lock the write-buffer (e.g. journal space reservation code), many @@ -46,8 +66,8 @@ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it * uses padding nodes or padding bytes, if the padding node does not fit. * - * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes - * every time they are read from the flash media. + * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when + * they are read from the flash media. */ #include <linux/crc32.h> @@ -66,8 +86,124 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); - dbg_dump_stack(); + dump_stack(); + } +} + +/* + * Below are simple wrappers over UBI I/O functions which include some + * additional checks and UBIFS debugging stuff. See corresponding UBI function + * for more information. + */ + +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg) +{ + int err; + + err = ubi_read(c->ubi, lnum, buf, offs, len); + /* + * In case of %-EBADMSG print the error message only if the + * @even_ebadmsg is true. 
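The new ubifs_leb_*() helpers introduced above wrap the raw UBI calls with one common policy: refuse the operation after a fatal error, dispatch either to UBI or to the dbg_leb_*() failure-injection hooks when recovery testing is enabled, and on failure log the error and force the file system read-only. A stand-alone sketch of that wrapper shape follows; the demo_* names and the fake backend are invented.

/*
 * Wrapper shape: error latch, optional test hook, error reporting and
 * forced read-only mode.
 */
#include <stdio.h>

struct demo_ctx { int ro_error; int tst_rcvry; };

static int backend_write(int lnum, int len)         { return 0; }
static int failing_backend_write(int lnum, int len) { return -5; /* -EIO */ }

static void demo_ro_mode(struct demo_ctx *c, int err)
{
	c->ro_error = 1;
	fprintf(stderr, "switched to read-only mode, error %d\n", err);
}

static int demo_leb_write(struct demo_ctx *c, int lnum, int len)
{
	int err;

	if (c->ro_error)
		return -30;	/* -EROFS */

	if (!c->tst_rcvry)
		err = backend_write(lnum, len);
	else
		err = failing_backend_write(lnum, len);	/* test hook */

	if (err) {
		fprintf(stderr, "writing %d bytes to LEB %d failed, error %d\n",
			len, lnum, err);
		demo_ro_mode(c, err);
	}
	return err;
}

int main(void)
{
	struct demo_ctx c = { .tst_rcvry = 1 };

	demo_leb_write(&c, 7, 4096);	/* injected failure -> R/O mode */
	printf("second write returns %d (EROFS)\n",
	       demo_leb_write(&c, 7, 4096));
	return 0;
}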
+ */ + if (err && (err != -EBADMSG || even_ebadmsg)) { + ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", + len, lnum, offs, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_write(c->ubi, lnum, buf, offs, len); + else + err = dbg_leb_write(c, lnum, buf, offs, len); + if (err) { + ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", + len, lnum, offs, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_change(c->ubi, lnum, buf, len); + else + err = dbg_leb_change(c, lnum, buf, len); + if (err) { + ubifs_err("changing %d bytes in LEB %d failed, error %d", + len, lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_unmap(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_unmap(c->ubi, lnum); + else + err = dbg_leb_unmap(c, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_leb_map(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_map(c->ubi, lnum); + else + err = dbg_leb_map(c, lnum); + if (err) { + ubifs_err("mapping LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dump_stack(); + } + return err; +} + +int ubifs_is_mapped(const struct ubifs_info *c, int lnum) +{ + int err; + + err = ubi_is_mapped(c->ubi, lnum); + if (err < 0) { + ubifs_err("ubi_is_mapped failed for LEB %d, error %d", + lnum, err); + dump_stack(); } + return err; } /** @@ -88,8 +224,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * This function may skip data nodes CRC checking if @c->no_chk_data_crc is * true, which is controlled by corresponding UBIFS mount option. However, if * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is - * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is - * ignored and CRC is checked. + * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are + * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC + * is checked. This is because during mounting or re-mounting from R/O mode to + * R/W mode we may read journal nodes (when replying the journal or doing the + * recovery) and the journal nodes may potentially be corrupted, so checking is + * required. * * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. 
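The ubifs_check_node() comment above spells out when the data-node CRC may be skipped; the whole decision boils down to the small predicate below. This is a stand-alone user-space sketch with an illustrative node-type constant, not the kernel code itself.

/*
 * Data-node CRCs may be skipped only when the "no_chk_data_crc" mount
 * option is set, the caller did not insist on a check, and we are not in
 * the middle of mounting or remounting R/W (where journal nodes may be
 * corrupted and must be verified).
 */
#include <stdio.h>

#define DEMO_DATA_NODE 1

struct demo_ctx {
	int no_chk_data_crc;	/* mount option */
	int mounting;
	int remounting_rw;
};

static int demo_may_skip_crc(const struct demo_ctx *c, int type,
			     int must_chk_crc)
{
	return !must_chk_crc && type == DEMO_DATA_NODE &&
	       !c->mounting && !c->remounting_rw && c->no_chk_data_crc;
}

int main(void)
{
	struct demo_ctx c = { .no_chk_data_crc = 1 };

	printf("normal read of a data node, skip CRC: %d\n",
	       demo_may_skip_crc(&c, DEMO_DATA_NODE, 0));
	c.mounting = 1;
	printf("same read while mounting, skip CRC: %d\n",
	       demo_may_skip_crc(&c, DEMO_DATA_NODE, 0));
	return 0;
}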
@@ -131,8 +271,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && - c->no_chk_data_crc) + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && + !c->remounting_rw && c->no_chk_data_crc) return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); @@ -153,8 +293,8 @@ out_len: out: if (!quiet) { ubifs_err("bad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_dump_node(c, buf); + dump_stack(); } return err; } @@ -343,11 +483,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) * * This function synchronizes write-buffer @buf and returns zero in case of * success or a negative error code in case of failure. + * + * Note, although write-buffers are of @c->max_write_size, this function does + * not necessarily writes all @c->max_write_size bytes to the flash. Instead, + * if the write-buffer is only partially filled with data, only the used part + * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. + * This way we waste less space. */ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) { struct ubifs_info *c = wbuf->c; - int err, dirt; + int err, dirt, sync_len; cancel_wbuf_timer_nolock(wbuf); if (!wbuf->used || wbuf->lnum == -1) @@ -357,27 +503,48 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) dbg_io("LEB %d:%d, %d bytes, jhead %s", wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); ubifs_assert(!(wbuf->avail & 7)); - ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); + ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); + ubifs_assert(wbuf->size >= c->min_io_size); + ubifs_assert(wbuf->size <= c->max_write_size); + ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->leb_size - wbuf->offs >= c->max_write_size) + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); if (c->ro_error) return -EROFS; - ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - c->min_io_size, wbuf->dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d", - c->min_io_size, wbuf->lnum, wbuf->offs); - dbg_dump_stack(); + /* + * Do not write whole write buffer but write only the minimum necessary + * amount of min. I/O units. + */ + sync_len = ALIGN(wbuf->used, c->min_io_size); + dirt = sync_len - wbuf->used; + if (dirt) + ubifs_pad(c, wbuf->buf + wbuf->used, dirt); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len); + if (err) return err; - } - - dirt = wbuf->avail; spin_lock(&wbuf->lock); - wbuf->offs += c->min_io_size; - wbuf->avail = c->min_io_size; + wbuf->offs += sync_len; + /* + * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. + * But our goal is to optimize writes and make sure we write in + * @c->max_write_size chunks and to @c->max_write_size-aligned offset. + * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make + * sure that @wbuf->offs + @wbuf->size is aligned to + * @c->max_write_size. This way we make sure that after next + * write-buffer flush we are again at the optimal offset (aligned to + * @c->max_write_size). 
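The long write-buffer comment above describes how the buffer is temporarily shrunk so that, after an unaligned flush, wbuf->offs + wbuf->size lands back on a @c->max_write_size boundary (or on the end of the LEB). The arithmetic is easy to check in isolation; the geometry numbers in the sketch below are arbitrary examples, not UBIFS defaults.

/*
 * Worked example of the write-buffer sizing rule: full unit when the
 * offset is aligned, just enough to reach the next boundary when it is
 * not, and whatever remains near the end of the LEB.
 */
#include <stdio.h>

#define DEMO_ALIGN(x, a)	(((x) + (a) - 1) / (a) * (a))

static int next_wbuf_size(int offs, int leb_size, int max_write_size)
{
	if (leb_size - offs < max_write_size)
		return leb_size - offs;		   /* tail of the LEB */
	if (offs % max_write_size)
		/* shrink so the next flush re-aligns the offset */
		return DEMO_ALIGN(offs, max_write_size) - offs;
	return max_write_size;			   /* already aligned */
}

int main(void)
{
	int leb_size = 126976, max_write = 2048;

	/* aligned offset: use a full max. write unit */
	printf("offs 4096   -> size %d\n",
	       next_wbuf_size(4096, leb_size, max_write));
	/* unaligned after a small sync: only fill up to the boundary */
	printf("offs 4608   -> size %d\n",
	       next_wbuf_size(4608, leb_size, max_write));
	/* near the end of the LEB: whatever is left */
	printf("offs 126464 -> size %d\n",
	       next_wbuf_size(126464, leb_size, max_write));
	return 0;
}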
+ */ + if (c->leb_size - wbuf->offs < c->max_write_size) + wbuf->size = c->leb_size - wbuf->offs; + else if (wbuf->offs & (c->max_write_size - 1)) + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; + else + wbuf->size = c->max_write_size; + wbuf->avail = wbuf->size; wbuf->used = 0; wbuf->next_ino = 0; spin_unlock(&wbuf->lock); @@ -393,14 +560,12 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) * @wbuf: write-buffer * @lnum: logical eraseblock number to seek to * @offs: logical eraseblock offset to seek to - * @dtype: data type * * This function targets the write-buffer to logical eraseblock @lnum:@offs. - * The write-buffer is synchronized if it is not empty. Returns zero in case of - * success and a negative error code in case of failure. + * The write-buffer has to be empty. Returns zero in case of success and a + * negative error code in case of failure. */ -int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, - int dtype) +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs) { const struct ubifs_info *c = wbuf->c; @@ -409,21 +574,20 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); ubifs_assert(lnum != wbuf->lnum); - - if (wbuf->used > 0) { - int err = ubifs_wbuf_sync_nolock(wbuf); - - if (err) - return err; - } + ubifs_assert(wbuf->used == 0); spin_lock(&wbuf->lock); wbuf->lnum = lnum; wbuf->offs = offs; - wbuf->avail = c->min_io_size; + if (c->leb_size - wbuf->offs < c->max_write_size) + wbuf->size = c->leb_size - wbuf->offs; + else if (wbuf->offs & (c->max_write_size - 1)) + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; + else + wbuf->size = c->max_write_size; + wbuf->avail = wbuf->size; wbuf->used = 0; spin_unlock(&wbuf->lock); - wbuf->dtype = dtype; return 0; } @@ -500,8 +664,9 @@ out_timers: * * This function writes data to flash via write-buffer @wbuf. This means that * the last piece of the node won't reach the flash media immediately if it - * does not take whole minimal I/O unit. Instead, the node will sit in RAM - * until the write-buffer is synchronized (e.g., by timer). + * does not take whole max. write unit (@c->max_write_size). Instead, the node + * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or + * because more data are appended to the write-buffer). * * This function returns zero in case of success and a negative error code in * case of failure. 
If the node cannot be written because there is no more @@ -510,7 +675,7 @@ out_timers: int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) { struct ubifs_info *c = wbuf->c; - int err, written, n, aligned_len = ALIGN(len, 8), offs; + int err, written, n, aligned_len = ALIGN(len, 8); dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, dbg_ntype(((struct ubifs_ch *)buf)->node_type), @@ -518,9 +683,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); - ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); + ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); + ubifs_assert(wbuf->size >= c->min_io_size); + ubifs_assert(wbuf->size <= c->max_write_size); + ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); + if (c->leb_size - wbuf->offs >= c->max_write_size) + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { err = -ENOSPC; @@ -542,15 +713,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, - wbuf->offs, c->min_io_size, - wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size); if (err) goto out; spin_lock(&wbuf->lock); - wbuf->offs += c->min_io_size; - wbuf->avail = c->min_io_size; + wbuf->offs += wbuf->size; + if (c->leb_size - wbuf->offs >= c->max_write_size) + wbuf->size = c->max_write_size; + else + wbuf->size = c->leb_size - wbuf->offs; + wbuf->avail = wbuf->size; wbuf->used = 0; wbuf->next_ino = 0; spin_unlock(&wbuf->lock); @@ -564,39 +738,63 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) goto exit; } - /* - * The node is large enough and does not fit entirely within current - * minimal I/O unit. We have to fill and flush write-buffer and switch - * to the next min. I/O unit. - */ - dbg_io("flush jhead %s wbuf to LEB %d:%d", - dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - c->min_io_size, wbuf->dtype); - if (err) - goto out; + written = 0; + + if (wbuf->used) { + /* + * The node is large enough and does not fit entirely within + * current available space. We have to fill and flush + * write-buffer and switch to the next max. write unit. + */ + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); + memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size); + if (err) + goto out; - offs = wbuf->offs + c->min_io_size; - len -= wbuf->avail; - aligned_len -= wbuf->avail; - written = wbuf->avail; + wbuf->offs += wbuf->size; + len -= wbuf->avail; + aligned_len -= wbuf->avail; + written += wbuf->avail; + } else if (wbuf->offs & (c->max_write_size - 1)) { + /* + * The write-buffer offset is not aligned to + * @c->max_write_size and @wbuf->size is less than + * @c->max_write_size. 
Write @wbuf->size bytes to make sure the + * following writes are done in optimal @c->max_write_size + * chunks. + */ + dbg_io("write %d bytes to LEB %d:%d", + wbuf->size, wbuf->lnum, wbuf->offs); + err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, + wbuf->size); + if (err) + goto out; + + wbuf->offs += wbuf->size; + len -= wbuf->size; + aligned_len -= wbuf->size; + written += wbuf->size; + } /* - * The remaining data may take more whole min. I/O units, so write the - * remains multiple to min. I/O unit size directly to the flash media. + * The remaining data may take more whole max. write units, so write the + * remains multiple to max. write unit size directly to the flash media. * We align node length to 8-byte boundary because we anyway flash wbuf * if the remaining space is less than 8 bytes. */ - n = aligned_len >> c->min_io_shift; + n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->min_io_shift; - dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, - wbuf->dtype); + n <<= c->max_write_shift; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, + wbuf->offs); + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, n); if (err) goto out; - offs += n; + wbuf->offs += n; aligned_len -= n; len -= n; written += n; @@ -606,14 +804,17 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len) /* * And now we have what's left and what does not take whole - * min. I/O unit, so write it to the write-buffer and we are + * max. write unit, so write it to the write-buffer and we are * done. */ memcpy(wbuf->buf, buf + written, len); - wbuf->offs = offs; + if (c->leb_size - wbuf->offs >= c->max_write_size) + wbuf->size = c->max_write_size; + else + wbuf->size = c->leb_size - wbuf->offs; + wbuf->avail = wbuf->size - aligned_len; wbuf->used = aligned_len; - wbuf->avail = c->min_io_size - aligned_len; wbuf->next_ino = 0; spin_unlock(&wbuf->lock); @@ -634,9 +835,9 @@ exit: out: ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", len, wbuf->lnum, wbuf->offs, err); - dbg_dump_node(c, buf); - dbg_dump_stack(); - dbg_dump_leb(c, wbuf->lnum); + ubifs_dump_node(c, buf); + dump_stack(); + ubifs_dump_leb(c, wbuf->lnum); return err; } @@ -647,7 +848,6 @@ out: * @len: node length * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock - * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) * * This function automatically fills node magic number, assigns sequence * number, and calculates node CRC checksum. The length of the @buf buffer has @@ -656,7 +856,7 @@ out: * success and a negative error code in case of failure. 
*/ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, - int offs, int dtype) + int offs) { int err, buf_len = ALIGN(len, c->min_io_size); @@ -666,18 +866,15 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->ro_error) return -EROFS; ubifs_prepare_node(c, buf, len, 1); - err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", - buf_len, lnum, offs, err); - dbg_dump_node(c, buf); - dbg_dump_stack(); - } + err = ubifs_leb_write(c, lnum, buf, offs, buf_len); + if (err) + ubifs_dump_node(c, buf); return err; } @@ -729,13 +926,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, if (rlen > 0) { /* Read everything that goes before write-buffer */ - err = ubi_read(c->ubi, lnum, buf, offs, rlen); - if (err && err != -EBADMSG) { - ubifs_err("failed to read node %d from LEB %d:%d, " - "error %d", type, lnum, offs, err); - dbg_dump_stack(); + err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + if (err && err != -EBADMSG) return err; - } } if (type != ch->node_type) { @@ -760,8 +953,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, out: ubifs_err("bad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_dump_node(c, buf); + dump_stack(); return -EINVAL; } @@ -790,38 +983,37 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubi_read(c->ubi, lnum, buf, offs, len); - if (err && err != -EBADMSG) { - ubifs_err("cannot read node %d from LEB %d:%d, error %d", - type, lnum, offs, err); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) return err; - } if (type != ch->node_type) { - ubifs_err("bad node type (%d but expected %d)", - ch->node_type, type); + ubifs_errc(c, "bad node type (%d but expected %d)", + ch->node_type, type); goto out; } err = ubifs_check_node(c, buf, lnum, offs, 0, 0); if (err) { - ubifs_err("expected node type %d", type); + ubifs_errc(c, "expected node type %d", type); return err; } l = le32_to_cpu(ch->len); if (l != len) { - ubifs_err("bad node length %d, expected %d", l, len); + ubifs_errc(c, "bad node length %d, expected %d", l, len); goto out; } return 0; out: - ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, - ubi_is_mapped(c->ubi, lnum)); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, + offs, ubi_is_mapped(c->ubi, lnum)); + if (!c->probing) { + ubifs_dump_node(c, buf); + dump_stack(); + } return -EINVAL; } @@ -837,11 +1029,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; - wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); + wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); if (!wbuf->buf) return -ENOMEM; - size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); wbuf->inodes = kmalloc(size, GFP_KERNEL); if (!wbuf->inodes) { kfree(wbuf->buf); @@ -851,8 +1043,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) wbuf->used = 0; wbuf->lnum = wbuf->offs = -1; - wbuf->avail = 
c->min_io_size; - wbuf->dtype = UBI_UNKNOWN; + /* + * If the LEB starts at the max. write size aligned address, then + * write-buffer size has to be set to @c->max_write_size. Otherwise, + * set it to something smaller so that it ends at the closest max. + * write size boundary. + */ + size = c->max_write_size - (c->leb_start % c->max_write_size); + wbuf->avail = wbuf->size = size; wbuf->sync_callback = NULL; mutex_init(&wbuf->io_mutex); spin_lock_init(&wbuf->lock); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 8aacd64957a..648b143606c 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -147,7 +147,7 @@ out_unlock: long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int flags, err; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); switch (cmd) { case FS_IOC_GETFLAGS: @@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (IS_RDONLY(inode)) return -EROFS; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(flags, (int __user *) arg)) @@ -173,12 +173,12 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) * Make sure the file-system is read-write and make sure it * will not become read-only while we are changing the flags. */ - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return err; dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); err = setflags(inode, flags); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return err; } diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 914f1bd89e5..0e045e75abd 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -141,14 +141,8 @@ again: * LEB with some empty space. */ lnum = ubifs_find_free_space(c, len, &offs, squeeze); - if (lnum >= 0) { - /* Found an LEB, add it to the journal head */ - err = ubifs_add_bud_to_log(c, jhead, lnum, offs); - if (err) - goto out_return; - /* A new bud was successfully allocated and added to the log */ + if (lnum >= 0) goto out; - } err = lnum; if (err != -ENOSPC) @@ -203,13 +197,24 @@ again: return 0; } - err = ubifs_add_bud_to_log(c, jhead, lnum, 0); - if (err) - goto out_return; offs = 0; out: - err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); + /* + * Make sure we synchronize the write-buffer before we add the new bud + * to the log. Otherwise we may have a power cut after the log + * reference node for the last bud (@lnum) is written but before the + * write-buffer data are written to the next-to-last bud + * (@wbuf->lnum). And the effect would be that the recovery would see + * that there is corruption in the next-to-last bud. 
+ */ + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + goto out_return; + err = ubifs_add_bud_to_log(c, jhead, lnum, offs); + if (err) + goto out_return; + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs); if (err) goto out_unlock; @@ -380,11 +385,9 @@ out: if (err == -ENOSPC) { /* This are some budgeting problems, print useful information */ down_write(&c->commit_sem); - spin_lock(&c->space_lock); - dbg_dump_stack(); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); - dbg_dump_lprops(c); + dump_stack(); + ubifs_dump_budg(c, &c->bi); + ubifs_dump_lprops(c); cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); } @@ -466,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ino->uid = cpu_to_le32(inode->i_uid); - ino->gid = cpu_to_le32(inode->i_gid); + ino->uid = cpu_to_le32(i_uid_read(inode)); + ino->gid = cpu_to_le32(i_gid_read(inode)); ino->mode = cpu_to_le32(inode->i_mode); ino->flags = cpu_to_le32(ui->flags); ino->size = cpu_to_le64(ui->ui_size); @@ -666,6 +669,7 @@ out_free: out_release: release_head(c, BASEHD); + kfree(dent); out_ro: ubifs_ro_mode(c, err); if (last_reference) @@ -690,17 +694,26 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, { struct ubifs_data_node *data; int err, lnum, offs, compr_type, out_len; - int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; + int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; struct ubifs_inode *ui = ubifs_inode(inode); - dbg_jnl("ino %lu, blk %u, len %d, key %s", - (unsigned long)key_inum(c, key), key_block(c, key), len, - DBGKEY(key)); + dbg_jnlk(key, "ino %lu, blk %u, len %d, key ", + (unsigned long)key_inum(c, key), key_block(c, key), len); ubifs_assert(len <= UBIFS_BLOCK_SIZE); - data = kmalloc(dlen, GFP_NOFS); - if (!data) - return -ENOMEM; + data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); + if (!data) { + /* + * Fall-back to the write reserve buffer. Note, we might be + * currently on the memory reclaim path, when the kernel is + * trying to free some memory by writing out dirty pages. The + * write reserve buffer helps us to guarantee that we are + * always able to write the data. 
+ */ + allocated = 0; + mutex_lock(&c->write_reserve_mutex); + data = c->write_reserve_buf; + } data->ch.node_type = UBIFS_DATA_NODE; key_write(c, key, &data->key); @@ -736,7 +749,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, goto out_ro; finish_reservation(c); - kfree(data); + if (!allocated) + mutex_unlock(&c->write_reserve_mutex); + else + kfree(data); return 0; out_release: @@ -745,7 +761,10 @@ out_ro: ubifs_ro_mode(c, err); finish_reservation(c); out_free: - kfree(data); + if (!allocated) + mutex_unlock(&c->write_reserve_mutex); + else + kfree(data); return err; } @@ -914,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, int move = (old_dir != new_dir); struct ubifs_inode *uninitialized_var(new_ui); - dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", - old_dentry->d_name.len, old_dentry->d_name.name, - old_dir->i_ino, new_dentry->d_name.len, - new_dentry->d_name.name, new_dir->i_ino); + dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu", + old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino); ubifs_assert(ubifs_inode(old_dir)->data_len == 0); ubifs_assert(ubifs_inode(new_dir)->data_len == 0); ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); @@ -1157,7 +1174,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, dn = (void *)trun + UBIFS_TRUN_NODE_SZ; blk = new_size >> UBIFS_BLOCK_SHIFT; data_key_init(c, &key, inum, blk); - dbg_jnl("last block key %s", DBGKEY(&key)); + dbg_jnlk(&key, "last block key "); err = ubifs_tnc_lookup(c, &key, dn); if (err == -ENOENT) dlen = 0; /* Not found (so it is a hole) */ @@ -1248,7 +1265,6 @@ out_free: return err; } -#ifdef CONFIG_UBIFS_FS_XATTR /** * ubifs_jnl_delete_xattr - delete an extended attribute. @@ -1443,4 +1459,3 @@ out_free: return err; } -#endif /* CONFIG_UBIFS_FS_XATTR */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 4d0cb124146..a902c5919e4 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -29,11 +29,7 @@ #include "ubifs.h" -#ifdef CONFIG_UBIFS_FS_DEBUG static int dbg_check_bud_bytes(struct ubifs_info *c); -#else -#define dbg_check_bud_bytes(c) 0 -#endif /** * ubifs_search_bud - search bud LEB. @@ -100,20 +96,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) } /** - * next_log_lnum - switch to the next log LEB. - * @c: UBIFS file-system description object - * @lnum: current log LEB - */ -static inline int next_log_lnum(const struct ubifs_info *c, int lnum) -{ - lnum += 1; - if (lnum > c->log_last) - lnum = UBIFS_LOG_LNUM; - - return lnum; -} - -/** * empty_log_bytes - calculate amount of empty space in the log. * @c: UBIFS file-system description object */ @@ -175,26 +157,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) } /** - * ubifs_create_buds_lists - create journal head buds lists for remount rw. - * @c: UBIFS file-system description object - */ -void ubifs_create_buds_lists(struct ubifs_info *c) -{ - struct rb_node *p; - - spin_lock(&c->buds_lock); - p = rb_first(&c->buds); - while (p) { - struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); - struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; - - list_add_tail(&bud->list, &jhead->buds_list); - p = rb_next(p); - } - spin_unlock(&c->buds_lock); -} - -/** * ubifs_add_bud_to_log - add a new bud to the log. 
* @c: UBIFS file-system description object * @jhead: journal head the bud belongs to @@ -277,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) ref->jhead = cpu_to_le32(jhead); if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -296,7 +258,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) * an unclean reboot, because the target LEB might have been * unmapped, but not yet physically erased. */ - err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); + err = ubifs_leb_map(c, bud->lnum); if (err) goto out_unlock; } @@ -304,7 +266,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) dbg_log("write ref LEB %d:%d", c->lhead_lnum, c->lhead_offs); err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum, - c->lhead_offs, UBI_SHORTTERM); + c->lhead_offs); if (err) goto out_unlock; @@ -317,8 +279,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: - if (err != -EAGAIN) - ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -355,17 +315,15 @@ static void remove_buds(struct ubifs_info *c) * heads (non-closed buds). */ c->cmt_bud_bytes += wbuf->offs - bud->start; - dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " - "cmt_bud_bytes %lld", bud->lnum, bud->start, - dbg_jhead(bud->jhead), wbuf->offs - bud->start, - c->cmt_bud_bytes); + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + wbuf->offs - bud->start, c->cmt_bud_bytes); bud->start = wbuf->offs; } else { c->cmt_bud_bytes += c->leb_size - bud->start; - dbg_log("remove %d:%d, jhead %s, bud bytes %d, " - "cmt_bud_bytes %lld", bud->lnum, bud->start, - dbg_jhead(bud->jhead), c->leb_size - bud->start, - c->cmt_bud_bytes); + dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld", + bud->lnum, bud->start, dbg_jhead(bud->jhead), + c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); /* * If the commit does not finish, the recovery will need @@ -445,7 +403,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) /* Switch to the next log LEB */ if (c->lhead_offs) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -458,7 +416,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) len = ALIGN(len, c->min_io_size); dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); - err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM); + err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len); if (err) goto out; @@ -466,7 +424,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) c->lhead_offs += len; if (c->lhead_offs == c->leb_size) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -553,7 +511,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) } mutex_lock(&c->log_mutex); for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; - lnum = next_log_lnum(c, lnum)) { + lnum = ubifs_next_log_lnum(c, lnum)) { dbg_log("unmap log LEB %d", lnum); err = ubifs_leb_unmap(c, lnum); if (err) @@ -616,27 +574,10 @@ static int done_already(struct rb_root *done_tree, int lnum) */ static void destroy_done_tree(struct rb_root 
*done_tree) { - struct rb_node *this = done_tree->rb_node; - struct done_ref *dr; + struct done_ref *dr, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - dr = rb_entry(this, struct done_ref, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &dr->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb) kfree(dr); - } } /** @@ -659,10 +600,10 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, int sz = ALIGN(*offs, c->min_io_size), err; ubifs_pad(c, buf + *offs, sz - *offs); - err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, *lnum, buf, sz); if (err) return err; - *lnum = next_log_lnum(c, *lnum); + *lnum = ubifs_next_log_lnum(c, *lnum); *offs = 0; } memcpy(buf + *offs, node, len); @@ -732,13 +673,13 @@ int ubifs_consolidate_log(struct ubifs_info *c) ubifs_scan_destroy(sleb); if (lnum == c->lhead_lnum) break; - lnum = next_log_lnum(c, lnum); + lnum = ubifs_next_log_lnum(c, lnum); } if (offs) { int sz = ALIGN(offs, c->min_io_size); ubifs_pad(c, buf + offs, sz - offs); - err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, write_lnum, buf, sz); if (err) goto out_free; offs = ALIGN(offs, c->min_io_size); @@ -752,7 +693,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) /* Unmap remaining LEBs */ lnum = write_lnum; do { - lnum = next_log_lnum(c, lnum); + lnum = ubifs_next_log_lnum(c, lnum); err = ubifs_leb_unmap(c, lnum); if (err) return err; @@ -770,8 +711,6 @@ out_free: return err; } -#ifdef CONFIG_UBIFS_FS_DEBUG - /** * dbg_check_bud_bytes - make sure bud bytes calculation are all right. 
* @c: UBIFS file-system description object @@ -786,7 +725,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) struct ubifs_bud *bud; long long bud_bytes = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; spin_lock(&c->buds_lock); @@ -803,5 +742,3 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) return err; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 4d4ca388889..46190a7c42a 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, default: ubifs_assert(0); } + lprops->flags &= ~LPROPS_CAT_MASK; lprops->flags |= cat; + c->in_a_category_cnt += 1; + ubifs_assert(c->in_a_category_cnt <= c->main_lebs); } /** @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, default: ubifs_assert(0); } + + c->in_a_category_cnt -= 1; + ubifs_assert(c->in_a_category_cnt >= 0); } /** @@ -447,7 +453,7 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) int new_cat = ubifs_categorize_lprops(c, lprops); if (old_cat == new_cat) { - struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; + struct ubifs_lpt_heap *heap; /* lprops on a heap now must be moved up or down */ if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) @@ -504,7 +510,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_ZNODE, &pnode->flags) && + return !test_bit(COW_CNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -846,7 +852,9 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) return lprops; } -#ifdef CONFIG_UBIFS_FS_DEBUG +/* + * Everything below is related to debugging. + */ /** * dbg_check_cats - check category heaps and lists. 
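The two lprops.c hunks above introduce a new c->in_a_category_cnt counter: ubifs_add_to_cat() now clears the old LPROPS_CAT_MASK bits, sets the new category and increments the counter, while ubifs_remove_from_cat() decrements it, with assertions keeping it between 0 and c->main_lebs. A minimal stand-alone sketch of that bookkeeping is shown below; the category names, the MAIN_LEBS value and the calls in main() are illustrative only and are not taken from the patch.

        #include <assert.h>

        /*
         * Illustrative model: each LEB is either uncategorized or in exactly
         * one category, and in_a_category_cnt tracks how many are categorized.
         */
        enum cat { CAT_UNCAT, CAT_EMPTY, CAT_FREEABLE, CAT_DIRTY };

        #define MAIN_LEBS 4                     /* stand-in for c->main_lebs */

        static enum cat leb_cat[MAIN_LEBS];     /* all CAT_UNCAT initially */
        static int in_a_category_cnt;

        static void add_to_cat(int lnum, enum cat cat)
        {
                assert(leb_cat[lnum] == CAT_UNCAT);
                leb_cat[lnum] = cat;
                in_a_category_cnt += 1;
                assert(in_a_category_cnt <= MAIN_LEBS); /* invariant from the hunk */
        }

        static void remove_from_cat(int lnum)
        {
                assert(leb_cat[lnum] != CAT_UNCAT);
                leb_cat[lnum] = CAT_UNCAT;
                in_a_category_cnt -= 1;
                assert(in_a_category_cnt >= 0);         /* invariant from the hunk */
        }

        int main(void)
        {
                add_to_cat(0, CAT_EMPTY);
                add_to_cat(1, CAT_DIRTY);
                remove_from_cat(0);
                assert(in_a_category_cnt == 1);
                return 0;
        }

The sketch only models the counter invariant that the added assertions check (a main-area LEB contributes to the count at most once); it does not model the real category heaps and lists, which the dbg_check_cats() hunk that follows walks and validates.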
@@ -860,20 +868,20 @@ int dbg_check_cats(struct ubifs_info *c) struct list_head *pos; int i, cat; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return 0; list_for_each_entry(lprops, &c->empty_list, list) { if (lprops->free != c->leb_size) { - ubifs_err("non-empty LEB %d on empty list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on empty list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } } @@ -881,15 +889,15 @@ int dbg_check_cats(struct ubifs_info *c) i = 0; list_for_each_entry(lprops, &c->freeable_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on freeable list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on freeable list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } i += 1; @@ -911,21 +919,21 @@ int dbg_check_cats(struct ubifs_info *c) list_for_each_entry(lprops, &c->frdi_idx_list, list) { if (lprops->free + lprops->dirty != c->leb_size) { - ubifs_err("non-freeable LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { - ubifs_err("taken LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } if (!(lprops->flags & LPROPS_INDEX)) { - ubifs_err("non-index LEB %d on frdi_idx list " - "(free %d dirty %d flags %d)", lprops->lnum, - lprops->free, lprops->dirty, lprops->flags); + ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", + lprops->lnum, lprops->free, lprops->dirty, + lprops->flags); return -EINVAL; } } @@ -958,7 +966,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, { int i = 0, j, err = 0; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return; for (i = 0; i < heap->cnt; i++) { @@ -980,9 +988,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, goto out; } if (lprops != lp) { - dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", - (size_t)lprops, (size_t)lp, lprops->lnum, - lp->lnum); + ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", + (size_t)lprops, (size_t)lp, lprops->lnum, + lp->lnum); err = 4; 
goto out; } @@ -1000,28 +1008,18 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, } out: if (err) { - dbg_msg("failed cat %d hpos %d err %d", cat, i, err); - dbg_dump_stack(); - dbg_dump_heap(c, heap, cat); + ubifs_err("failed cat %d hpos %d err %d", cat, i, err); + dump_stack(); + ubifs_dump_heap(c, heap, cat); } } /** - * struct scan_check_data - data provided to scan callback function. - * @lst: LEB properties statistics - * @err: error code - */ -struct scan_check_data { - struct ubifs_lp_stats lst; - int err; -}; - -/** * scan_check_cb - scan callback. * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @lst: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -1030,12 +1028,12 @@ struct scan_check_data { */ static int scan_check_cb(struct ubifs_info *c, const struct ubifs_lprops *lp, int in_tree, - struct scan_check_data *data) + struct ubifs_lp_stats *lst) { struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - struct ubifs_lp_stats *lst = &data->lst; - int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; + int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; + void *buf = NULL; cat = lp->flags & LPROPS_CAT_MASK; if (cat != LPROPS_UNCAT) { @@ -1043,7 +1041,7 @@ static int scan_check_cb(struct ubifs_info *c, if (cat != (lp->flags & LPROPS_CAT_MASK)) { ubifs_err("bad LEB category %d expected %d", (lp->flags & LPROPS_CAT_MASK), cat); - goto out; + return -EINVAL; } } @@ -1077,7 +1075,7 @@ static int scan_check_cb(struct ubifs_info *c, } if (!found) { ubifs_err("bad LPT list (category %d)", cat); - goto out; + return -EINVAL; } } } @@ -1089,36 +1087,40 @@ static int scan_check_cb(struct ubifs_info *c, if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || lp != heap->arr[lp->hpos]) { ubifs_err("bad LPT heap (category %d)", cat); - goto out; + return -EINVAL; } } - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); - if (IS_ERR(sleb)) { - /* - * After an unclean unmount, empty and freeable LEBs - * may contain garbage. - */ - if (lp->free == c->leb_size) { - ubifs_err("scan errors were in empty LEB " - "- continuing checking"); - lst->empty_lebs += 1; - lst->total_free += c->leb_size; - lst->total_dark += ubifs_calc_dark(c, c->leb_size); - return LPT_SCAN_CONTINUE; - } + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * After an unclean unmount, empty and freeable LEBs + * may contain garbage - do not scan them. 
+ */ + if (lp->free == c->leb_size) { + lst->empty_lebs += 1; + lst->total_free += c->leb_size; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + if (lp->free + lp->dirty == c->leb_size && + !(lp->flags & LPROPS_INDEX)) { + lst->total_free += lp->free; + lst->total_dirty += lp->dirty; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } - if (lp->free + lp->dirty == c->leb_size && - !(lp->flags & LPROPS_INDEX)) { - ubifs_err("scan errors were in freeable LEB " - "- continuing checking"); - lst->total_free += lp->free; - lst->total_dirty += lp->dirty; - lst->total_dark += ubifs_calc_dark(c, c->leb_size); - return LPT_SCAN_CONTINUE; + sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ret = PTR_ERR(sleb); + if (ret == -EUCLEAN) { + ubifs_dump_lprops(c); + ubifs_dump_budg(c, &c->bi); } - data->err = PTR_ERR(sleb); - return LPT_SCAN_STOP; + goto out; } is_idx = -1; @@ -1157,8 +1159,8 @@ static int scan_check_cb(struct ubifs_info *c, if (free > c->leb_size || free < 0 || dirty > c->leb_size || dirty < 0) { - ubifs_err("bad calculated accounting for LEB %d: " - "free %d, dirty %d", lnum, free, dirty); + ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", + lnum, free, dirty); goto out_destroy; } @@ -1204,8 +1206,7 @@ static int scan_check_cb(struct ubifs_info *c, /* Free but not unmapped LEB, it's fine */ is_idx = 0; else { - ubifs_err("indexing node without indexing " - "flag"); + ubifs_err("indexing node without indexing flag"); goto out_print; } } @@ -1236,18 +1237,19 @@ static int scan_check_cb(struct ubifs_info *c, } ubifs_scan_destroy(sleb); + vfree(buf); return LPT_SCAN_CONTINUE; out_print: - ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " - "should be free %d, dirty %d", + ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", lnum, lp->free, lp->dirty, lp->flags, free, dirty); - dbg_dump_leb(c, lnum); + ubifs_dump_leb(c, lnum); out_destroy: ubifs_scan_destroy(sleb); + ret = -EINVAL; out: - data->err = -EINVAL; - return LPT_SCAN_STOP; + vfree(buf); + return ret; } /** @@ -1264,10 +1266,9 @@ out: int dbg_check_lprops(struct ubifs_info *c) { int i, err; - struct scan_check_data data; - struct ubifs_lp_stats *lst = &data.lst; + struct ubifs_lp_stats lst; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* @@ -1280,42 +1281,34 @@ int dbg_check_lprops(struct ubifs_info *c) return err; } - memset(lst, 0, sizeof(struct ubifs_lp_stats)); - - data.err = 0; + memset(&lst, 0, sizeof(struct ubifs_lp_stats)); err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, (ubifs_lpt_scan_callback)scan_check_cb, - &data); + &lst); if (err && err != -ENOSPC) goto out; - if (data.err) { - err = data.err; - goto out; - } - if (lst->empty_lebs != c->lst.empty_lebs || - lst->idx_lebs != c->lst.idx_lebs || - lst->total_free != c->lst.total_free || - lst->total_dirty != c->lst.total_dirty || - lst->total_used != c->lst.total_used) { + if (lst.empty_lebs != c->lst.empty_lebs || + lst.idx_lebs != c->lst.idx_lebs || + lst.total_free != c->lst.total_free || + lst.total_dirty != c->lst.total_dirty || + lst.total_used != c->lst.total_used) { ubifs_err("bad overall accounting"); - ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " - "total_free %lld, total_dirty %lld, total_used %lld", - lst->empty_lebs, lst->idx_lebs, lst->total_free, - lst->total_dirty, lst->total_used); - ubifs_err("read from lprops: empty_lebs %d, 
idx_lebs %d, " - "total_free %lld, total_dirty %lld, total_used %lld", + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", + lst.empty_lebs, lst.idx_lebs, lst.total_free, + lst.total_dirty, lst.total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, c->lst.total_dirty, c->lst.total_used); err = -EINVAL; goto out; } - if (lst->total_dead != c->lst.total_dead || - lst->total_dark != c->lst.total_dark) { + if (lst.total_dead != c->lst.total_dead || + lst.total_dark != c->lst.total_dark) { ubifs_err("bad dead/dark space accounting"); ubifs_err("calculated: total_dead %lld, total_dark %lld", - lst->total_dead, lst->total_dark); + lst.total_dead, lst.total_dark); ubifs_err("read from lprops: total_dead %lld, total_dark %lld", c->lst.total_dead, c->lst.total_dark); err = -EINVAL; @@ -1326,5 +1319,3 @@ int dbg_check_lprops(struct ubifs_info *c) out: return err; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 72775d35b99..d46b19ec181 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -701,8 +701,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen); if (err) goto out; p = buf; @@ -732,8 +731,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen); if (err) goto out; p = buf; @@ -780,8 +778,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen); if (err) goto out; p = buf; @@ -806,7 +803,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen); if (err) goto out; p = buf; @@ -826,7 +823,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, /* Write remaining buffer */ memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, buf, alen); if (err) goto out; @@ -926,7 +923,7 @@ static int check_lpt_crc(void *buf, int len) if (crc != calc_crc) { ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, calc_crc); - dbg_dump_stack(); + dump_stack(); return -EINVAL; } return 0; @@ -949,7 +946,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type) if (node_type != type) { ubifs_err("invalid type (%d) in LPT node type %d", node_type, type); - dbg_dump_stack(); + dump_stack(); return -EINVAL; } return 0; @@ -1222,7 +1219,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = 
ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -1247,6 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + dump_stack(); kfree(nnode); return err; } @@ -1270,10 +1268,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lnum = branch->lnum; offs = branch->offs; pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); - if (!pnode) { - err = -ENOMEM; - goto out; - } + if (!pnode) + return -ENOMEM; + if (lnum == 0) { /* * This pnode was not written which just means that the LEB @@ -1291,7 +1288,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -1312,8 +1309,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); - dbg_dump_pnode(c, pnode, parent, iip); - dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + ubifs_dump_pnode(c, pnode, parent, iip); + dump_stack(); + ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; } @@ -1332,7 +1330,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); if (err) goto out; err = unpack_ltab(c, buf); @@ -1355,7 +1353,8 @@ static int read_lsave(struct ubifs_info *c) buf = vmalloc(c->lsave_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); + err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, + c->lsave_sz, 1); if (err) goto out; err = unpack_lsave(c, buf); @@ -1738,16 +1737,23 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) if (rd) { err = lpt_init_rd(c); if (err) - return err; + goto out_err; } if (wr) { err = lpt_init_wr(c); if (err) - return err; + goto out_err; } return 0; + +out_err: + if (wr) + ubifs_lpt_free(c, 1); + if (rd) + ubifs_lpt_free(c, 0); + return err; } /** @@ -1815,8 +1821,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->nnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->nnode_sz, 1); if (err) return ERR_PTR(err); err = ubifs_unpack_nnode(c, buf, nnode); @@ -1884,8 +1890,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ubifs_assert(branch->lnum >= c->lpt_first && branch->lnum <= c->lpt_last); ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->pnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->pnode_sz, 1); if (err) return ERR_PTR(err); err = unpack_pnode(c, buf, pnode); @@ -1984,12 +1990,11 @@ again: if (path[h].in_tree) continue; - nnode = kmalloc(sz, GFP_NOFS); + nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); if (!nnode) { err = -ENOMEM; goto out; } - memcpy(nnode, &path[h].nnode, sz); parent = 
nnode->parent; parent->nbranch[nnode->iip].nnode = nnode; path[h].ptr.nnode = nnode; @@ -2002,12 +2007,11 @@ again: const size_t sz = sizeof(struct ubifs_pnode); struct ubifs_nnode *parent; - pnode = kmalloc(sz, GFP_NOFS); + pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); if (!pnode) { err = -ENOMEM; goto out; } - memcpy(pnode, &path[h].pnode, sz); parent = pnode->parent; parent->nbranch[pnode->iip].pnode = pnode; path[h].ptr.pnode = pnode; @@ -2080,8 +2084,6 @@ out: return err; } -#ifdef CONFIG_UBIFS_FS_DEBUG - /** * dbg_chk_pnode - check a pnode. * @c: the UBIFS file-system description object @@ -2096,8 +2098,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, int i; if (pnode->num != col) { - dbg_err("pnode num %d expected %d parent num %d iip %d", - pnode->num, col, pnode->parent->num, pnode->iip); + ubifs_err("pnode num %d expected %d parent num %d iip %d", + pnode->num, col, pnode->parent->num, pnode->iip); return -EINVAL; } for (i = 0; i < UBIFS_LPT_FANOUT; i++) { @@ -2111,14 +2113,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, if (lnum >= c->leb_cnt) continue; if (lprops->lnum != lnum) { - dbg_err("bad LEB number %d expected %d", - lprops->lnum, lnum); + ubifs_err("bad LEB number %d expected %d", + lprops->lnum, lnum); return -EINVAL; } if (lprops->flags & LPROPS_TAKEN) { if (cat != LPROPS_UNCAT) { - dbg_err("LEB %d taken but not uncat %d", - lprops->lnum, cat); + ubifs_err("LEB %d taken but not uncat %d", + lprops->lnum, cat); return -EINVAL; } continue; @@ -2130,8 +2132,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, case LPROPS_FRDI_IDX: break; default: - dbg_err("LEB %d index but cat %d", - lprops->lnum, cat); + ubifs_err("LEB %d index but cat %d", + lprops->lnum, cat); return -EINVAL; } } else { @@ -2143,8 +2145,8 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, case LPROPS_FREEABLE: break; default: - dbg_err("LEB %d not index but cat %d", - lprops->lnum, cat); + ubifs_err("LEB %d not index but cat %d", + lprops->lnum, cat); return -EINVAL; } } @@ -2184,24 +2186,24 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, break; } if (!found) { - dbg_err("LEB %d cat %d not found in cat heap/list", - lprops->lnum, cat); + ubifs_err("LEB %d cat %d not found in cat heap/list", + lprops->lnum, cat); return -EINVAL; } switch (cat) { case LPROPS_EMPTY: if (lprops->free != c->leb_size) { - dbg_err("LEB %d cat %d free %d dirty %d", - lprops->lnum, cat, lprops->free, - lprops->dirty); + ubifs_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); return -EINVAL; } case LPROPS_FREEABLE: case LPROPS_FRDI_IDX: if (lprops->free + lprops->dirty != c->leb_size) { - dbg_err("LEB %d cat %d free %d dirty %d", - lprops->lnum, cat, lprops->free, - lprops->dirty); + ubifs_err("LEB %d cat %d free %d dirty %d", + lprops->lnum, cat, lprops->free, + lprops->dirty); return -EINVAL; } } @@ -2225,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, struct ubifs_cnode *cn; int num, iip = 0, err; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; while (cnode) { @@ -2235,9 +2237,9 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, /* cnode is a nnode */ num = calc_nnode_num(row, col); if (cnode->num != num) { - dbg_err("nnode num %d expected %d " - "parent num %d iip %d", cnode->num, num, - (nnode ? 
nnode->num : 0), cnode->iip); + ubifs_err("nnode num %d expected %d parent num %d iip %d", + cnode->num, num, + (nnode ? nnode->num : 0), cnode->iip); return -EINVAL; } nn = (struct ubifs_nnode *)cnode; @@ -2274,5 +2276,3 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, } return 0; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 5c90dec5db0..45d4e96a6ba 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -27,8 +27,11 @@ #include <linux/crc16.h> #include <linux/slab.h> +#include <linux/random.h> #include "ubifs.h" +static int dbg_populate_lsave(struct ubifs_info *c); + /** * first_dirty_cnode - find first dirty cnode. * @c: UBIFS file-system description object @@ -110,8 +113,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) return 0; cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); - __set_bit(COW_ZNODE, &cnode->flags); + ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); + __set_bit(COW_CNODE, &cnode->flags); cnext = next_dirty_cnode(cnode); if (!cnext) { cnode->cnext = c->lpt_cnext; @@ -317,11 +320,10 @@ static int layout_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space"); - dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " - "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); - dbg_dump_lpt_info(c); - dbg_dump_lpt_lebs(c); + ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); dump_stack(); return err; } @@ -414,7 +416,7 @@ static int write_cnodes(struct ubifs_info *c) alen = ALIGN(wlen, c->min_io_size); memset(buf + offs, 0xff, alen - wlen); err = ubifs_leb_write(c, lnum, buf + from, from, - alen, UBI_SHORTTERM); + alen); if (err) return err; } @@ -458,9 +460,9 @@ static int write_cnodes(struct ubifs_info *c) * important. 
*/ clear_bit(DIRTY_CNODE, &cnode->flags); - smp_mb__before_clear_bit(); - clear_bit(COW_ZNODE, &cnode->flags); - smp_mb__after_clear_bit(); + smp_mb__before_atomic(); + clear_bit(COW_CNODE, &cnode->flags); + smp_mb__after_atomic(); offs += len; dbg_chk_lpt_sz(c, 1, len); cnode = cnode->cnext; @@ -472,8 +474,7 @@ static int write_cnodes(struct ubifs_info *c) wlen = offs - from; alen = ALIGN(wlen, c->min_io_size); memset(buf + offs, 0xff, alen - wlen); - err = ubifs_leb_write(c, lnum, buf + from, from, alen, - UBI_SHORTTERM); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); if (err) return err; dbg_chk_lpt_sz(c, 2, c->leb_size - offs); @@ -499,8 +500,7 @@ static int write_cnodes(struct ubifs_info *c) wlen = offs - from; alen = ALIGN(wlen, c->min_io_size); memset(buf + offs, 0xff, alen - wlen); - err = ubifs_leb_write(c, lnum, buf + from, from, alen, - UBI_SHORTTERM); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); if (err) return err; dbg_chk_lpt_sz(c, 2, c->leb_size - offs); @@ -524,7 +524,7 @@ static int write_cnodes(struct ubifs_info *c) wlen = offs - from; alen = ALIGN(wlen, c->min_io_size); memset(buf + offs, 0xff, alen - wlen); - err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); + err = ubifs_leb_write(c, lnum, buf + from, from, alen); if (err) return err; @@ -545,11 +545,10 @@ static int write_cnodes(struct ubifs_info *c) return 0; no_space: - ubifs_err("LPT out of space mismatch"); - dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " - "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); - dbg_dump_lpt_info(c); - dbg_dump_lpt_lebs(c); + ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", + lnum, offs, len, done_ltab, done_lsave); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); dump_stack(); return err; } @@ -586,7 +585,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, if (nnode->nbranch[iip].lnum) break; } - } while (iip >= UBIFS_LPT_FANOUT); + } while (iip >= UBIFS_LPT_FANOUT); /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); @@ -815,6 +814,10 @@ static void populate_lsave(struct ubifs_info *c) c->lpt_drty_flgs |= LSAVE_DIRTY; ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); } + + if (dbg_populate_lsave(c)) + return; + list_for_each_entry(lprops, &c->empty_list, list) { c->lsave[cnt++] = lprops->lnum; if (cnt >= c->lsave_cnt) @@ -1150,11 +1153,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) void *buf = c->lpt_buf; dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) return err; - } + while (1) { if (!is_a_node(c, buf, len)) { int pad_len; @@ -1486,7 +1489,9 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only) kfree(c->lpt_nod_buf); } -#ifdef CONFIG_UBIFS_FS_DEBUG +/* + * Everything below is related to debugging. + */ /** * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. 
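A pattern repeated throughout the layout_cnodes()/write_cnodes() hunks above is: round the used length up to the minimal I/O unit with ALIGN(), fill the tail with 0xff, and only then pass the aligned length to ubifs_leb_write() or ubifs_leb_change(). Flash can only be programmed in whole c->min_io_size chunks and 0xff is the erased state, which is also why the dbg_check_ltab_lnum() hunk that follows uses dbg_is_all_ff() to accept an all-0xff tail as legitimate empty space. Below is a small stand-alone model of that align-and-pad step; ALIGN_UP, the 8-byte minimal I/O unit and the buffer contents are made-up example values, not taken from the patch.

        #include <stdio.h>
        #include <string.h>

        /*
         * Round x up to a multiple of a (a must be a power of two); this is
         * what the kernel ALIGN() macro does in the hunks above.
         */
        #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

        int main(void)
        {
                unsigned char buf[32];
                int wlen = 13;          /* bytes of real node data in buf */
                int min_io = 8;         /* hypothetical minimal I/O unit */
                int alen = ALIGN_UP(wlen, min_io);

                memset(buf, 0xab, wlen);                /* pretend node data */
                memset(buf + wlen, 0xff, alen - wlen);  /* pad up to the I/O unit */

                /* alen (16 here) is the length that would go to ubifs_leb_write() */
                printf("wlen=%d alen=%d pad=%d\n", wlen, alen, alen - wlen);
                return 0;
        }

In the real code those pad bytes are wasted space until the LEB is reclaimed, which is what the dbg_chk_lpt_sz() accounting in the later hunks tracks as chk_lpt_wastage.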
@@ -1628,56 +1633,65 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) { int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; int ret; - void *buf = c->dbg->buf; + void *buf, *p; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; - dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); - return err; + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory for ltab checking"); + return 0; } + + dbg_lp("LEB %d", lnum); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) + goto out; + while (1) { - if (!is_a_node(c, buf, len)) { + if (!is_a_node(c, p, len)) { int i, pad_len; - pad_len = get_pad_len(c, buf, len); + pad_len = get_pad_len(c, p, len); if (pad_len) { - buf += pad_len; + p += pad_len; len -= pad_len; dirty += pad_len; continue; } - if (!dbg_is_all_ff(buf, len)) { - dbg_msg("invalid empty space in LEB %d at %d", - lnum, c->leb_size - len); + if (!dbg_is_all_ff(p, len)) { + ubifs_err("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); err = -EINVAL; } i = lnum - c->lpt_first; if (len != c->ltab[i].free) { - dbg_msg("invalid free space in LEB %d " - "(free %d, expected %d)", - lnum, len, c->ltab[i].free); + ubifs_err("invalid free space in LEB %d (free %d, expected %d)", + lnum, len, c->ltab[i].free); err = -EINVAL; } if (dirty != c->ltab[i].dirty) { - dbg_msg("invalid dirty space in LEB %d " - "(dirty %d, expected %d)", - lnum, dirty, c->ltab[i].dirty); + ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", + lnum, dirty, c->ltab[i].dirty); err = -EINVAL; } - return err; + goto out; } - node_type = get_lpt_node_type(c, buf, &node_num); + node_type = get_lpt_node_type(c, p, &node_num); node_len = get_lpt_node_len(c, node_type); ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); if (ret == 1) dirty += node_len; - buf += node_len; + p += node_len; len -= node_len; } + + err = 0; +out: + vfree(buf); + return err; } /** @@ -1690,7 +1704,7 @@ int dbg_check_ltab(struct ubifs_info *c) { int lnum, err, i, cnt; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* Bring the entire tree into memory */ @@ -1713,7 +1727,7 @@ int dbg_check_ltab(struct ubifs_info *c) for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { err = dbg_check_ltab_lnum(c, lnum); if (err) { - dbg_err("failed at LEB %d", lnum); + ubifs_err("failed at LEB %d", lnum); return err; } } @@ -1733,7 +1747,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; for (i = 0; i < c->lpt_lebs; i++) { @@ -1745,10 +1759,10 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) free += c->leb_size; } if (free < c->lpt_sz) { - dbg_err("LPT space error: free %lld lpt_sz %lld", - free, c->lpt_sz); - dbg_dump_lpt_info(c); - dbg_dump_lpt_lebs(c); + ubifs_err("LPT space error: free %lld lpt_sz %lld", + free, c->lpt_sz); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); dump_stack(); return -EINVAL; } @@ -1775,7 +1789,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) long long chk_lpt_sz, lpt_sz; int err = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; switch (action) { @@ -1785,13 +1799,13 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) d->chk_lpt_lebs = 0; 
d->chk_lpt_wastage = 0; if (c->dirty_pn_cnt > c->pnode_cnt) { - dbg_err("dirty pnodes %d exceed max %d", - c->dirty_pn_cnt, c->pnode_cnt); + ubifs_err("dirty pnodes %d exceed max %d", + c->dirty_pn_cnt, c->pnode_cnt); err = -EINVAL; } if (c->dirty_nn_cnt > c->nnode_cnt) { - dbg_err("dirty nnodes %d exceed max %d", - c->dirty_nn_cnt, c->nnode_cnt); + ubifs_err("dirty nnodes %d exceed max %d", + c->dirty_nn_cnt, c->nnode_cnt); err = -EINVAL; } return err; @@ -1808,23 +1822,23 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) chk_lpt_sz *= d->chk_lpt_lebs; chk_lpt_sz += len - c->nhead_offs; if (d->chk_lpt_sz != chk_lpt_sz) { - dbg_err("LPT wrote %lld but space used was %lld", - d->chk_lpt_sz, chk_lpt_sz); + ubifs_err("LPT wrote %lld but space used was %lld", + d->chk_lpt_sz, chk_lpt_sz); err = -EINVAL; } if (d->chk_lpt_sz > c->lpt_sz) { - dbg_err("LPT wrote %lld but lpt_sz is %lld", - d->chk_lpt_sz, c->lpt_sz); + ubifs_err("LPT wrote %lld but lpt_sz is %lld", + d->chk_lpt_sz, c->lpt_sz); err = -EINVAL; } if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { - dbg_err("LPT layout size %lld but wrote %lld", - d->chk_lpt_sz, d->chk_lpt_sz2); + ubifs_err("LPT layout size %lld but wrote %lld", + d->chk_lpt_sz, d->chk_lpt_sz2); err = -EINVAL; } if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { - dbg_err("LPT new nhead offs: expected %d was %d", - d->new_nhead_offs, len); + ubifs_err("LPT new nhead offs: expected %d was %d", + d->new_nhead_offs, len); err = -EINVAL; } lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; @@ -1833,13 +1847,13 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) if (c->big_lpt) lpt_sz += c->lsave_sz; if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { - dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", - d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); + ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", + d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); err = -EINVAL; } if (err) { - dbg_dump_lpt_info(c); - dbg_dump_lpt_lebs(c); + ubifs_dump_lpt_info(c); + ubifs_dump_lpt_lebs(c); dump_stack(); } d->chk_lpt_sz2 = d->chk_lpt_sz; @@ -1858,7 +1872,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) } /** - * dbg_dump_lpt_leb - dump an LPT LEB. + * ubifs_dump_lpt_leb - dump an LPT LEB. 
* @c: UBIFS file-system description object * @lnum: LEB number to dump * @@ -1870,45 +1884,48 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) static void dump_lpt_leb(const struct ubifs_info *c, int lnum) { int err, len = c->leb_size, node_type, node_num, node_len, offs; - void *buf = c->dbg->buf; + void *buf, *p; - printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", - current->pid, lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory to dump LPT"); return; } + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) + goto out; + while (1) { offs = c->leb_size - len; - if (!is_a_node(c, buf, len)) { + if (!is_a_node(c, p, len)) { int pad_len; - pad_len = get_pad_len(c, buf, len); + pad_len = get_pad_len(c, p, len); if (pad_len) { - printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + pr_err("LEB %d:%d, pad %d bytes\n", lnum, offs, pad_len); - buf += pad_len; + p += pad_len; len -= pad_len; continue; } if (len) - printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", + pr_err("LEB %d:%d, free %d bytes\n", lnum, offs, len); break; } - node_type = get_lpt_node_type(c, buf, &node_num); + node_type = get_lpt_node_type(c, p, &node_num); switch (node_type) { case UBIFS_LPT_PNODE: { node_len = c->pnode_sz; if (c->big_lpt) - printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", + pr_err("LEB %d:%d, pnode num %d\n", lnum, offs, node_num); else - printk(KERN_DEBUG "LEB %d:%d, pnode\n", - lnum, offs); + pr_err("LEB %d:%d, pnode\n", lnum, offs); break; } case UBIFS_LPT_NNODE: @@ -1918,60 +1935,100 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) node_len = c->nnode_sz; if (c->big_lpt) - printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", + pr_err("LEB %d:%d, nnode num %d, ", lnum, offs, node_num); else - printk(KERN_DEBUG "LEB %d:%d, nnode, ", + pr_err("LEB %d:%d, nnode, ", lnum, offs); - err = ubifs_unpack_nnode(c, buf, &nnode); + err = ubifs_unpack_nnode(c, p, &nnode); for (i = 0; i < UBIFS_LPT_FANOUT; i++) { - printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, + pr_cont("%d:%d", nnode.nbranch[i].lnum, nnode.nbranch[i].offs); if (i != UBIFS_LPT_FANOUT - 1) - printk(KERN_CONT ", "); + pr_cont(", "); } - printk(KERN_CONT "\n"); + pr_cont("\n"); break; } case UBIFS_LPT_LTAB: node_len = c->ltab_sz; - printk(KERN_DEBUG "LEB %d:%d, ltab\n", - lnum, offs); + pr_err("LEB %d:%d, ltab\n", lnum, offs); break; case UBIFS_LPT_LSAVE: node_len = c->lsave_sz; - printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); + pr_err("LEB %d:%d, lsave len\n", lnum, offs); break; default: ubifs_err("LPT node type %d not recognized", node_type); - return; + goto out; } - buf += node_len; + p += node_len; len -= node_len; } - printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", - current->pid, lnum); + pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); +out: + vfree(buf); + return; } /** - * dbg_dump_lpt_lebs - dump LPT lebs. + * ubifs_dump_lpt_lebs - dump LPT lebs. * @c: UBIFS file-system description object * * This function dumps all LPT LEBs. The caller has to make sure the LPT is * locked. 
*/ -void dbg_dump_lpt_lebs(const struct ubifs_info *c) +void ubifs_dump_lpt_lebs(const struct ubifs_info *c) { int i; - printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", - current->pid); + pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid); for (i = 0; i < c->lpt_lebs; i++) dump_lpt_leb(c, i + c->lpt_first); - printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", - current->pid); + pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid); } -#endif /* CONFIG_UBIFS_FS_DEBUG */ +/** + * dbg_populate_lsave - debugging version of 'populate_lsave()' + * @c: UBIFS file-system description object + * + * This is a debugging version for 'populate_lsave()' which populates lsave + * with random LEBs instead of useful LEBs, which is good for test coverage. + * Returns zero if lsave has not been populated (this debugging feature is + * disabled) and non-zero if lsave has been populated. + */ +static int dbg_populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i; + + if (!dbg_is_chk_gen(c)) + return 0; + if (prandom_u32() & 3) + return 0; + + for (i = 0; i < c->lsave_cnt; i++) + c->lsave[i] = c->main_first; + + list_for_each_entry(lprops, &c->empty_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->freeable_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->frdi_idx_list, list) + c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum; + + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum; + + return 1; +} diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 21f47afdacf..ab83ace9910 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) } main_sz = (long long)c->main_lebs * c->leb_size; - if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { err = 9; goto out; } @@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) } if (c->lst.total_dead + c->lst.total_dark + - c->lst.total_used + c->old_idx_sz > main_sz) { + c->lst.total_used + c->bi.old_idx_sz > main_sz) { err = 21; goto out; } @@ -241,7 +241,7 @@ static int validate_master(const struct ubifs_info *c) out: ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); - dbg_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node); return -EINVAL; } @@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); - c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); @@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); - c->calc_idx_sz = c->old_idx_sz; +
c->calc_idx_sz = c->bi.old_idx_sz; if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) c->no_orphs = 1; @@ -317,7 +317,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { ubifs_err("bad leb_cnt on master node"); - dbg_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node); return -EINVAL; } @@ -379,7 +379,7 @@ int ubifs_write_master(struct ubifs_info *c) c->mst_offs = offs; c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); - err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + err = ubifs_write_node(c, c->mst_node, len, lnum, offs); if (err) return err; @@ -390,7 +390,7 @@ int ubifs_write_master(struct ubifs_info *c) if (err) return err; } - err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); + err = ubifs_write_node(c, c->mst_node, len, lnum, offs); return err; } diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index c3de04dc952..ee7cb5ebb6e 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) } /** + * ubifs_zn_obsolete - check if znode is obsolete. + * @znode: znode to check + * + * This helper function returns %1 if @znode is obsolete and %0 otherwise. + */ +static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) +{ + return !!test_bit(OBSOLETE_ZNODE, &znode->flags); +} + +/** + * ubifs_zn_cow - check if znode has to be copied on write. + * @znode: znode to check + * + * This helper function returns %1 if @znode has the COW flag set and %0 + * otherwise. + */ +static inline int ubifs_zn_cow(const struct ubifs_znode *znode) +{ + return !!test_bit(COW_ZNODE, &znode->flags); +} + +/** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object */ @@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) } /** - * ubifs_leb_unmap - unmap an LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to unmap - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_write - write to a LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @offs: offset within LEB to write to - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, - const void *buf, int offs, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes at %d:%d, error %d", - len, lnum, offs, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_change - atomic LEB change. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure.
- */ -static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, - const void *buf, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d, error %d", - len, lnum, err); - return err; - } - - return 0; -} - -/** * ubifs_encode_dev - encode device node IDs. * @dev: UBIFS device node information * @rdev: device IDs to encode @@ -340,4 +283,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) mutex_unlock(&c->lp_mutex); } +/** + * ubifs_next_log_lnum - switch to the next log LEB. + * @c: UBIFS file-system description object + * @lnum: current log LEB + * + * This helper function returns the log LEB number which goes next after LEB + * 'lnum'. + */ +static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) +{ + lnum += 1; + if (lnum > c->log_last) + lnum = UBIFS_LOG_LNUM; + + return lnum; +} + #endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 82009c74b6a..f1c3e5a1b31 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -52,11 +52,7 @@ * than the maximum number of orphans allowed. */ -#ifdef CONFIG_UBIFS_FS_DEBUG static int dbg_check_orphans(struct ubifs_info *c); -#else -#define dbg_check_orphans(c) 0 -#endif /** * ubifs_add_orphan - add an orphan. @@ -92,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) else if (inum > o->inum) p = &(*p)->rb_right; else { - dbg_err("orphaned twice"); + ubifs_err("orphaned twice"); spin_unlock(&c->orphan_lock); kfree(orphan); return 0; @@ -130,13 +126,14 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) else if (inum > o->inum) p = p->rb_right; else { - if (o->dnext) { + if (o->del) { spin_unlock(&c->orphan_lock); dbg_gen("deleted twice ino %lu", (unsigned long)inum); return; } - if (o->cnext) { + if (o->cmt) { + o->del = 1; o->dnext = c->orph_dnext; c->orph_dnext = o; spin_unlock(&c->orphan_lock); @@ -158,8 +155,8 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) } } spin_unlock(&c->orphan_lock); - dbg_err("missing orphan ino %lu", (unsigned long)inum); - dbg_dump_stack(); + ubifs_err("missing orphan ino %lu", (unsigned long)inum); + dump_stack(); } /** @@ -176,11 +173,13 @@ int ubifs_orphan_start_commit(struct ubifs_info *c) last = &c->orph_cnext; list_for_each_entry(orphan, &c->orph_new, new_list) { ubifs_assert(orphan->new); + ubifs_assert(!orphan->cmt); orphan->new = 0; + orphan->cmt = 1; *last = orphan; last = &orphan->cnext; } - *last = orphan->cnext; + *last = NULL; c->cmt_orphans = c->new_orphans; c->new_orphans = 0; dbg_cmt("%d orphans to commit", c->cmt_orphans); @@ -248,8 +247,7 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) ubifs_assert(c->ohead_offs == 0); ubifs_prepare_node(c, c->orph_buf, len, 1); len = ALIGN(len, c->min_io_size); - err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, - UBI_SHORTTERM); + err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len); } else { if (c->ohead_offs == 0) { /* Ensure LEB has been unmapped */ @@ -258,7 +256,7 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) return err; } err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, - c->ohead_offs, UBI_SHORTTERM); + c->ohead_offs); } return err; } @@ -304,7 +302,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic) cnext = c->orph_cnext; for (i = 0; i < cnt; i++) { orphan = cnext; + 
ubifs_assert(orphan->cmt); orph->inos[i] = cpu_to_le64(orphan->inum); + orphan->cmt = 0; cnext = orphan->cnext; orphan->cnext = NULL; } @@ -383,11 +383,12 @@ static int consolidate(struct ubifs_info *c) list_for_each_entry(orphan, &c->orph_list, list) { if (orphan->new) continue; + orphan->cmt = 1; *last = orphan; last = &orphan->cnext; cnt += 1; } - *last = orphan->cnext; + *last = NULL; ubifs_assert(cnt == c->tot_orphans - c->new_orphans); c->cmt_orphans = cnt; c->ohead_lnum = c->orph_first; @@ -447,6 +448,7 @@ static void erase_deleted(struct ubifs_info *c) orphan = dnext; dnext = orphan->dnext; ubifs_assert(!orphan->new); + ubifs_assert(orphan->del); rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; @@ -536,6 +538,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) rb_link_node(&orphan->rb, parent, p); rb_insert_color(&orphan->rb, &c->orph_tree); list_add_tail(&orphan->list, &c->orph_list); + orphan->del = 1; orphan->dnext = c->orph_dnext; c->orph_dnext = orphan; dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, @@ -567,9 +570,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, list_for_each_entry(snod, &sleb->nodes, list) { if (snod->type != UBIFS_ORPH_NODE) { - ubifs_err("invalid node type %d in orphan area at " - "%d:%d", snod->type, sleb->lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_err("invalid node type %d in orphan area at %d:%d", + snod->type, sleb->lnum, snod->offs); + ubifs_dump_node(c, snod->node); return -EINVAL; } @@ -594,10 +597,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, * number. That makes this orphan node, out of date. */ if (!first) { - ubifs_err("out of order commit number %llu in " - "orphan node at %d:%d", + ubifs_err("out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); return -EINVAL; } dbg_rcvry("out of date LEB %d", sleb->lnum); @@ -673,7 +675,8 @@ static int kill_orphans(struct ubifs_info *c) sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) { if (PTR_ERR(sleb) == -EUCLEAN) - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + sleb = ubifs_recover_leb(c, lnum, 0, + c->sbuf, -1); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; @@ -724,7 +727,9 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) return err; } -#ifdef CONFIG_UBIFS_FS_DEBUG +/* + * Everything below is related to debugging. 
+ */ struct check_orphan { struct rb_node rb; @@ -810,27 +815,10 @@ static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) static void dbg_free_check_tree(struct rb_root *root) { - struct rb_node *this = root->rb_node; - struct check_orphan *o; + struct check_orphan *o, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - o = rb_entry(this, struct check_orphan, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &o->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(o, n, root, rb) kfree(o); - } } static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, @@ -892,15 +880,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) { int lnum, err = 0; + void *buf; /* Check no-orphans flag and skip this if no orphans */ if (c->no_orphs) return 0; + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + if (!buf) { + ubifs_err("cannot allocate memory to check orphans"); + return 0; + } + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { struct ubifs_scan_leb *sleb; - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); + sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; @@ -912,6 +907,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) break; } + vfree(buf); return err; } @@ -920,7 +916,7 @@ static int dbg_check_orphans(struct ubifs_info *c) struct check_info ci; int err; - if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) + if (!dbg_is_chk_orph(c)) return 0; ci.last_ino = 0; @@ -959,5 +955,3 @@ out: kfree(ci.node); return err; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 77e9b874b6c..c14adb2f420 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -28,6 +28,23 @@ * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted * read-only, and the flash is not modified in that case. + * + * The general UBIFS approach to the recovery is that it recovers from + * corruptions which could be caused by power cuts, but it refuses to recover + * from corruption caused by other reasons. And UBIFS tries to distinguish + * between these 2 reasons of corruptions and silently recover in the former + * case and loudly complain in the latter case. + * + * UBIFS writes only to erased LEBs, so it writes only to the flash space + * containing only 0xFFs. UBIFS also always writes strictly from the beginning + * of the LEB to the end. And UBIFS assumes that the underlying flash media + * writes in @c->max_write_size bytes at a time. + * + * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. + * I/O unit corresponding to offset X to contain corrupted data, all the + * following min. I/O units have to contain empty space (all 0xFFs). If this is + * not true, the corruption cannot be the result of a power cut, and UBIFS + * refuses to mount. 
*/ #include <linux/crc32.h> @@ -100,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); if (err && err != -EBADMSG) goto out_free; @@ -196,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, mst, sz); if (err) goto out; - err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum + 1, mst, sz); if (err) goto out; out: @@ -257,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + } else if (offs1 == 0 && + c->leb_size - offs2 - sz < sz) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -300,6 +318,32 @@ int ubifs_recover_master_node(struct ubifs_info *c) goto out_free; } memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); + + /* + * We had to recover the master node, which means there was an + * unclean reboot. However, it is possible that the master node + * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set. + * E.g., consider the following chain of events: + * + * 1. UBIFS was cleanly unmounted, so the master node is clean + * 2. UBIFS is being mounted R/W and starts changing the master + * node in the first (%UBIFS_MST_LNUM). A power cut happens, + * so this LEB ends up with some amount of garbage at the + * end. + * 3. UBIFS is being mounted R/O. We reach this place and + * recover the master node from the second LEB + * (%UBIFS_MST_LNUM + 1). But we cannot update the media + * because we are being mounted R/O. We have to defer the + * operation. + * 4. However, this master node (@c->mst_node) is marked as + * clean (since step 1). And if we just return, the + * mount code will be confused and won't recover the master + * node when it is re-mounted R/W later. + * + * Thus, we force the recovery by marking the master node as + * dirty. + */ + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); } else { /* Write the recovered master node */ c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; @@ -318,12 +362,12 @@ out_err: out_free: ubifs_err("failed to recover master node"); if (mst1) { - dbg_err("dumping first master node"); - dbg_dump_node(c, mst1); + ubifs_err("dumping first master node"); + ubifs_dump_node(c, mst1); } if (mst2) { - dbg_err("dumping second master node"); - dbg_dump_node(c, mst2); + ubifs_err("dumping second master node"); + ubifs_dump_node(c, mst2); } vfree(buf2); vfree(buf1); @@ -362,8 +406,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) * @offs: offset to check * * This function returns %1 if @offs was in the last write to the LEB whose data - * is in @buf, otherwise %0 is returned. The determination is made by checking - * for subsequent empty space starting from the next @c->min_io_size boundary. + * is in @buf, otherwise %0 is returned. The determination is made by checking + * for subsequent empty space starting from the next @c->max_write_size + * boundary. */ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) { @@ -371,10 +416,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) uint8_t *p; /* - * Round up to the next @c->min_io_size boundary i.e.
@offs is in the - * last wbuf written. After that should be empty space. + * Round up to the next @c->max_write_size boundary i.e. @offs is in + * the last wbuf written. After that should be empty space. */ - empty_offs = ALIGN(offs + 1, c->min_io_size); + empty_offs = ALIGN(offs + 1, c->max_write_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; return is_empty(p, check_len); @@ -429,7 +474,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, int skip, dlen = le32_to_cpu(ch->len); /* Check for empty space after the corrupt node's common header */ - skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; + skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; if (is_empty(buf + skip, len - skip)) return 1; /* @@ -441,7 +486,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, return 0; } /* Now we know the corrupt node's length we can skip over it */ - skip = ALIGN(offs + dlen, c->min_io_size) - offs; + skip = ALIGN(offs + dlen, c->max_write_size) - offs; /* After which there should be empty space */ if (is_empty(buf + skip, len - skip)) return 1; @@ -495,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int len = ALIGN(endpt, c->min_io_size); if (start) { - err = ubi_read(c->ubi, lnum, sleb->buf, 0, - start); + err = ubifs_leb_read(c, lnum, sleb->buf, 0, + start, 1); if (err) return err; } @@ -510,8 +555,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ubifs_pad(c, buf, pad_len); } } - err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, - UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sleb->buf, len); if (err) return err; } @@ -520,16 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, } /** - * drop_incomplete_group - drop nodes from an incomplete group. + * drop_last_group - drop the last group of nodes. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here * - * This function returns %1 if nodes are dropped and %0 otherwise. + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * group of nodes of the scanned LEB. */ -static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) { - int dropped = 0; - while (!list_empty(&sleb->nodes)) { struct ubifs_scan_node *snod; struct ubifs_ch *ch; @@ -538,15 +581,41 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) list); ch = snod->node; if (ch->group_type != UBIFS_IN_NODE_GROUP) - return dropped; - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + break; + + dbg_rcvry("dropping grouped node at %d:%d", + sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + } +} + +/** + * drop_last_node - drop the last node. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped + * + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB. 
+ */ +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) +{ + struct ubifs_scan_node *snod; + + if (!list_empty(&sleb->nodes)) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + + dbg_rcvry("dropping last node at %d:%d", + sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); sleb->nodes_cnt -= 1; - dropped = 1; } - return dropped; } /** @@ -555,7 +624,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * @lnum: LEB number * @offs: offset * @sbuf: LEB-sized buffer to use - * @grouped: nodes may be grouped for recovery + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not + * belong to any journal head) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. @@ -563,25 +633,21 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped) + int offs, void *sbuf, int jhead) { - int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; - int empty_chkd = 0, start = offs; + int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; - dbg_rcvry("%d:%d", lnum, offs); + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); sleb = ubifs_start_scan(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return sleb; - if (sleb->ecc) - need_clean = 1; - + ubifs_assert(len >= 8); while (len >= 8) { - int ret; - dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len); @@ -591,8 +657,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); - + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -605,104 +670,127 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, offs += node_len; buf += node_len; len -= node_len; - continue; - } - - if (ret > 0) { + } else if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; buf += ret; len -= ret; - continue; - } - - if (ret == SCANNED_EMPTY_SPACE) { - if (!is_empty(buf, len)) { - if (!is_last_write(c, buf, offs)) - break; - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } - empty_chkd = 1; + } else if (ret == SCANNED_EMPTY_SPACE || + ret == SCANNED_GARBAGE || + ret == SCANNED_A_BAD_PAD_NODE || + ret == SCANNED_A_CORRUPT_NODE) { + dbg_rcvry("found corruption (%d) at %d:%d", + ret, lnum, offs); break; - } - - if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (ret == SCANNED_A_CORRUPT_NODE) - if (no_more_nodes(c, buf, len, lnum, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (quiet) { - /* Redo the last scan but noisily */ - quiet = 0; - continue; - } - - switch (ret) { - case SCANNED_GARBAGE: - dbg_err("garbage"); - goto corrupted; - case SCANNED_A_CORRUPT_NODE: - case SCANNED_A_BAD_PAD_NODE: - dbg_err("bad node"); - goto corrupted; - 
default: - dbg_err("unknown"); + } else { + ubifs_err("unexpected return value %d", ret); err = -EINVAL; goto error; } } - if (!empty_chkd && !is_empty(buf, len)) { - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } else { + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { + if (!is_last_write(c, buf, offs)) + goto corrupted_rescan; + } else if (ret == SCANNED_A_CORRUPT_NODE) { + if (!no_more_nodes(c, buf, len, lnum, offs)) + goto corrupted_rescan; + } else if (!is_empty(buf, len)) { + if (!is_last_write(c, buf, offs)) { int corruption = first_non_ff(buf, len); - ubifs_err("corrupt empty space LEB %d:%d, corruption " - "starts at %d", lnum, offs, corruption); + /* + * See header comment for this file for more + * explanations about the reasons we have this check. + */ + ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", + lnum, offs, corruption); /* Make sure we dump interesting non-0xFF data */ - offs = corruption; + offs += corruption; buf += corruption; goto corrupted; } } - /* Drop nodes from incomplete group */ - if (grouped && drop_incomplete_group(sleb, &offs)) { - buf = sbuf + offs; - len = c->leb_size - offs; - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } + min_io_unit = round_down(offs, c->min_io_size); + if (grouped) + /* + * If nodes are grouped, always drop the incomplete group at + * the end. + */ + drop_last_group(sleb, &offs); - if (offs % c->min_io_size) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; + if (jhead == GCHD) { + /* + * If this LEB belongs to the GC head then while we are in the + * middle of the same min. I/O unit keep dropping nodes. So + * basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely + * with all the uncorrupted nodes which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the + * corruption starts B, and the previous min. I/O unit A. The + * below code tries to deal with a situation when half of B + * contains valid nodes or the end of a valid node, and the + * second half of B contains corrupted data or garbage. This + * means that UBIFS had been writing to B just before the power + * cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully + * and the other half not, but this is possible in our 'failure + * mode emulation' infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Why + * can't we just clean-up the second half of B by putting a + * padding node there? We can, and this works fine with one + * exception which was reproduced with power cut emulation + * testing and happens extremely rarely. + * + * Imagine the file-system is full, we run GC which starts + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is + * the current GC head LEB). The @c->gc_lnum is -1, which means + * that GC will retain LEB X and will try to continue. Imagine + * that LEB X is currently the dirtiest LEB, and the amount of + * used space in LEB Y is exactly the same as amount of free + * space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to + * LEB Y. We are here trying to recover LEB Y which is the GC + * head LEB. We find the min. I/O unit B as described above. + * Then we clean-up LEB Y by padding min. I/O unit. 
And later + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X + * does not match because the amount of valid nodes there does + * not fit the free space in LEB Y any more! And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and + * analysed is that we cannot mount the file-system with + * -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we + * should free min. I/O unit B in LEB Y completely and the last + * used min. I/O unit in LEB Y should be A. This is basically + * what the below code tries to do. + */ + while (offs > min_io_unit) + drop_last_node(sleb, &offs); } + buf = sbuf + offs; + len = c->leb_size - offs; + + clean_buf(c, &buf, lnum, &offs, &len); ubifs_end_scan(c, sleb, lnum, offs); - if (need_clean) { - err = fix_unclean_leb(c, sleb, start); - if (err) - goto error; - } + err = fix_unclean_leb(c, sleb, start); + if (err) + goto error; return sleb; +corrupted_rescan: + /* Re-scan the corrupted data with verbose messages */ + ubifs_err("corruption %d", ret); + ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; @@ -733,22 +821,23 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, + UBIFS_CS_NODE_SZ, 0); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); if (ret != SCANNED_A_NODE) { - dbg_err("Not a valid node"); + ubifs_err("Not a valid node"); goto out_err; } if (cs_node->ch.node_type != UBIFS_CS_NODE) { - dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); + ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type); goto out_err; } if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { - dbg_err("CS node cmt_no %llu != current cmt_no %llu", - (unsigned long long)le64_to_cpu(cs_node->cmt_no), - c->cmt_no); + ubifs_err("CS node cmt_no %llu != current cmt_no %llu", + (unsigned long long)le64_to_cpu(cs_node->cmt_no), + c->cmt_no); goto out_err; } *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); @@ -811,15 +900,15 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } } if (snod->sqnum > cs_sqnum) { - ubifs_err("unrecoverable log corruption " - "in LEB %d", lnum); + ubifs_err("unrecoverable log corruption in LEB %d", + lnum); ubifs_scan_destroy(sleb); return ERR_PTR(-EUCLEAN); } } ubifs_scan_destroy(sleb); } - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); } /** @@ -833,15 +922,10 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. 
*/ -static int recover_head(const struct ubifs_info *c, int lnum, int offs, - void *sbuf) +static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err; + int len = c->max_write_size, err; - if (c->min_io_size > 1) - len = c->min_io_size; - else - len = 512; if (offs + len > c->leb_size) len = c->leb_size - offs; @@ -849,15 +933,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, return 0; /* Read at the head location and check it is empty flash */ - err = ubi_read(c->ubi, lnum, sbuf, offs, len); + err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, sbuf, offs); } return 0; @@ -880,7 +964,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) { int err; @@ -900,7 +984,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) } /** - * clean_an_unclean_leb - read and write a LEB to remove corruption. + * clean_an_unclean_leb - read and write a LEB to remove corruption. * @c: UBIFS file-system description object * @ucleb: unclean LEB information * @sbuf: LEB-sized buffer to use @@ -911,7 +995,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(const struct ubifs_info *c, +static int clean_an_unclean_leb(struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -927,7 +1011,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, return 0; } - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); if (err && err != -EBADMSG) return err; @@ -987,7 +1071,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sbuf, len); if (err) return err; @@ -1007,7 +1091,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -1026,6 +1110,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) } /** + * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. + * @c: UBIFS file-system description object + * + * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty + * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns + * zero in case of success and a negative error code in case of failure. + */ +static int grab_empty_leb(struct ubifs_info *c) +{ + int lnum, err; + + /* + * Note, it is very important to first search for an empty LEB and then + * run the commit, not vice-versa. 
The reason is that there might be + * only one empty LEB at the moment, the one which has been the + * @c->gc_lnum just before the power cut happened. During the regular + * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no + * one but GC can grab it. But at this moment this single empty LEB is + * not marked as taken, so if we run commit - what happens? Right, the + * commit will grab it and write the index there. Remember that the + * index always expands as long as there is free space, and it only + * starts consolidating when we run out of space. + * + * IOW, if we run commit now, we might not be able to find a free LEB + * after this. + */ + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) { + ubifs_err("could not find an empty LEB"); + ubifs_dump_lprops(c); + ubifs_dump_budg(c, &c->bi); + return lnum; + } + + /* Reset the index flag */ + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX, 0); + if (err) + return err; + + c->gc_lnum = lnum; + dbg_rcvry("found empty LEB %d, run commit", lnum); + + return ubifs_run_commit(c); +} + +/** * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. * @c: UBIFS file-system description object * @@ -1047,71 +1178,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) { struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; struct ubifs_lprops lp; - int lnum, err; + int err; + + dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); c->gc_lnum = -1; - if (wbuf->lnum == -1) { - dbg_rcvry("no GC head LEB"); - goto find_free; - } - /* - * See whether the used space in the dirtiest LEB fits in the GC head - * LEB. - */ - if (wbuf->offs == c->leb_size) { - dbg_rcvry("no room in GC head LEB"); - goto find_free; - } + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) + return grab_empty_leb(c); + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { - /* - * There are no dirty or empty LEBs subject to here being - * enough for the index. Try to use - * 'ubifs_find_free_leb_for_idx()', which will return any empty - * LEBs (ignoring index requirements). If the index then - * doesn't have enough LEBs the recovery commit will fail - - * which is the same result anyway i.e. recovery fails. So - * there is no problem ignoring index requirements and just - * grabbing a free LEB since we have already established there - * is not a dirty LEB we could have used instead. - */ - if (err == -ENOSPC) { - dbg_rcvry("could not find a dirty LEB"); - goto find_free; - } - return err; - } - ubifs_assert(!(lp.flags & LPROPS_INDEX)); - lnum = lp.lnum; - if (lp.free + lp.dirty == c->leb_size) { - /* An empty LEB was returned */ - if (lp.free != c->leb_size) { - err = ubifs_change_one_lp(c, lnum, c->leb_size, - 0, 0, 0, 0); - if (err) - return err; - } - err = ubifs_leb_unmap(c, lnum); - if (err) - return err; - c->gc_lnum = lnum; - dbg_rcvry("allocated LEB %d for GC", lnum); - /* Run the commit */ - dbg_rcvry("committing"); - return ubifs_run_commit(c); - } - /* - * There was no empty LEB so the used space in the dirtiest LEB must fit - * in the GC head LEB. 
- */ - if (lp.free + lp.dirty < wbuf->offs) { - dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", - lnum, wbuf->lnum, wbuf->offs); - err = ubifs_return_leb(c, lnum); - if (err) + if (err != -ENOSPC) return err; - goto find_free; + + dbg_rcvry("could not find a dirty LEB"); + return grab_empty_leb(c); } + + ubifs_assert(!(lp.flags & LPROPS_INDEX)); + ubifs_assert(lp.free + lp.dirty >= wbuf->offs); + /* * We run the commit before garbage collection otherwise subsequent * mounts will see the GC and orphan deletion in a different order. @@ -1120,11 +1206,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) err = ubifs_run_commit(c); if (err) return err; - /* - * The data in the dirtiest LEB fits in the GC head LEB, so do the GC - * - use locking to keep 'ubifs_assert()' happy. - */ - dbg_rcvry("GC'ing LEB %d", lnum); + + dbg_rcvry("GC'ing LEB %d", lp.lnum); mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); err = ubifs_garbage_collect_leb(c, &lp); if (err >= 0) { @@ -1135,42 +1218,22 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } mutex_unlock(&wbuf->io_mutex); if (err < 0) { - dbg_err("GC failed, error %d", err); + ubifs_err("GC failed, error %d", err); if (err == -EAGAIN) err = -EINVAL; return err; } - if (err != LEB_RETAINED) { - dbg_err("GC returned %d", err); + + ubifs_assert(err == LEB_RETAINED); + if (err != LEB_RETAINED) return -EINVAL; - } + err = ubifs_leb_unmap(c, c->gc_lnum); if (err) return err; - dbg_rcvry("allocated LEB %d for GC", lnum); - return 0; -find_free: - /* - * There is no GC head LEB or the free space in the GC head LEB is too - * small, or there are not dirty LEBs. Allocate gc_lnum by calling - * 'ubifs_find_free_leb_for_idx()' so GC is not run. - */ - lnum = ubifs_find_free_leb_for_idx(c); - if (lnum < 0) { - dbg_err("could not find an empty LEB"); - return lnum; - } - /* And reset the index flag */ - err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, - LPROPS_INDEX, 0); - if (err) - return err; - c->gc_lnum = lnum; - dbg_rcvry("allocated LEB %d for GC", lnum); - /* Run the commit */ - dbg_rcvry("committing"); - return ubifs_run_commit(c); + dbg_rcvry("allocated LEB %d for GC", lp.lnum); + return 0; } /** @@ -1272,29 +1335,14 @@ static void remove_ino(struct ubifs_info *c, ino_t inum) */ void ubifs_destroy_size_tree(struct ubifs_info *c) { - struct rb_node *this = c->size_tree.rb_node; - struct size_entry *e; + struct size_entry *e, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - e = rb_entry(this, struct size_entry, rb); + rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) { if (e->inode) iput(e->inode); - this = rb_parent(this); - if (this) { - if (this->rb_left == &e->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } kfree(e); } + c->size_tree = RB_ROOT; } @@ -1393,7 +1441,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) if (i_size >= e->d_size) return 0; /* Read the LEB */ - err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); if (err) goto out; /* Change the size field and recalculate the CRC */ @@ -1409,10 +1457,10 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) len -= 1; len = ALIGN(len + 1, c->min_io_size); /* Atomically write the fixed LEB back again */ - err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, c->sbuf, len); if (err) goto out; - 
dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", (unsigned long)e->inum, lnum, offs, i_size, e->d_size); return 0; @@ -1461,20 +1509,27 @@ int ubifs_recover_size(struct ubifs_info *c) e->i_size = le64_to_cpu(ino->size); } } + if (e->exists && e->i_size < e->d_size) { - if (!e->inode && c->ro_mount) { + if (c->ro_mount) { /* Fix the inode size and pin it in memory */ struct inode *inode; + struct ubifs_inode *ui; + + ubifs_assert(!e->inode); inode = ubifs_iget(c->vfs_sb, e->inum); if (IS_ERR(inode)) return PTR_ERR(inode); + + ui = ubifs_inode(inode); if (inode->i_size < e->d_size) { dbg_rcvry("ino %lu size %lld -> %lld", (unsigned long)e->inum, - e->d_size, inode->i_size); + inode->i_size, e->d_size); inode->i_size = e->d_size; - ubifs_inode(inode)->ui_size = e->d_size; + ui->ui_size = e->d_size; + ui->synced_i_size = e->d_size; e->inode = inode; this = rb_next(this); continue; @@ -1489,9 +1544,11 @@ int ubifs_recover_size(struct ubifs_info *c) iput(e->inode); } } + this = rb_next(this); rb_erase(&e->rb, &c->size_tree); kfree(e); } + return 0; } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eed0fcff8d7..3187925e987 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -33,43 +33,32 @@ */ #include "ubifs.h" - -/* - * Replay flags. - * - * REPLAY_DELETION: node was deleted - * REPLAY_REF: node is a reference node - */ -enum { - REPLAY_DELETION = 1, - REPLAY_REF = 2, -}; +#include <linux/list_sort.h> /** - * struct replay_entry - replay tree entry. + * struct replay_entry - replay list entry. * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number - * @flags: replay flags - * @rb: links the replay tree + * @list: links the replay list * @key: node key * @nm: directory entry name * @old_size: truncation old size * @new_size: truncation new size - * @free: amount of free space in a bud - * @dirty: amount of dirty space in a bud from padding and deletion nodes * - * UBIFS journal replay must compare node sequence numbers, which means it must - * build a tree of node information to insert into the TNC. + * The replay process first scans all buds and builds the replay list, then + * sorts the replay list in nodes sequence number order, and then inserts all + * the replay entries to the TNC. */ struct replay_entry { int lnum; int offs; int len; + unsigned int deletion:1; unsigned long long sqnum; - int flags; - struct rb_node rb; + struct list_head list; union ubifs_key key; union { struct qstr nm; @@ -77,10 +66,6 @@ struct replay_entry { loff_t old_size; loff_t new_size; }; - struct { - int free; - int dirty; - }; }; }; @@ -88,57 +73,64 @@ struct replay_entry { * struct bud_entry - entry in the list of buds to replay. * @list: next bud in the list * @bud: bud description object - * @free: free bytes in the bud * @sqnum: reference node sequence number + * @free: free bytes in the bud + * @dirty: dirty bytes in the bud */ struct bud_entry { struct list_head list; struct ubifs_bud *bud; - int free; unsigned long long sqnum; + int free; + int dirty; }; /** * set_bud_lprops - set free and dirty space used by a bud. * @c: UBIFS file-system description object - * @r: replay entry of bud + * @b: bud entry which describes the bud + * + * This function makes sure the LEB properties of bud @b are set correctly + * after the replay. Returns zero in case of success and a negative error code + * in case of failure. 
*/ -static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) { const struct ubifs_lprops *lp; int err = 0, dirty; ubifs_get_lprops(c); - lp = ubifs_lpt_lookup_dirty(c, r->lnum); + lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out; } dirty = lp->dirty; - if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { /* * The LEB was added to the journal with a starting offset of * zero which means the LEB must have been empty. The LEB - * property values should be lp->free == c->leb_size and - * lp->dirty == 0, but that is not the case. The reason is that - * the LEB was garbage collected. The garbage collector resets - * the free and dirty space without recording it anywhere except - * lprops, so if there is not a commit then lprops does not have - * that information next time the file system is mounted. + * property values should be @lp->free == @c->leb_size and + * @lp->dirty == 0, but that is not the case. The reason is that + * the LEB had been garbage collected before it became the bud, + * and there was no commit in between. The garbage collector + * resets the free and dirty space without recording it + * anywhere except lprops, so if there was no commit then + * lprops does not have that information. * * We do not need to adjust free space because the scan has told * us the exact value which is recorded in the replay entry as - * r->free. + * @b->free. * * However we do need to subtract from the dirty space the * amount of space that the garbage collector reclaimed, which * is the whole LEB minus the amount of space that was free. */ - dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); - dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); dirty -= c->leb_size - lp->free; /* @@ -149,22 +141,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) * during the replay. */ if (dirty != 0) - dbg_msg("LEB %d lp: %d free %d dirty " - "replay: %d free %d dirty", r->lnum, lp->free, - lp->dirty, r->free, r->dirty); + dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty", + b->bud->lnum, lp->free, lp->dirty, b->free, + b->dirty); } - lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, + lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, lp->flags | LPROPS_TAKEN, 0); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out; } + + /* Make sure the journal head points to the latest bud */ + err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, + b->bud->lnum, c->leb_size - b->free); + out: ubifs_release_lprops(c); return err; } /** + * set_buds_lprops - set free and dirty space for all replayed buds. + * @c: UBIFS file-system description object + * + * This function sets LEB properties for all replayed buds. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int set_buds_lprops(struct ubifs_info *c) +{ + struct bud_entry *b; + int err; + + list_for_each_entry(b, &c->replay_buds, list) { + err = set_bud_lprops(c, b); + if (err) + return err; + } + + return 0; +} + +/** + * trun_remove_range - apply a replay entry for a truncation to the TNC.
* @c: UBIFS file-system description object * @r: replay entry of truncation @@ -200,24 +218,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) */ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) { - int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + int err; - dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, - r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", + r->lnum, r->offs, r->len, r->deletion, r->sqnum); /* Set c->replay_sqnum to help deal with dangling branches. */ c->replay_sqnum = r->sqnum; - if (r->flags & REPLAY_REF) - err = set_bud_lprops(c, r); - else if (is_hash_key(c, &r->key)) { - if (deletion) + if (is_hash_key(c, &r->key)) { + if (r->deletion) err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); else err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, r->len, &r->nm); } else { - if (deletion) + if (r->deletion) switch (key_type(c, &r->key)) { case UBIFS_INO_KEY: { @@ -240,7 +256,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) return err; if (c->need_recovery) - err = ubifs_recover_size_accum(c, &r->key, deletion, + err = ubifs_recover_size_accum(c, &r->key, r->deletion, r->new_size); } @@ -248,68 +264,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) } /** - * destroy_replay_tree - destroy the replay. - * @c: UBIFS file-system description object + * replay_entries_cmp - compare 2 replay entries. + * @priv: UBIFS file-system description object + * @a: first replay entry + * @b: second replay entry * - * Destroy the replay tree. + * This is a comparison function for 'list_sort()' which compares 2 replay + * entries @a and @b by comparing their sequence number. Returns %1 if @a has + * greater sequence number and %-1 otherwise. */ -static void destroy_replay_tree(struct ubifs_info *c) +static int replay_entries_cmp(void *priv, struct list_head *a, + struct list_head *b) { - struct rb_node *this = c->replay_tree.rb_node; - struct replay_entry *r; - - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - r = rb_entry(this, struct replay_entry, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &r->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - if (is_hash_key(c, &r->key)) - kfree(r->nm.name); - kfree(r); - } - c->replay_tree = RB_ROOT; + struct replay_entry *ra, *rb; + + cond_resched(); + if (a == b) + return 0; + + ra = list_entry(a, struct replay_entry, list); + rb = list_entry(b, struct replay_entry, list); + ubifs_assert(ra->sqnum != rb->sqnum); + if (ra->sqnum > rb->sqnum) + return 1; + return -1; } /** - * apply_replay_tree - apply the replay tree to the TNC. + * apply_replay_list - apply the replay list to the TNC. * @c: UBIFS file-system description object * - * Apply the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. + * Apply all entries in the replay list to the TNC. Returns zero in case of + * success and a negative error code in case of failure.
*/ -static int apply_replay_tree(struct ubifs_info *c) +static int apply_replay_list(struct ubifs_info *c) { - struct rb_node *this = rb_first(&c->replay_tree); + struct replay_entry *r; + int err; - while (this) { - struct replay_entry *r; - int err; + list_sort(c, &c->replay_list, &replay_entries_cmp); + list_for_each_entry(r, &c->replay_list, list) { cond_resched(); - r = rb_entry(this, struct replay_entry, rb); err = apply_replay_entry(c, r); if (err) return err; - this = rb_next(this); } + return 0; } /** - * insert_node - insert a node to the replay tree. + * destroy_replay_list - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay list. + */ +static void destroy_replay_list(struct ubifs_info *c) +{ + struct replay_entry *r, *tmp; + + list_for_each_entry_safe(r, tmp, &c->replay_list, list) { + if (is_hash_key(c, &r->key)) + kfree(r->nm.name); + list_del(&r->list); + kfree(r); + } +} + +/** + * insert_node - insert a node to the replay list * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -321,39 +346,25 @@ static int apply_replay_tree(struct ubifs_info *c) * @old_size: truncation old size * @new_size: truncation new size * - * This function inserts a scanned non-direntry node to the replay tree. The - * replay tree is an RB-tree containing @struct replay_entry elements which are - * indexed by the sequence number. The replay tree is applied at the very end - * of the replay process. Since the tree is sorted in sequence number order, - * the older modifications are applied first. This function returns zero in - * case of success and a negative error code in case of failure. + * This function inserts a scanned non-direntry node to the replay list. The + * replay list contains @struct replay_entry elements, and we sort this list in + * sequence number order before applying it. The replay list is applied at the + * very end of the replay process. Since the list is sorted in sequence number + * order, the older modifications are applied first. This function returns zero + * in case of success and a negative error code in case of failure. */ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, unsigned long long sqnum, int deletion, int *used, loff_t old_size, loff_t new_size) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); + if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; @@ -363,19 +374,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; - r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); r->old_size = old_size; r->new_size = new_size; - key_copy(c, key, &r->key); - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } /** - * insert_dent - insert a directory entry node into the replay tree. 
+ * insert_dent - insert a directory entry node into the replay list. * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -387,43 +397,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, * @deletion: non-zero if this is a deletion * @used: number of bytes in use in a LEB * - * This function inserts a scanned directory entry node to the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. - * - * This function is also used for extended attribute entries because they are - * implemented as directory entry nodes. + * This function inserts a scanned directory entry node or an extended + * attribute entry to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, const char *name, int nlen, unsigned long long sqnum, int deletion, int *used) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; char *nbuf; + dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } - if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); if (!nbuf) { kfree(r); @@ -435,17 +427,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; + key_copy(c, key, &r->key); r->nm.len = nlen; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; r->nm.name = nbuf; - r->flags = (deletion ? REPLAY_DELETION : 0); - key_copy(c, key, &r->key); - ubifs_assert(!*p); - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } @@ -482,29 +472,90 @@ int ubifs_validate_entry(struct ubifs_info *c, } /** + * is_last_bud - check if the bud is the last in the journal head. + * @c: UBIFS file-system description object + * @bud: bud description object + * + * This function checks if bud @bud is the last bud in its journal head. This + * information is then used by 'replay_bud()' to decide whether the bud can + * have corruptions or not. Indeed, only last buds can be corrupted by power + * cuts. Returns %1 if this is the last bud, and %0 if not. + */ +static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct ubifs_jhead *jh = &c->jheads[bud->jhead]; + struct ubifs_bud *next; + uint32_t data; + int err; + + if (list_is_last(&bud->list, &jh->buds_list)) + return 1; + + /* + * The following is a quirk to make sure we work correctly with UBIFS + * images used with older UBIFS. + * + * Normally, the last bud will be the last in the journal head's list + * of bud. However, there is one exception if the UBIFS image belongs + * to older UBIFS. This is fairly unlikely: one would need to use old + * UBIFS, then have a power cut exactly at the right point, and then + * try to mount this image with new UBIFS. 
+ * + * The exception is: it is possible to have 2 buds A and B, A goes + * before B, and B is the last, bud B contains no data, and bud A is + * corrupted at the end. The reason is that in older versions when the + * journal code switched the next bud (from A to B), it first added a + * log reference node for the new bud (B), and only after this it + * synchronized the write-buffer of the current bud (A). But later this was + * changed and UBIFS started to always synchronize the write-buffer of + * the bud (A) before writing the log reference for the new bud (B). + * + * But because older UBIFS always synchronized A's write-buffer before + * writing to B, we can recognize this exceptional situation by + * checking the contents of bud B - if it is empty, then A can be + * treated as the last and we can recover it. + * + * TODO: remove this piece of code in a couple of years (today it is + * 16.05.2011). + */ + next = list_entry(bud->list.next, struct ubifs_bud, list); + if (!list_is_last(&next->list, &jh->buds_list)) + return 0; + + err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); + if (err) + return 0; + + return data == 0xFFFFFFFF; +} + +/** * replay_bud - replay a bud logical eraseblock. * @c: UBIFS file-system description object - * @lnum: bud logical eraseblock number to replay - * @offs: bud start offset - * @jhead: journal head to which this bud belongs - * @free: amount of free space in the bud is returned here - * @dirty: amount of dirty space from padding and deletion nodes is returned - * here + * @b: bud entry which describes the bud * - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function replays bud @b, recovers it if needed, and adds all nodes + * from this bud to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ -static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, - int *free, int *dirty) +static int replay_bud(struct ubifs_info *c, struct bud_entry *b) { - int err = 0, used = 0; + int is_last = is_last_bud(c, b->bud); + int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - struct ubifs_bud *bud; - dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); - if (c->need_recovery) - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); + dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", + lnum, b->bud->jhead, offs, is_last); + + if (c->need_recovery && is_last) + /* + * Recover only last LEBs in the journal heads, because power + * cuts may cause corruptions only in these LEBs, since only + * these LEBs could possibly be written to at the power cut + * time.
+ */ + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); else sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) @@ -620,19 +671,14 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, goto out; } - bud = ubifs_search_bud(c, lnum); - if (!bud) - BUG(); - + ubifs_assert(ubifs_search_bud(c, lnum)); ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); - if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) - err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, - sleb->endpt, UBI_SHORTTERM); - - *dirty = sleb->endpt - offs - used; - *free = c->leb_size - sleb->endpt; + b->dirty = sleb->endpt - offs - used; + b->free = c->leb_size - sleb->endpt; + dbg_mnt("bud LEB %d replied: dirty %d, free %d", + lnum, b->dirty, b->free); out: ubifs_scan_destroy(sleb); @@ -640,61 +686,12 @@ out: out_dump: ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); return -EINVAL; } /** - * insert_ref_node - insert a reference node to the replay tree. - * @c: UBIFS file-system description object - * @lnum: node logical eraseblock number - * @offs: node offset - * @sqnum: sequence number - * @free: amount of free space in bud - * @dirty: amount of dirty space from padding and deletion nodes - * - * This function inserts a reference node to the replay tree and returns zero - * in case of success or a negative error code in case of failure. - */ -static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, - unsigned long long sqnum, int free, int dirty) -{ - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; - struct replay_entry *r; - - dbg_mnt("add ref LEB %d:%d", lnum, offs); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay tree"); - return -EINVAL; - } - - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); - if (!r) - return -ENOMEM; - - r->lnum = lnum; - r->offs = offs; - r->sqnum = sqnum; - r->flags = REPLAY_REF; - r->free = free; - r->dirty = dirty; - - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); - return 0; -} - -/** * replay_buds - replay all buds. * @c: UBIFS file-system description object * @@ -704,17 +701,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, static int replay_buds(struct ubifs_info *c) { struct bud_entry *b; - int err, uninitialized_var(free), uninitialized_var(dirty); + int err; + unsigned long long prev_sqnum = 0; list_for_each_entry(b, &c->replay_buds, list) { - err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, - &free, &dirty); - if (err) - return err; - err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, - free, dirty); + err = replay_bud(c, b); if (err) return err; + + ubifs_assert(b->sqnum > prev_sqnum); + prev_sqnum = b->sqnum; } return 0; @@ -865,16 +861,15 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) * numbers. 
*/ if (snod->type != UBIFS_CS_NODE) { - dbg_err("first log node at LEB %d:%d is not CS node", - lnum, offs); + ubifs_err("first log node at LEB %d:%d is not CS node", + lnum, offs); goto out_dump; } if (le64_to_cpu(node->cmt_no) != c->cmt_no) { - dbg_err("first CS node at LEB %d:%d has wrong " - "commit number %llu expected %llu", - lnum, offs, - (unsigned long long)le64_to_cpu(node->cmt_no), - c->cmt_no); + ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", + lnum, offs, + (unsigned long long)le64_to_cpu(node->cmt_no), + c->cmt_no); goto out_dump; } @@ -896,7 +891,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) /* Make sure the first node sits at offset zero of the LEB */ if (snod->offs != 0) { - dbg_err("first node is not at zero offset"); + ubifs_err("first node is not at zero offset"); goto out_dump; } @@ -909,8 +904,8 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) } if (snod->sqnum < c->cs_sqnum) { - dbg_err("bad sqnum %llu, commit sqnum %llu", - snod->sqnum, c->cs_sqnum); + ubifs_err("bad sqnum %llu, commit sqnum %llu", + snod->sqnum, c->cs_sqnum); goto out_dump; } @@ -962,7 +957,7 @@ out: out_dump: ubifs_err("log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); - dbg_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node); ubifs_scan_destroy(sleb); return -EINVAL; } @@ -1012,7 +1007,7 @@ out: */ int ubifs_replay_journal(struct ubifs_info *c) { - int err, i, lnum, offs, free; + int err, lnum, free; BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); @@ -1030,49 +1025,44 @@ int ubifs_replay_journal(struct ubifs_info *c) dbg_mnt("start replaying the journal"); c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - offs = c->lhead_offs; - for (i = 0; i < c->log_lebs; i++, lnum++) { - if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { - /* - * The log is logically circular, we reached the last - * LEB, switch to the first one. - */ - lnum = UBIFS_LOG_LNUM; - offs = 0; - } - err = replay_log_leb(c, lnum, offs, c->sbuf); + do { + err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) /* We hit the end of the log */ break; if (err) goto out; - offs = 0; - } + lnum = ubifs_next_log_lnum(c, lnum); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) goto out; - err = apply_replay_tree(c); + err = apply_replay_list(c); + if (err) + goto out; + + err = set_buds_lprops(c); if (err) goto out; /* - * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable - * to roughly estimate index growth. Things like @c->min_idx_lebs + * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable + * to roughly estimate index growth. Things like @c->bi.min_idx_lebs * depend on it. This means we have to initialize it to make sure * budgeting works properly. 
*/ - c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); - c->budg_uncommitted_idx *= c->max_idx_node_sz; + c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); + c->bi.uncommitted_idx *= c->max_idx_node_sz; ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); - dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " - "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu", + c->lhead_lnum, c->lhead_offs, c->max_sqnum, (unsigned long)c->highest_inum); out: - destroy_replay_tree(c); + destroy_replay_list(c); destroy_bud_list(c); c->replaying = 0; return err; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index bf31b4729e5..4c37607a958 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -130,7 +130,6 @@ static int create_default_filesystem(struct ubifs_info *c) * orphan node. */ orph_lebs = UBIFS_MIN_ORPH_LEBS; -#ifdef CONFIG_UBIFS_FS_DEBUG if (c->leb_cnt - min_leb_cnt > 1) /* * For debugging purposes it is better to have at least 2 @@ -138,7 +137,6 @@ static int create_default_filesystem(struct ubifs_info *c) * consolidations and would be stressed more. */ orph_lebs += 1; -#endif main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; main_lebs -= orph_lebs; @@ -196,7 +194,7 @@ static int create_default_filesystem(struct ubifs_info *c) sup->rp_size = cpu_to_le64(tmp64); sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); - err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); + err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0); kfree(sup); if (err) return err; @@ -247,19 +245,18 @@ static int create_default_filesystem(struct ubifs_info *c) mst->total_dirty = cpu_to_le64(tmp64); /* The indexing LEB does not contribute to dark space */ - tmp64 = (c->main_lebs - 1) * c->dark_wm; + tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); mst->total_dark = cpu_to_le64(tmp64); mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); - err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, - UBI_UNKNOWN); + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0); if (err) { kfree(mst); return err; } - err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, - UBI_UNKNOWN); + err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, + 0); kfree(mst); if (err) return err; @@ -282,8 +279,7 @@ static int create_default_filesystem(struct ubifs_info *c) key_write_idx(c, &key, &br->key); br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); - err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, - UBI_UNKNOWN); + err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0); kfree(idx); if (err) return err; @@ -315,8 +311,7 @@ static int create_default_filesystem(struct ubifs_info *c) ino->flags = cpu_to_le32(UBIFS_COMPR_FL); err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, - main_first + DEFAULT_DATA_LEB, 0, - UBI_UNKNOWN); + main_first + DEFAULT_DATA_LEB, 0); kfree(ino); if (err) return err; @@ -335,8 +330,7 @@ static int create_default_filesystem(struct ubifs_info *c) return -ENOMEM; cs->ch.node_type = UBIFS_CS_NODE; - err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, - 0, UBI_UNKNOWN); + err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0); kfree(cs); ubifs_msg("default file-system created"); @@ -397,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) min_leb_cnt += c->lpt_lebs + 
c->orph_lebs + c->jhead_cnt + 6; if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { - ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " - "%d minimum required", c->leb_cnt, c->vi.size, - min_leb_cnt); + ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", + c->leb_cnt, c->vi.size, min_leb_cnt); goto failed; } @@ -410,13 +403,22 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) } if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { - err = 7; + ubifs_err("too few main LEBs count %d, must be at least %d", + c->main_lebs, UBIFS_MIN_MAIN_LEBS); goto failed; } - if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || - c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { - err = 8; + max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; + if (c->max_bud_bytes < max_bytes) { + ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", + c->max_bud_bytes, max_bytes); + goto failed; + } + + max_bytes = (long long)c->leb_size * c->main_lebs; + if (c->max_bud_bytes > max_bytes) { + ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", + c->max_bud_bytes, max_bytes); goto failed; } @@ -450,7 +452,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) goto failed; } - max_bytes = c->main_lebs * (long long)c->leb_size; if (c->rp_size < 0 || max_bytes < c->rp_size) { err = 14; goto failed; @@ -466,7 +467,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) failed: ubifs_err("bad superblock, error %d", err); - dbg_dump_node(c, sup); + ubifs_dump_node(c, sup); return -EINVAL; } @@ -475,7 +476,8 @@ failed: * @c: UBIFS file-system description object * * This function returns a pointer to the superblock node or a negative error - * code. + * code. Note, the user of this function is responsible of kfree()'ing the + * returned superblock buffer. 
*/ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) { @@ -508,7 +510,7 @@ int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); - return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); + return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len); } /** @@ -545,10 +547,9 @@ int ubifs_read_superblock(struct ubifs_info *c) ubifs_assert(!c->ro_media || c->ro_mount); if (!c->ro_mount || c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { - ubifs_err("on-flash format version is w%d/r%d, but " - "software only supports up to version " - "w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, + ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { ubifs_msg("only R/O mounting is possible"); @@ -607,8 +608,8 @@ int ubifs_read_superblock(struct ubifs_info *c) c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); c->rp_size = le64_to_cpu(sup->rp_size); - c->rp_uid = le32_to_cpu(sup->rp_uid); - c->rp_gid = le32_to_cpu(sup->rp_gid); + c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid)); + c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid)); sup_flags = le32_to_cpu(sup->flags); if (!c->mount_opts.override_compr) c->default_compr = le16_to_cpu(sup->default_compr); @@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c) c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); memcpy(&c->uuid, &sup->uuid, 16); c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); + c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); /* Automatically increase file system size to the maximum size */ c->old_leb_cnt = c->leb_cnt; @@ -650,3 +652,156 @@ out: kfree(sup); return err; } + +/** + * fixup_leb - fixup/unmap an LEB containing free space. + * @c: UBIFS file-system description object + * @lnum: the LEB number to fix up + * @len: number of used bytes in LEB (starting at offset 0) + * + * This function reads the contents of the given LEB number @lnum, then fixes + * it up, so that empty min. I/O units at the end of the LEB are actually erased on + * flash (rather than being just all-0xff real data). If the LEB is completely + * empty, it is simply unmapped. + */ +static int fixup_leb(struct ubifs_info *c, int lnum, int len) +{ + int err; + + ubifs_assert(len >= 0); + ubifs_assert(len % c->min_io_size == 0); + ubifs_assert(len < c->leb_size); + + if (len == 0) { + dbg_mnt("unmap empty LEB %d", lnum); + return ubifs_leb_unmap(c, lnum); + } + + dbg_mnt("fixup LEB %d, data len %d", lnum, len); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); + if (err) + return err; + + return ubifs_leb_change(c, lnum, c->sbuf, len); +} + +/** + * fixup_free_space - find & remap all LEBs containing free space. + * @c: UBIFS file-system description object + * + * This function walks through all LEBs in the filesystem and fixes up those + * containing free/empty space.
+ */ +static int fixup_free_space(struct ubifs_info *c) +{ + int lnum, err = 0; + struct ubifs_lprops *lprops; + + ubifs_get_lprops(c); + + /* Fixup LEBs in the master area */ + for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { + err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); + if (err) + goto out; + } + + /* Unmap unused log LEBs */ + lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + while (lnum != c->ltail_lnum) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + lnum = ubifs_next_log_lnum(c, lnum); + } + + /* + * Fixup the log head, which contains only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); + if (err) + goto out; + + /* Fixup LEBs in the LPT area */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + int free = c->ltab[lnum - c->lpt_first].free; + + if (free > 0) { + err = fixup_leb(c, lnum, c->leb_size - free); + if (err) + goto out; + } + } + + /* Unmap LEBs in the orphans area */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + } + + /* Fixup LEBs in the main area */ + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + lprops = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + if (lprops->free > 0) { + err = fixup_leb(c, lnum, c->leb_size - lprops->free); + if (err) + goto out; + } + } + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fixup_free_space - find & fix all LEBs with free space. + * @c: UBIFS file-system description object + * + * This function fixes up LEBs containing free space on first mount, if the + * appropriate flag was set when the FS was created. Each LEB with one or more + * empty min. I/O units (i.e. free-space-count > 0) is re-written, to make sure + * the free space is actually erased. E.g., this is necessary for some NAND + * chips, since the free space may have been programmed like real "0xff" data + * (generating a non-0xff ECC), causing future writes to the not-really-erased + * NAND pages to behave badly. After the space is fixed up, the superblock flag + * is cleared, so that this is skipped for all future mounts.
+ */ +int ubifs_fixup_free_space(struct ubifs_info *c) +{ + int err; + struct ubifs_sb_node *sup; + + ubifs_assert(c->space_fixup); + ubifs_assert(!c->ro_mount); + + ubifs_msg("start fixing up free space"); + + err = fixup_free_space(c); + if (err) + return err; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) + return PTR_ERR(sup); + + /* Free-space fixup is no longer required */ + c->space_fixup = 0; + sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); + + err = ubifs_write_sb_node(c, sup); + kfree(sup); + if (err) + return err; + + ubifs_msg("free space fixup complete"); + return err; +} diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 3e1ee57dbea..58aa05df2bb 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, magic = le32_to_cpu(ch->magic); if (magic == 0xFFFFFFFF) { - dbg_scan("hit empty space"); + dbg_scan("hit empty space at LEB %d:%d", lnum, offs); return SCANNED_EMPTY_SPACE; } @@ -85,7 +85,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (len < UBIFS_CH_SZ) return SCANNED_GARBAGE; - dbg_scan("scanning %s", dbg_ntype(ch->node_type)); + dbg_scan("scanning %s at LEB %d:%d", + dbg_ntype(ch->node_type), lnum, offs); if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; @@ -101,7 +102,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (!quiet) { ubifs_err("bad pad node at LEB %d:%d", lnum, offs); - dbg_dump_node(c, pad); + ubifs_dump_node(c, pad); } return SCANNED_A_BAD_PAD_NODE; } @@ -109,13 +110,13 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, /* Make the node pads to 8-byte boundary */ if ((node_len + pad_len) & 7) { if (!quiet) - dbg_err("bad padding length %d - %d", - offs, offs + node_len + pad_len); + ubifs_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); return SCANNED_A_BAD_PAD_NODE; } - dbg_scan("%d bytes padded, offset now %d", - pad_len, ALIGN(offs + node_len + pad_len, 8)); + dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len, + lnum, offs, ALIGN(offs + node_len + pad_len, 8)); return node_len + pad_len; } @@ -148,10 +149,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { - ubifs_err("cannot read %d bytes from LEB %d:%d," - " error %d", c->leb_size - offs, lnum, offs, err); + ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", + c->leb_size - offs, lnum, offs, err); kfree(sleb); return ERR_PTR(err); } @@ -240,12 +241,10 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_failure_mode) - return; len = c->leb_size - offs; if (len > 8192) len = 8192; - dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -300,16 +299,16 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, switch (ret) { case SCANNED_GARBAGE: - dbg_err("garbage"); + ubifs_err("garbage"); goto corrupted; case SCANNED_A_NODE: break; case SCANNED_A_CORRUPT_NODE: case SCANNED_A_BAD_PAD_NODE: - dbg_err("bad node"); + 
ubifs_err("bad node"); goto corrupted; default: - dbg_err("unknown"); + ubifs_err("unknown"); err = -EINVAL; goto error; } @@ -328,7 +327,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, if (!quiet) ubifs_err("empty space starts at non-aligned offset %d", offs); - goto corrupted;; + goto corrupted; } ubifs_end_scan(c, sleb, lnum, offs); diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 46961c00323..9a9fb94a41c 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) freed = ubifs_destroy_tnc_subtree(znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); total_freed += freed; znode = zprev; } @@ -277,13 +276,25 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { - int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); - if (nr == 0) - return clean_zn_cnt; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; +} + +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long nr = sc->nr_to_scan; + int contention = 0; + unsigned long freed; + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (!clean_zn_cnt) { /* @@ -311,10 +322,10 @@ int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) if (!freed && contention) { dbg_tnc("freed nothing, but contention"); - return -1; + return SHRINK_STOP; } out: - dbg_tnc("%d znodes were freed, requested %d", freed, nr); + dbg_tnc("%lu znodes were freed, requested %lu", freed, nr); return freed; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6e11c2975dc..3904c8574ef 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab; /* UBIFS TNC shrinker description */ static struct shrinker ubifs_shrinker_info = { - .shrink = ubifs_shrinker, + .scan_objects = ubifs_shrink_scan, + .count_objects = ubifs_shrink_count, .seeks = DEFAULT_SEEKS, }; @@ -85,16 +86,15 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) return 4; - if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) + if (ui->xattr && !S_ISREG(inode->i_mode)) return 5; if (!ubifs_compr_present(ui->compr_type)) { - ubifs_warn("inode %lu uses '%s' compression, but it was not " - "compiled in", inode->i_ino, - ubifs_compr_name(ui->compr_type)); + ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", + inode->i_ino, ubifs_compr_name(ui->compr_type)); } - err = dbg_check_dir_size(c, inode); + err = dbg_check_dir(c, inode); return err; } @@ -129,9 +129,9 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) goto out_ino; inode->i_flags |= (S_NOCMTIME | S_NOATIME); - inode->i_nlink = le32_to_cpu(ino->nlink); - inode->i_uid = le32_to_cpu(ino->uid); - inode->i_gid = le32_to_cpu(ino->gid); + set_nlink(inode, le32_to_cpu(ino->nlink)); + i_uid_write(inode, le32_to_cpu(ino->uid)); + i_gid_write(inode, le32_to_cpu(ino->gid)); inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); inode->i_mtime.tv_sec = 
(int64_t)le64_to_cpu(ino->mtime_sec); @@ -246,8 +246,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) out_invalid: ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); - dbg_dump_node(c, ino); - dbg_dump_inode(c, inode); + ubifs_dump_node(c, ino); + ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: kfree(ino); @@ -276,7 +276,6 @@ static void ubifs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ubifs_inode *ui = ubifs_inode(inode); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ubifs_inode_slab, ui); } @@ -304,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) mutex_lock(&ui->ui_mutex); /* * Due to races between write-back forced by budgeting - * (see 'sync_some_inodes()') and pdflush write-back, the inode may + * (see 'sync_some_inodes()') and background write-back, the inode may * have already been synchronized, do not do this again. This might * also happen if it was synchronized in an VFS operation, e.g. * 'ubifs_link()'. @@ -352,7 +351,7 @@ static void ubifs_evict_inode(struct inode *inode) dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ubifs_assert(!atomic_read(&inode->i_count)); - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); if (inode->i_nlink) goto done; @@ -375,14 +374,14 @@ out: ubifs_release_dirty_inode_budget(c, ui); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } done: - end_writeback(inode); + clear_inode(inode); } -static void ubifs_dirty_inode(struct inode *inode) +static void ubifs_dirty_inode(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); @@ -420,9 +419,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) +static int ubifs_show_options(struct seq_file *s, struct dentry *root) { - struct ubifs_info *c = mnt->mnt_sb->s_fs_info; + struct ubifs_info *c = root->d_sb->s_fs_info; if (c->mount_opts.unmount_mode == 2) seq_printf(s, ",fast_unmount"); @@ -512,9 +511,12 @@ static int init_constants_early(struct ubifs_info *c) c->leb_cnt = c->vi.size; c->leb_size = c->vi.usable_leb_size; + c->leb_start = c->di.leb_start; c->half_leb_size = c->leb_size / 2; c->min_io_size = c->di.min_io_size; c->min_io_shift = fls(c->min_io_size) - 1; + c->max_write_size = c->di.max_write_size; + c->max_write_shift = fls(c->max_write_size) - 1; if (c->leb_size < UBIFS_MIN_LEB_SZ) { ubifs_err("too small LEBs (%d bytes), min. is %d bytes", @@ -534,6 +536,18 @@ static int init_constants_early(struct ubifs_info *c) } /* + * Maximum write size has to be greater or equivalent to min. I/O + * size, and be multiple of min. I/O size. + */ + if (c->max_write_size < c->min_io_size || + c->max_write_size % c->min_io_size || + !is_power_of_2(c->max_write_size)) { + ubifs_err("bad write buffer size %d for %d min. I/O unit", + c->max_write_size, c->min_io_size); + return -EINVAL; + } + + /* * UBIFS aligns all node to 8-byte boundary, so to make function in * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is * less than 8. 
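The hunk above adds a write-geometry sanity check: the maximum write size reported by UBI must be at least the min. I/O unit size, a multiple of it, and a power of two. A minimal stand-alone sketch of the same three conditions in user-space C follows; it is illustrative only and not part of the patch, check_write_geometry() and the sample sizes are hypothetical, and only the conditions mirror the new code in init_constants_early().

	#include <stdio.h>

	/* Stand-in for the kernel's is_power_of_2() helper */
	static int is_power_of_2(unsigned int n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	/*
	 * Returns 0 for acceptable geometry, -1 where the patch would print
	 * "bad write buffer size %d for %d min. I/O unit" and fail with -EINVAL.
	 */
	static int check_write_geometry(int max_write_size, int min_io_size)
	{
		if (max_write_size < min_io_size ||
		    max_write_size % min_io_size ||
		    !is_power_of_2(max_write_size))
			return -1;
		return 0;
	}

	int main(void)
	{
		/* 2048-byte max. write size with a 512-byte min. I/O unit: accepted */
		printf("%d\n", check_write_geometry(2048, 512));
		/* 3072 is a multiple of 512 but not a power of two: rejected */
		printf("%d\n", check_write_geometry(3072, 512));
		return 0;
	}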
@@ -541,6 +555,10 @@ static int init_constants_early(struct ubifs_info *c) if (c->min_io_size < 8) { c->min_io_size = 8; c->min_io_shift = 3; + if (c->max_write_size < c->min_io_size) { + c->max_write_size = c->min_io_size; + c->max_write_shift = c->min_io_shift; + } } c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); @@ -650,8 +668,8 @@ static int init_constants_sb(struct ubifs_info *c) tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; tmp = ALIGN(tmp, c->min_io_size); if (tmp > c->leb_size) { - dbg_err("too small LEB size %d, at least %d needed", - c->leb_size, tmp); + ubifs_err("too small LEB size %d, at least %d needed", + c->leb_size, tmp); return -EINVAL; } @@ -665,8 +683,8 @@ static int init_constants_sb(struct ubifs_info *c) tmp /= c->leb_size; tmp += 1; if (c->log_lebs < tmp) { - dbg_err("too small log %d LEBs, required min. %d LEBs", - c->log_lebs, tmp); + ubifs_err("too small log %d LEBs, required min. %d LEBs", + c->log_lebs, tmp); return -EINVAL; } @@ -675,11 +693,11 @@ static int init_constants_sb(struct ubifs_info *c) * be compressed and direntries are of the maximum size. * * Note, data, which may be stored in inodes is budgeted separately, so - * it is not included into 'c->inode_budget'. + * it is not included into 'c->bi.inode_budget'. */ - c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; - c->inode_budget = UBIFS_INO_NODE_SZ; - c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->bi.inode_budget = UBIFS_INO_NODE_SZ; + c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; /* * When the amount of flash space used by buds becomes @@ -723,7 +741,7 @@ static void init_constants_master(struct ubifs_info *c) { long long tmp64; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); c->report_rp_size = ubifs_reported_space(c, c->rp_size); /* @@ -792,15 +810,15 @@ static int alloc_wbufs(struct ubifs_info *c) c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; c->jheads[i].wbuf.jhead = i; + c->jheads[i].grouped = 1; } - c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; /* - * Garbage Collector head likely contains long-term data and - * does not need to be synchronized by timer. + * Garbage Collector head does not need to be synchronized by timer. + * Also GC head nodes are not grouped. 
*/ - c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; c->jheads[GCHD].wbuf.no_timer = 1; + c->jheads[GCHD].grouped = 0; return 0; } @@ -842,7 +860,7 @@ static void free_orphans(struct ubifs_info *c) orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); list_del(&orph->list); kfree(orph); - dbg_err("orphan list not empty at unmount"); + ubifs_err("orphan list not empty at unmount"); } vfree(c->orph_buf); @@ -855,26 +873,10 @@ static void free_orphans(struct ubifs_info *c) */ static void free_buds(struct ubifs_info *c) { - struct rb_node *this = c->buds.rb_node; - struct ubifs_bud *bud; - - while (this) { - if (this->rb_left) - this = this->rb_left; - else if (this->rb_right) - this = this->rb_right; - else { - bud = rb_entry(this, struct ubifs_bud, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &bud->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - kfree(bud); - } - } + struct ubifs_bud *bud, *n; + + rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb) + kfree(bud); } /** @@ -892,7 +894,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubi_is_mapped(c->ubi, lnum); + err = ubifs_is_mapped(c, lnum); if (unlikely(err < 0)) return err; if (err == 1) { @@ -1043,8 +1045,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, flag = parse_standard_option(p); if (!flag) { - ubifs_err("unrecognized mount option \"%s\" " - "or missing value", p); + ubifs_err("unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } sb->s_flags |= flag; @@ -1106,8 +1108,8 @@ again: } /* Just disable bulk-read */ - ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " - "disabling it", c->max_bu_buf_len); + ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", + c->max_bu_buf_len); c->mount_opts.bulk_read = 1; c->bulk_read = 0; return; @@ -1125,9 +1127,9 @@ static int check_free_space(struct ubifs_info *c) { ubifs_assert(c->dark_wm > 0); if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { - ubifs_err("insufficient free space to mount in read/write mode"); - dbg_dump_budg(c); - dbg_dump_lprops(c); + ubifs_err("insufficient free space to mount in R/W mode"); + ubifs_dump_budg(c, &c->bi); + ubifs_dump_lprops(c); return -ENOSPC; } return 0; @@ -1139,17 +1141,17 @@ static int check_free_space(struct ubifs_info *c) * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. */ static int mount_ubifs(struct ubifs_info *c) { int err; - long long x; + long long x, y; size_t sz; c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); + /* Suppress error messages while probing if MS_SILENT is set */ + c->probing = !!(c->vfs_sb->s_flags & MS_SILENT); + err = init_constants_early(c); if (err) return err; @@ -1202,16 +1204,21 @@ static int mount_ubifs(struct ubifs_info *c) if (c->bulk_read == 1) bu_init(c); - /* - * We have to check all CRCs, even for data nodes, when we mount the FS - * (specifically, when we are replaying). 
- */ - c->always_chk_crc = 1; + if (!c->ro_mount) { + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, + GFP_KERNEL); + if (!c->write_reserve_buf) + goto out_free; + } + + c->mounting = 1; err = ubifs_read_superblock(c); if (err) goto out_free; + c->probing = 0; + /* * Make sure the compressor which is set as default in the superblock * or overridden by mount options is actually compiled in. @@ -1235,12 +1242,12 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } + err = alloc_wbufs(c); + if (err) + goto out_cbuf; + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); if (!c->ro_mount) { - err = alloc_wbufs(c); - if (err) - goto out_cbuf; - /* Create background thread */ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { @@ -1262,12 +1269,25 @@ static int mount_ubifs(struct ubifs_info *c) if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; - if (!c->ro_mount) { - err = ubifs_recover_inl_heads(c, c->sbuf); - if (err) - goto out_master; - } - } else if (!c->ro_mount) { + } + + if (c->need_recovery && !c->ro_mount) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !c->ro_mount); + if (err) + goto out_master; + + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_lpt; + } + + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. @@ -1275,14 +1295,10 @@ static int mount_ubifs(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); err = ubifs_write_master(c); if (err) - goto out_master; + goto out_lpt; } - err = ubifs_lpt_init(c, 1, !c->ro_mount); - if (err) - goto out_lpt; - - err = dbg_check_idx_size(c, c->old_idx_sz); + err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; @@ -1291,7 +1307,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_journal; /* Calculate 'min_idx_lebs' after journal replay */ - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); if (err) @@ -1382,75 +1398,71 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - c->always_chk_crc = 0; + c->mounting = 0; - ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", - c->vi.ubi_num, c->vi.vol_id, c->vi.name); - if (c->ro_mount) - ubifs_msg("mounted read-only"); + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", + c->vi.ubi_num, c->vi.vol_id, c->vi.name, + c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; - ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->main_lebs); - x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; - ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " - "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); - ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", + y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("LEB size: %d bytes (%d KiB), min./max. 
I/O unit sizes: %d bytes/%d bytes", + c->leb_size, c->leb_size >> 10, c->min_io_size, + c->max_write_size); + ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", + x, x >> 20, c->main_lebs, + y, y >> 20, c->log_lebs + c->max_bud_cnt); + ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + c->report_rp_size, c->report_rp_size >> 10); + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", c->fmt_version, c->ro_compat_version, - UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); - ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); - ubifs_msg("reserved for root: %llu bytes (%llu KiB)", - c->report_rp_size, c->report_rp_size >> 10); - - dbg_msg("compiled on: " __DATE__ " at " __TIME__); - dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); - dbg_msg("LEB size: %d bytes (%d KiB)", - c->leb_size, c->leb_size >> 10); - dbg_msg("data journal heads: %d", + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, + c->big_lpt ? ", big LPT model" : ", small LPT model"); + + dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr)); + dbg_gen("data journal heads: %d", c->jhead_cnt - NONDATA_JHEADS_CNT); - dbg_msg("UUID: %pUB", c->uuid); - dbg_msg("big_lpt %d", c->big_lpt); - dbg_msg("log LEBs: %d (%d - %d)", + dbg_gen("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); - dbg_msg("LPT area LEBs: %d (%d - %d)", + dbg_gen("LPT area LEBs: %d (%d - %d)", c->lpt_lebs, c->lpt_first, c->lpt_last); - dbg_msg("orphan area LEBs: %d (%d - %d)", + dbg_gen("orphan area LEBs: %d (%d - %d)", c->orph_lebs, c->orph_first, c->orph_last); - dbg_msg("main area LEBs: %d (%d - %d)", + dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); - dbg_msg("index LEBs: %d", c->lst.idx_lebs); - dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", - c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); - dbg_msg("key hash type: %d", c->key_hash_type); - dbg_msg("tree fanout: %d", c->fanout); - dbg_msg("reserved GC LEB: %d", c->gc_lnum); - dbg_msg("first main LEB: %d", c->main_first); - dbg_msg("max. znode size %d", c->max_znode_sz); - dbg_msg("max. index node size %d", c->max_idx_node_sz); - dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + dbg_gen("index LEBs: %d", c->lst.idx_lebs); + dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", + c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, + c->bi.old_idx_sz >> 20); + dbg_gen("key hash type: %d", c->key_hash_type); + dbg_gen("tree fanout: %d", c->fanout); + dbg_gen("reserved GC LEB: %d", c->gc_lnum); + dbg_gen("max. znode size %d", c->max_znode_sz); + dbg_gen("max. index node size %d", c->max_idx_node_sz); + dbg_gen("node sizes: data %zu, inode %zu, dentry %zu", UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); - dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + dbg_gen("node sizes: trun %zu, sb %zu, master %zu", UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); - dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu", UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); - dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", - UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, - UBIFS_MAX_DENT_NODE_SZ); - dbg_msg("dead watermark: %d", c->dead_wm); - dbg_msg("dark watermark: %d", c->dark_wm); - dbg_msg("LEB overhead: %d", c->leb_overhead); + dbg_gen("max. 
node sizes: data %zu, inode %zu dentry %zu, idx %d", + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, + UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); + dbg_gen("dead watermark: %d", c->dead_wm); + dbg_gen("dark watermark: %d", c->dark_wm); + dbg_gen("LEB overhead: %d", c->leb_overhead); x = (long long)c->main_lebs * c->dark_wm; - dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", + dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)", x, x >> 10, x >> 20); - dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)", c->max_bud_bytes, c->max_bud_bytes >> 10, c->max_bud_bytes >> 20); - dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", c->bg_bud_bytes, c->bg_bud_bytes >> 10, c->bg_bud_bytes >> 20); - dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", + dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)", c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); - dbg_msg("max. seq. number: %llu", c->max_sqnum); - dbg_msg("commit number: %llu", c->cmt_no); + dbg_gen("max. seq. number: %llu", c->max_sqnum); + dbg_gen("commit number: %llu", c->cmt_no); return 0; @@ -1474,6 +1486,7 @@ out_wbufs: out_cbuf: kfree(c->cbuf); out_free: + kfree(c->write_reserve_buf); kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); @@ -1512,6 +1525,7 @@ static void ubifs_umount(struct ubifs_info *c) kfree(c->cbuf); kfree(c->rcvrd_mst_node); kfree(c->mst_node); + kfree(c->write_reserve_buf); kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); @@ -1533,17 +1547,22 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->rw_incompat) { ubifs_err("the file-system is not R/W-compatible"); - ubifs_msg("on-flash format version is w%d/r%d, but software " - "only supports up to version w%d/r%d", c->fmt_version, - c->ro_compat_version, UBIFS_FORMAT_VERSION, - UBIFS_RO_COMPAT_VERSION); + ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", + c->fmt_version, c->ro_compat_version, + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); return -EROFS; } mutex_lock(&c->umount_mutex); dbg_save_space_info(c); c->remounting_rw = 1; - c->always_chk_crc = 1; + c->ro_mount = 0; + + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out; + } err = check_free_space(c); if (err) @@ -1559,6 +1578,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) } sup->leb_cnt = cpu_to_le32(c->leb_cnt); err = ubifs_write_sb_node(c, sup); + kfree(sup); if (err) goto out; } @@ -1598,16 +1618,16 @@ static int ubifs_remount_rw(struct ubifs_info *c) goto out; } - err = ubifs_lpt_init(c, 0, 1); - if (err) + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); + if (!c->write_reserve_buf) { + err = -ENOMEM; goto out; + } - err = alloc_wbufs(c); + err = ubifs_lpt_init(c, 0, 1); if (err) goto out; - ubifs_create_buds_lists(c); - /* Create background thread */ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); if (IS_ERR(c->bgt)) { @@ -1642,20 +1662,31 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (err) goto out; + dbg_gen("re-mounted read-write"); + c->remounting_rw = 0; + if (c->need_recovery) { c->need_recovery = 0; ubifs_msg("deferred recovery completed"); + } else { + /* + * Do not run the debugging space check if the were doing + * recovery, because when we saved the information we had the + * file-system in a state where the TNC and lprops has been + * modified in memory, but all the I/O operations 
(including a + * commit) were deferred. So the file-system was in + * "non-committed" state. Now the file-system is in committed + * state, and of course the amount of free space will change + * because, for example, the old index size was imprecise. + */ + err = dbg_check_space_info(c); } - dbg_gen("re-mounted read-write"); - c->ro_mount = 0; - c->remounting_rw = 0; - c->always_chk_crc = 0; - err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); return err; out: + c->ro_mount = 1; vfree(c->orph_buf); c->orph_buf = NULL; if (c->bgt) { @@ -1663,11 +1694,12 @@ out: c->bgt = NULL; } free_wbufs(c); + kfree(c->write_reserve_buf); + c->write_reserve_buf = NULL; vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); c->remounting_rw = 0; - c->always_chk_crc = 0; mutex_unlock(&c->umount_mutex); return err; } @@ -1704,9 +1736,10 @@ static void ubifs_remount_ro(struct ubifs_info *c) if (err) ubifs_ro_mode(c, err); - free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; + kfree(c->write_reserve_buf); + c->write_reserve_buf = NULL; vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); @@ -1730,10 +1763,11 @@ static void ubifs_put_super(struct super_block *sb) * of the media. For example, there will be dirty inodes if we failed * to write them back because of I/O errors. */ - ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); - ubifs_assert(c->budg_idx_growth == 0); - ubifs_assert(c->budg_dd_growth == 0); - ubifs_assert(c->budg_data_growth == 0); + if (!c->ro_error) { + ubifs_assert(c->bi.idx_growth == 0); + ubifs_assert(c->bi.dd_growth == 0); + ubifs_assert(c->bi.data_growth == 0); + } /* * The 'c->umount_lock' prevents races between UBIFS memory shrinker @@ -1778,8 +1812,8 @@ static void ubifs_put_super(struct super_block *sb) * next mount, so we just print a message and * continue to unmount normally. 
*/ - ubifs_err("failed to write master node, " - "error %d", err); + ubifs_err("failed to write master node, error %d", + err); } else { for (i = 0; i < c->jhead_cnt; i++) /* Make sure write-buffer timers are canceled */ @@ -1791,7 +1825,6 @@ static void ubifs_put_super(struct super_block *sb) bdi_destroy(&c->bdi); ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); - kfree(c); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -1799,6 +1832,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) int err; struct ubifs_info *c = sb->s_fs_info; + sync_filesystem(sb); dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); err = ubifs_parse_options(c, data, 1); @@ -1914,59 +1948,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode) return ERR_PTR(-EINVAL); } -static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) { - struct ubi_volume_desc *ubi = sb->s_fs_info; struct ubifs_info *c; - struct inode *root; - int err; c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); - if (!c) - return -ENOMEM; + if (c) { + spin_lock_init(&c->cnt_lock); + spin_lock_init(&c->cs_lock); + spin_lock_init(&c->buds_lock); + spin_lock_init(&c->space_lock); + spin_lock_init(&c->orphan_lock); + init_rwsem(&c->commit_sem); + mutex_init(&c->lp_mutex); + mutex_init(&c->tnc_mutex); + mutex_init(&c->log_mutex); + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); + mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; + c->size_tree = RB_ROOT; + c->orph_tree = RB_ROOT; + INIT_LIST_HEAD(&c->infos_list); + INIT_LIST_HEAD(&c->idx_gc); + INIT_LIST_HEAD(&c->replay_list); + INIT_LIST_HEAD(&c->replay_buds); + INIT_LIST_HEAD(&c->uncat_list); + INIT_LIST_HEAD(&c->empty_list); + INIT_LIST_HEAD(&c->freeable_list); + INIT_LIST_HEAD(&c->frdi_idx_list); + INIT_LIST_HEAD(&c->unclean_leb_list); + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); + c->no_chk_data_crc = 1; + + c->highest_inum = UBIFS_FIRST_INO; + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + + ubi_get_volume_info(ubi, &c->vi); + ubi_get_device_info(c->vi.ubi_num, &c->di); + } + return c; +} - spin_lock_init(&c->cnt_lock); - spin_lock_init(&c->cs_lock); - spin_lock_init(&c->buds_lock); - spin_lock_init(&c->space_lock); - spin_lock_init(&c->orphan_lock); - init_rwsem(&c->commit_sem); - mutex_init(&c->lp_mutex); - mutex_init(&c->tnc_mutex); - mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); - mutex_init(&c->umount_mutex); - mutex_init(&c->bu_mutex); - init_waitqueue_head(&c->cmt_wq); - c->buds = RB_ROOT; - c->old_idx = RB_ROOT; - c->size_tree = RB_ROOT; - c->orph_tree = RB_ROOT; - INIT_LIST_HEAD(&c->infos_list); - INIT_LIST_HEAD(&c->idx_gc); - INIT_LIST_HEAD(&c->replay_list); - INIT_LIST_HEAD(&c->replay_buds); - INIT_LIST_HEAD(&c->uncat_list); - INIT_LIST_HEAD(&c->empty_list); - INIT_LIST_HEAD(&c->freeable_list); - INIT_LIST_HEAD(&c->frdi_idx_list); - INIT_LIST_HEAD(&c->unclean_leb_list); - INIT_LIST_HEAD(&c->old_buds); - INIT_LIST_HEAD(&c->orph_list); - INIT_LIST_HEAD(&c->orph_new); +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct ubifs_info *c = sb->s_fs_info; + struct inode *root; + int err; c->vfs_sb = sb; - c->highest_inum = UBIFS_FIRST_INO; - c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; - - 
ubi_get_volume_info(ubi, &c->vi); - ubi_get_device_info(c->vi.ubi_num, &c->di); - /* Re-open the UBI device in read-write mode */ c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); if (IS_ERR(c->ubi)) { err = PTR_ERR(c->ubi); - goto out_free; + goto out; } /* @@ -1979,7 +2019,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) */ c->bdi.name = "ubifs", c->bdi.capabilities = BDI_CAP_MAP_COPY; - c->bdi.unplug_io_fn = default_unplug_io_fn; err = bdi_init(&c->bdi); if (err) goto out_close; @@ -2016,15 +2055,15 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_umount; } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) - goto out_iput; + sb->s_root = d_make_root(root); + if (!sb->s_root) { + err = -ENOMEM; + goto out_umount; + } mutex_unlock(&c->umount_mutex); return 0; -out_iput: - iput(root); out_umount: ubifs_umount(c); out_unlock: @@ -2033,24 +2072,29 @@ out_bdi: bdi_destroy(&c->bdi); out_close: ubi_close_volume(c->ubi); -out_free: - kfree(c); +out: return err; } static int sb_test(struct super_block *sb, void *data) { - dev_t *dev = data; + struct ubifs_info *c1 = data; struct ubifs_info *c = sb->s_fs_info; - return c->vi.cdev == *dev; + return c->vi.cdev == c1->vi.cdev; +} + +static int sb_set(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); } static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, const char *name, void *data) { struct ubi_volume_desc *ubi; - struct ubi_volume_info vi; + struct ubifs_info *c; struct super_block *sb; int err; @@ -2063,23 +2107,29 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, */ ubi = open_ubi(name, UBI_READONLY); if (IS_ERR(ubi)) { - dbg_err("cannot open \"%s\", error %d", - name, (int)PTR_ERR(ubi)); + ubifs_err("cannot open \"%s\", error %d", + name, (int)PTR_ERR(ubi)); return ERR_CAST(ubi); } - ubi_get_volume_info(ubi, &vi); - dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + c = alloc_ubifs_info(ubi); + if (!c) { + err = -ENOMEM; + goto out_close; + } + + dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); + sb = sget(fs_type, sb_test, sb_set, flags, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); + kfree(c); goto out_close; } if (sb->s_root) { struct ubifs_info *c1 = sb->s_fs_info; - + kfree(c); /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); if (!!(flags & MS_RDONLY) != c1->ro_mount) { @@ -2087,12 +2137,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, goto out_deact; } } else { - sb->s_flags = flags; - /* - * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is - * replaced by 'c'. - */ - sb->s_fs_info = ubi; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) goto out_deact; @@ -2112,12 +2156,20 @@ out_close: return ERR_PTR(err); } +static void kill_ubifs_super(struct super_block *s) +{ + struct ubifs_info *c = s->s_fs_info; + kill_anon_super(s); + kfree(c); +} + static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, .mount = ubifs_mount, - .kill_sb = kill_anon_super, + .kill_sb = kill_ubifs_super, }; +MODULE_ALIAS_FS("ubifs"); /* * Inode slab cache constructor. @@ -2184,25 +2236,17 @@ static int __init ubifs_init(void) * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 
*/ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { - ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" - " at least 4096 bytes", + ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", (unsigned int)PAGE_CACHE_SIZE); return -EINVAL; } - err = register_filesystem(&ubifs_fs_type); - if (err) { - ubifs_err("cannot register file system, error %d", err); - return err; - } - - err = -ENOMEM; ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", sizeof(struct ubifs_inode), 0, SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, &inode_slab_ctor); if (!ubifs_inode_slab) - goto out_reg; + return -ENOMEM; register_shrinker(&ubifs_shrinker_info); @@ -2214,15 +2258,20 @@ static int __init ubifs_init(void) if (err) goto out_compr; + err = register_filesystem(&ubifs_fs_type); + if (err) { + ubifs_err("cannot register file system, error %d", err); + goto out_dbg; + } return 0; +out_dbg: + dbg_debugfs_exit(); out_compr: ubifs_compressors_exit(); out_shrinker: unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); -out_reg: - unregister_filesystem(&ubifs_fs_type); return err; } /* late_initcall to let compressors initialize first */ @@ -2236,6 +2285,12 @@ static void __exit ubifs_exit(void) dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ubifs_inode_slab); unregister_filesystem(&ubifs_fs_type); } diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index ad9cf013362..8a40cf9c02d 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -178,27 +178,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c, */ void destroy_old_idx(struct ubifs_info *c) { - struct rb_node *this = c->old_idx.rb_node; - struct ubifs_old_idx *old_idx; + struct ubifs_old_idx *old_idx, *n; - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - old_idx = rb_entry(this, struct ubifs_old_idx, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &old_idx->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } + rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb) kfree(old_idx); - } + c->old_idx = RB_ROOT; } @@ -223,7 +207,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -271,7 +255,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!test_bit(COW_ZNODE, &znode->flags)) { + if (!ubifs_zn_cow(znode)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -339,17 +323,16 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, dent); if (err) { - dbg_dump_stack(); - dbg_dump_node(c, dent); + dump_stack(); + ubifs_dump_node(c, dent); return err; } - lnc_node = kmalloc(zbr->len, GFP_NOFS); + lnc_node = kmemdup(node, zbr->len, GFP_NOFS); if (!lnc_node) /* We don't have to have the cache, so no error */ return 0; - memcpy(lnc_node, node, zbr->len); zbr->leaf = lnc_node; return 0; } @@ -373,8 +356,8 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch 
*zbr, err = ubifs_validate_entry(c, node); if (err) { - dbg_dump_stack(); - dbg_dump_node(c, node); + dump_stack(); + ubifs_dump_node(c, node); return err; } @@ -447,8 +430,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc * is true (it is controlled by corresponding mount option). However, if - * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always - * checked. + * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to + * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is + * because during mounting or re-mounting from R/O mode to R/W mode we may read + * journal nodes (when replying the journal or doing the recovery) and the + * journal nodes may potentially be corrupted, so checking is required. */ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -459,7 +445,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 1); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -476,7 +462,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && + !c->remounting_rw) return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); @@ -502,7 +489,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, { int ret; - dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs); ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, zbr->offs); @@ -516,8 +503,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, ret = 0; } if (ret == 0 && c->replaying) - dbg_mnt("dangling branch LEB %d:%d len %d, key %s", - zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); + dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", + zbr->lnum, zbr->offs, zbr->len); return ret; } @@ -992,9 +979,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, if (adding || !o_znode) return 0; - dbg_mnt("dangling match LEB %d:%d len %d %s", + dbg_mntk(key, "dangling match LEB %d:%d len %d key ", o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, - o_znode->zbranch[o_n].len, DBGKEY(key)); + o_znode->zbranch[o_n].len); *zn = o_znode; *n = o_n; return 1; @@ -1176,7 +1163,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search key %s", DBGKEY(key)); + dbg_tnck(key, "search key "); ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); znode = c->zroot.znode; @@ -1312,7 +1299,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; unsigned long time = get_seconds(); - dbg_tnc("search and dirty key %s", DBGKEY(key)); + dbg_tnck(key, "search and dirty key "); znode = c->zroot.znode; if (unlikely(!znode)) { @@ -1662,7 +1649,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (!overlap) { /* We may safely unlock the write-buffer and read the data */ 
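/*
 * A rough sketch of 'ubifs_leb_read()', the io.c wrapper which the hunks in
 * this area switch to instead of calling 'ubi_read()' directly. The body
 * below is an assumption based only on the prototype declared in ubifs.h;
 * @even_ebadmsg presumably controls whether an -EBADMSG result is reported
 * as an error:
 *
 *	int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf,
 *			   int offs, int len, int even_ebadmsg)
 *	{
 *		int err;
 *
 *		err = ubi_read(c->ubi, lnum, buf, offs, len);
 *		if (err && (err != -EBADMSG || even_ebadmsg))
 *			ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
 *				  len, lnum, offs, err);
 *		return err;
 *	}
 */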
spin_unlock(&wbuf->lock); - return ubi_read(c->ubi, lnum, buf, offs, len); + return ubifs_leb_read(c, lnum, buf, offs, len, 0); } /* Don't read under wbuf */ @@ -1676,7 +1663,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (rlen > 0) /* Read everything that goes before write-buffer */ - return ubi_read(c->ubi, lnum, buf, offs, rlen); + return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); return 0; } @@ -1719,8 +1706,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, if (!keys_eq(c, &zbr->key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(&zbr->key), DBGKEY1(&key1)); + dbg_tnck(&zbr->key, "looked for key "); + dbg_tnck(&key1, "found node's key "); goto out_err; } @@ -1730,8 +1717,8 @@ out_err: err = -EINVAL; out: ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_dump_node(c, buf); - dbg_dump_stack(); + ubifs_dump_node(c, buf); + dump_stack(); return err; } @@ -1763,7 +1750,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (wbuf) err = read_wbuf(wbuf, bu->buf, len, lnum, offs); else - err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -1772,8 +1759,8 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (err && err != -EBADMSG) { ubifs_err("failed to read from LEB %d:%d, error %d", lnum, offs, err); - dbg_dump_stack(); - dbg_tnc("key %s", DBGKEY(&bu->key)); + dump_stack(); + dbg_tnck(&bu->key, "key "); return err; } @@ -1808,7 +1795,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, int found, n, err; struct ubifs_znode *znode; - dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); + dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); mutex_lock(&c->tnc_mutex); found = ubifs_lookup_level0(c, key, &znode, &n); if (!found) { @@ -1982,8 +1969,7 @@ again: zp = znode->parent; if (znode->child_cnt < c->fanout) { ubifs_assert(n != c->fanout); - dbg_tnc("inserted at %d level %d, key %s", n, znode->level, - DBGKEY(key)); + dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); insert_zbranch(znode, zbr, n); @@ -1998,7 +1984,7 @@ again: * Unfortunately, @znode does not have more empty slots and we have to * split it. 
*/ - dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + dbg_tnck(key, "splitting level %d, key ", znode->level); if (znode->alt) /* @@ -2092,7 +2078,7 @@ do_split: } /* Insert new key and branch */ - dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); insert_zbranch(zi, zbr, n); @@ -2168,7 +2154,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (!found) { struct ubifs_zbranch zbr; @@ -2217,8 +2203,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, - old_offs, lnum, offs, len, DBGKEY(key)); + dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, + old_offs, lnum, offs, len); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2300,8 +2286,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, - DBGKEY(key)); + dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", + lnum, offs, nm->len, nm->name); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2359,7 +2345,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, * by passing 'ubifs_tnc_remove_nm()' the same key but * an unmatchable name. */ - struct qstr noname = { .len = 0, .name = "" }; + struct qstr noname = { .name = "" }; err = dbg_check_tnc(c, 0); mutex_unlock(&c->tnc_mutex); @@ -2394,14 +2380,14 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) /* Delete without merge for now */ ubifs_assert(znode->level == 0); ubifs_assert(n >= 0 && n < c->fanout); - dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); + dbg_tnck(&znode->zbranch[n].key, "deleting key "); zbr = &znode->zbranch[n]; lnc_free(zbr); err = ubifs_add_dirt(c, zbr->lnum, zbr->len); if (err) { - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); return err; } @@ -2419,7 +2405,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2475,9 +2461,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!test_bit(OBSOLETE_ZNODE, - &zp->flags)); - ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + ubifs_assert(!ubifs_zn_obsolete(zp)); + ubifs_assert(ubifs_zn_dirty(zp)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2505,7 +2490,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); found = lookup_level0_dirty(c, key, &znode, &n); if (found < 0) { err = found; @@ -2536,7 +2521,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode *znode; mutex_lock(&c->tnc_mutex); - dbg_tnc("%.*s, key %s", nm->len, nm->name, 
DBGKEY(key)); + dbg_tnck(key, "%.*s, key ", nm->len, nm->name); err = lookup_level0_dirty(c, key, &znode, &n); if (err < 0) goto out_unlock; @@ -2553,11 +2538,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, if (err) { /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } err = tnc_delete(c, znode, n); } @@ -2648,10 +2633,10 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, err = ubifs_add_dirt(c, znode->zbranch[i].lnum, znode->zbranch[i].len); if (err) { - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); goto out_unlock; } - dbg_tnc("removing %s", DBGKEY(key)); + dbg_tnck(key, "removing key "); } if (k) { for (i = n + 1 + k; i < znode->child_cnt; i++) @@ -2771,7 +2756,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, struct ubifs_zbranch *zbr; union ubifs_key *dkey; - dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); + dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); ubifs_assert(is_hash_key(c, key)); mutex_lock(&c->tnc_mutex); @@ -2861,7 +2846,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) struct ubifs_znode *znode = cnext; cnext = cnext->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); } while (cnext && cnext != c->cnext); } @@ -2872,12 +2857,14 @@ static void tnc_destroy_cnext(struct ubifs_info *c) */ void ubifs_tnc_close(struct ubifs_info *c) { - long clean_freed; - tnc_destroy_cnext(c); if (c->zroot.znode) { - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + long n, freed; + + n = atomic_long_read(&c->clean_zn_cnt); + freed = ubifs_destroy_tnc_subtree(c->zroot.znode); + ubifs_assert(freed == n); + atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); kfree(c->ilebs); @@ -3273,8 +3260,6 @@ out_unlock: return err; } -#ifdef CONFIG_UBIFS_FS_DEBUG - /** * dbg_check_inode_size - check if inode size is correct. 
* @c: UBIFS file-system description object @@ -3296,7 +3281,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, if (!S_ISREG(inode->i_mode)) return 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; @@ -3329,16 +3314,15 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, out_dump: block = key_block(c, key); - ubifs_err("inode %lu has size %lld, but there are data at offset %lld " - "(data key %s)", (unsigned long)inode->i_ino, size, - ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); - dbg_dump_inode(c, inode); - dbg_dump_stack(); - err = -EINVAL; + ubifs_err("inode %lu has size %lld, but there are data at offset %lld", + (unsigned long)inode->i_ino, size, + ((loff_t)block) << UBIFS_BLOCK_SHIFT); + mutex_unlock(&c->tnc_mutex); + ubifs_dump_inode(c, inode); + dump_stack(); + return -EINVAL; out_unlock: mutex_unlock(&c->tnc_mutex); return err; } - -#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 53288e5d604..3600994f841 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -22,6 +22,7 @@ /* This file implements TNC functions for committing */ +#include <linux/random.h> #include "ubifs.h" /** @@ -53,18 +54,16 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, br->len = cpu_to_le32(zbr->len); if (!zbr->lnum || !zbr->len) { ubifs_err("bad ref in znode"); - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); if (zbr->znode) - dbg_dump_znode(c, zbr->znode); + ubifs_dump_znode(c, zbr->znode); } } ubifs_prepare_node(c, idx, len, 0); -#ifdef CONFIG_UBIFS_FS_DEBUG znode->lnum = lnum; znode->offs = offs; znode->len = len; -#endif err = insert_old_idx_znode(c, znode); @@ -87,8 +86,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, atomic_long_dec(&c->dirty_zn_cnt); ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); + /* + * Note, unlike 'write_index()' we do not add memory barriers here + * because this function is called with @c->tnc_mutex locked. + */ __clear_bit(DIRTY_ZNODE, &znode->flags); __clear_bit(COW_ZNODE, &znode->flags); @@ -317,8 +320,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p) 0, 0, 0); if (err) return err; - err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len); if (err) return err; dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); @@ -377,16 +379,14 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (!dbg_force_in_the_gaps_enabled) { + if (!dbg_is_chk_index(c)) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. 
*/ - ubifs_err("out of space"); - spin_lock(&c->space_lock); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); - dbg_dump_lprops(c); + ubifs_warn("out of space"); + ubifs_dump_budg(c, &c->bi); + ubifs_dump_lprops(c); } /* Try to commit anyway */ err = 0; @@ -453,11 +453,9 @@ static int layout_in_empty_space(struct ubifs_info *c) offs = buf_offs + used; -#ifdef CONFIG_UBIFS_FS_DEBUG znode->lnum = lnum; znode->offs = offs; znode->len = len; -#endif /* Update the parent */ zp = znode->parent; @@ -493,25 +491,6 @@ static int layout_in_empty_space(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - buf_offs += ALIGN(len, 8); - if (next_len) { - if (buf_offs + next_len <= c->leb_size) - continue; - err = ubifs_update_one_lp(c, lnum, 0, - c->leb_size - buf_offs, 0, 0); - if (err) - return err; - lnum = -1; - continue; - } - err = ubifs_update_one_lp(c, lnum, - c->leb_size - buf_offs, 0, 0, 0); - if (err) - return err; - break; - } - /* Update buffer positions */ wlen = used + len; used += ALIGN(len, 8); @@ -552,10 +531,8 @@ static int layout_in_empty_space(struct ubifs_info *c) break; } -#ifdef CONFIG_UBIFS_FS_DEBUG c->dbg->new_ihead_lnum = lnum; c->dbg->new_ihead_offs = buf_offs; -#endif return 0; } @@ -660,7 +637,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) } cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_cow(znode)); __set_bit(COW_ZNODE, &znode->flags); znode->alt = 0; cnext = find_next_dirty(znode); @@ -706,7 +683,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_force_in_the_gaps()) + if (dbg_is_chk_index(c) && !(prandom_u32() & 7)) return -ENOSPC; return 0; } @@ -796,16 +773,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) spin_lock(&c->space_lock); /* * Although we have not finished committing yet, update size of the - * committed index ('c->old_idx_sz') and zero out the index growth + * committed index ('c->bi.old_idx_sz') and zero out the index growth * budget. It is OK to do this now, because we've reserved all the * space which is needed to commit the index, and it is save for the * budgeting subsystem to assume the index is already committed, * even though it is not. 
*/ - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); - c->old_idx_sz = c->calc_idx_sz; - c->budg_uncommitted_idx = 0; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + c->bi.old_idx_sz = c->calc_idx_sz; + c->bi.uncommitted_idx = 0; + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); mutex_unlock(&c->tnc_mutex); @@ -832,7 +809,7 @@ static int write_index(struct ubifs_info *c) struct ubifs_idx_node *idx; struct ubifs_znode *znode, *cnext; int i, lnum, offs, len, next_len, buf_len, buf_offs, used; - int avail, wlen, err, lnum_pos = 0; + int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; cnext = c->enext; if (!cnext) @@ -880,9 +857,9 @@ static int write_index(struct ubifs_info *c) br->len = cpu_to_le32(zbr->len); if (!zbr->lnum || !zbr->len) { ubifs_err("bad ref in znode"); - dbg_dump_znode(c, znode); + ubifs_dump_znode(c, znode); if (zbr->znode) - dbg_dump_znode(c, zbr->znode); + ubifs_dump_znode(c, zbr->znode); } } len = ubifs_idx_node_sz(c, znode->child_cnt); @@ -897,19 +874,17 @@ static int write_index(struct ubifs_info *c) } offs = buf_offs + used; -#ifdef CONFIG_UBIFS_FS_DEBUG if (lnum != znode->lnum || offs != znode->offs || len != znode->len) { ubifs_err("inconsistent znode posn"); return -EINVAL; } -#endif /* Grab some stuff from znode while we still can */ cnext = znode->cnext; ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); /* * It is important that other threads should see %DIRTY_ZNODE @@ -920,9 +895,31 @@ static int write_index(struct ubifs_info *c) * the reason for the second barrier. */ clear_bit(DIRTY_ZNODE, &znode->flags); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(COW_ZNODE, &znode->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); + + /* + * We have marked the znode as clean but have not updated the + * @c->clean_zn_cnt counter. If this znode becomes dirty again + * before 'free_obsolete_znodes()' is called, then + * @c->clean_zn_cnt will be decremented before it gets + * incremented (resulting in 2 decrements for the same znode). + * This means that @c->clean_zn_cnt may become negative for a + * while. + * + * Q: why we cannot increment @c->clean_zn_cnt? + * A: because we do not have the @c->tnc_mutex locked, and the + * following code would be racy and buggy: + * + * if (!ubifs_zn_obsolete(znode)) { + * atomic_long_inc(&c->clean_zn_cnt); + * atomic_long_inc(&ubifs_clean_zn_cnt); + * } + * + * Thus, we just delay the @c->clean_zn_cnt update until we + * have the mutex locked. 
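 *
 * A rough sketch (an assumption, for illustration only) of what that delayed
 * fix-up presumably looks like in 'free_obsolete_znodes()', which runs at the
 * end of the commit with @c->tnc_mutex held:
 *
 *	if (ubifs_zn_obsolete(znode))
 *		kfree(znode);
 *	else {
 *		znode->cnext = NULL;
 *		atomic_long_inc(&c->clean_zn_cnt);
 *		atomic_long_inc(&ubifs_clean_zn_cnt);
 *	}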
+ */ /* Do not access znode from this point on */ @@ -940,76 +937,46 @@ static int write_index(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - /* - * Write the prepared index node immediately if there is - * no minimum IO size - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - wlen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += ALIGN(wlen, 8); - if (next_len) { - used = 0; - avail = buf_len; - if (buf_offs + next_len > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } + nxt_offs = buf_offs + used + next_len; + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) continue; - } + else + blen = buf_len; } else { - int blen, nxt_offs = buf_offs + used + next_len; - - if (next_len && nxt_offs <= c->leb_size) { - if (avail > 0) - continue; - else - blen = buf_len; - } else { - wlen = ALIGN(wlen, 8); - blen = ALIGN(wlen, c->min_io_size); - ubifs_pad(c, c->cbuf + wlen, blen - wlen); - } - /* - * The buffer is full or there are no more znodes - * to do - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - blen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += blen; - if (next_len) { - if (nxt_offs > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } - used -= blen; - if (used < 0) - used = 0; - avail = buf_len - used; - memmove(c->cbuf, c->cbuf + blen, used); - continue; + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + + /* The buffer is full or there are no more znodes to do */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, + 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + blen, used); + continue; } break; } -#ifdef CONFIG_UBIFS_FS_DEBUG if (lnum != c->dbg->new_ihead_lnum || buf_offs != c->dbg->new_ihead_offs) { ubifs_err("inconsistent ihead"); return -EINVAL; } -#endif c->ihead_lnum = lnum; c->ihead_offs = buf_offs; @@ -1031,7 +998,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) do { znode = cnext; cnext = znode->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); else { znode->cnext = NULL; diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index b48db999903..f6bf8995c7b 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -293,10 +293,10 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, lnum, offs, znode->level, znode->child_cnt); if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { - dbg_err("current fanout %d, branch count %d", - c->fanout, znode->child_cnt); - dbg_err("max levels %d, znode level %d", - UBIFS_MAX_LEVELS, znode->level); + ubifs_err("current fanout %d, branch count %d", + c->fanout, znode->child_cnt); + ubifs_err("max levels %d, znode level %d", + UBIFS_MAX_LEVELS, znode->level); err = 1; goto out_dump; } @@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, if (zbr->lnum < c->main_first || zbr->lnum >= c->leb_cnt || zbr->offs < 0 || zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { - dbg_err("bad branch %d", i); + ubifs_err("bad branch %d", i); 
err = 2; goto out_dump; } @@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, case UBIFS_XENT_KEY: break; default: - dbg_msg("bad key type at slot %d: %s", i, - DBGKEY(&zbr->key)); + ubifs_err("bad key type at slot %d: %d", + i, key_type(c, &zbr->key)); err = 3; goto out_dump; } @@ -340,19 +340,19 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, type = key_type(c, &zbr->key); if (c->ranges[type].max_len == 0) { if (zbr->len != c->ranges[type].len) { - dbg_err("bad target node (type %d) length (%d)", - type, zbr->len); - dbg_err("have to be %d", c->ranges[type].len); + ubifs_err("bad target node (type %d) length (%d)", + type, zbr->len); + ubifs_err("have to be %d", c->ranges[type].len); err = 4; goto out_dump; } } else if (zbr->len < c->ranges[type].min_len || zbr->len > c->ranges[type].max_len) { - dbg_err("bad target node (type %d) length (%d)", - type, zbr->len); - dbg_err("have to be in range of %d-%d", - c->ranges[type].min_len, - c->ranges[type].max_len); + ubifs_err("bad target node (type %d) length (%d)", + type, zbr->len); + ubifs_err("have to be in range of %d-%d", + c->ranges[type].min_len, + c->ranges[type].max_len); err = 5; goto out_dump; } @@ -370,13 +370,13 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, cmp = keys_cmp(c, key1, key2); if (cmp > 0) { - dbg_err("bad key order (keys %d and %d)", i, i + 1); + ubifs_err("bad key order (keys %d and %d)", i, i + 1); err = 6; goto out_dump; } else if (cmp == 0 && !is_hash_key(c, key1)) { /* These can only be keys with colliding hash */ - dbg_err("keys %d and %d are not hashed but equivalent", - i, i + 1); + ubifs_err("keys %d and %d are not hashed but equivalent", + i, i + 1); err = 7; goto out_dump; } @@ -387,7 +387,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, out_dump: ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); - dbg_dump_node(c, idx); + ubifs_dump_node(c, idx); kfree(idx); return -EINVAL; } @@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, zbr->offs); if (err) { - dbg_tnc("key %s", DBGKEY(key)); + dbg_tnck(key, "key "); return err; } @@ -484,9 +484,9 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, if (!keys_eq(c, key, &key1)) { ubifs_err("bad key in node at LEB %d:%d", zbr->lnum, zbr->offs); - dbg_tnc("looked for key %s found node's key %s", - DBGKEY(key), DBGKEY1(&key1)); - dbg_dump_node(c, node); + dbg_tnck(key, "looked for key "); + dbg_tnck(&key1, "but found node's key "); + ubifs_dump_node(c, node); return -EINVAL; } diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 191ca7863fe..e24380cf46e 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -408,9 +408,11 @@ enum { * Superblock flags. * * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set + * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed */ enum { UBIFS_FLG_BIGLPT = 0x02, + UBIFS_FLG_SPACE_FIXUP = 0x04, }; /** @@ -434,7 +436,7 @@ struct ubifs_ch { __u8 node_type; __u8 group_type; __u8 padding[2]; -} __attribute__ ((packed)); +} __packed; /** * union ubifs_dev_desc - device node descriptor. @@ -448,7 +450,7 @@ struct ubifs_ch { union ubifs_dev_desc { __le32 new; __le64 huge; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ino_node - inode node. @@ -509,7 +511,7 @@ struct ubifs_ino_node { __le16 compr_type; __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! 
*/ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_dent_node - directory entry node. @@ -534,7 +536,7 @@ struct ubifs_dent_node { __le16 nlen; __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ __u8 name[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_data_node - data node. @@ -555,7 +557,7 @@ struct ubifs_data_node { __le16 compr_type; __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_trun_node - truncation node. @@ -575,7 +577,7 @@ struct ubifs_trun_node { __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ __le64 old_size; __le64 new_size; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_pad_node - padding node. @@ -586,7 +588,7 @@ struct ubifs_trun_node { struct ubifs_pad_node { struct ubifs_ch ch; __le32 pad_len; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_sb_node - superblock node. @@ -644,7 +646,7 @@ struct ubifs_sb_node { __u8 uuid[16]; __le32 ro_compat_version; __u8 padding2[3968]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_mst_node - master node. @@ -711,7 +713,7 @@ struct ubifs_mst_node { __le32 idx_lebs; __le32 leb_cnt; __u8 padding[344]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ref_node - logical eraseblock reference node. @@ -727,7 +729,7 @@ struct ubifs_ref_node { __le32 offs; __le32 jhead; __u8 padding[28]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_branch - key/reference/length branch @@ -741,7 +743,7 @@ struct ubifs_branch { __le32 offs; __le32 len; __u8 key[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_idx_node - indexing node. @@ -755,7 +757,7 @@ struct ubifs_idx_node { __le16 child_cnt; __le16 level; __u8 branches[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_cs_node - commit start node. @@ -765,7 +767,7 @@ struct ubifs_idx_node { struct ubifs_cs_node { struct ubifs_ch ch; __le64 cmt_no; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_orph_node - orphan node. @@ -777,6 +779,6 @@ struct ubifs_orph_node { struct ubifs_ch ch; __le64 cmt_no; __le64 inos[]; -} __attribute__ ((packed)); +} __packed; #endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 381d6b207a5..c1f71fe17cc 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -42,16 +42,24 @@ #define UBIFS_VERSION 1 /* Normal UBIFS messages */ -#define ubifs_msg(fmt, ...) \ - printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) /* UBIFS error messages */ -#define ubifs_err(fmt, ...) \ - printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ +#define ubifs_err(fmt, ...) \ + pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ __func__, ##__VA_ARGS__) /* UBIFS warning messages */ -#define ubifs_warn(fmt, ...) \ - printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__) +#define ubifs_warn(fmt, ...) \ + pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) +/* + * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description + * object as an argument. + */ +#define ubifs_errc(c, fmt, ...) 
\ + do { \ + if (!(c)->probing) \ + ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 @@ -84,9 +92,6 @@ #define INUM_WARN_WATERMARK 0xFFF00000 #define INUM_WATERMARK 0xFFFFFF00 -/* Largest key size supported in this implementation */ -#define CUR_MAX_KEY_LEN UBIFS_SK_LEN - /* Maximum number of entries in each LPT (LEB category) heap */ #define LPT_HEAP_SZ 256 @@ -151,6 +156,12 @@ */ #define WORST_COMPR_FACTOR 2 +/* + * How much memory is needed for a buffer where we comress a data node. + */ +#define COMPRESSED_DATA_NODE_BUF_SZ \ + (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) + /* Maximum expected tree height for use by bottom_up_buf */ #define BOTTOM_UP_HEIGHT 64 @@ -224,14 +235,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * COW_CNODE: cnode is being committed and must be copied before writing * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished + * so it can (and must) be freed when the commit is finished + * COW_CNODE: cnode is being committed and must be copied before writing */ enum { DIRTY_CNODE = 0, - COW_CNODE = 1, - OBSOLETE_CNODE = 2, + OBSOLETE_CNODE = 1, + COW_CNODE = 2, }; /* @@ -271,10 +282,10 @@ struct ubifs_old_idx { /* The below union makes it easier to deal with keys */ union ubifs_key { - uint8_t u8[CUR_MAX_KEY_LEN]; - uint32_t u32[CUR_MAX_KEY_LEN/4]; - uint64_t u64[CUR_MAX_KEY_LEN/8]; - __le32 j32[CUR_MAX_KEY_LEN/4]; + uint8_t u8[UBIFS_SK_LEN]; + uint32_t u32[UBIFS_SK_LEN/4]; + uint64_t u64[UBIFS_SK_LEN/8]; + __le32 j32[UBIFS_SK_LEN/4]; }; /** @@ -383,9 +394,9 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock - * with 'ubifs_writepage()' (see file.c). All the other inode fields are - * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would + * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields + * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. */ struct ubifs_inode { @@ -646,8 +657,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @offs: write-buffer offset in this logical eraseblock * @avail: number of bytes available in the write-buffer * @used: number of used bytes in the write-buffer - * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, - * %UBI_UNKNOWN) + * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep * up by 'mutex_lock_nested()). * @sync_callback: write-buffer synchronization callback @@ -680,7 +690,7 @@ struct ubifs_wbuf { int offs; int avail; int used; - int dtype; + int size; int jhead; int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); struct mutex io_mutex; @@ -714,12 +724,14 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. 
* @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. */ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** @@ -755,6 +767,9 @@ struct ubifs_zbranch { * @offs: offset of the corresponding indexing node * @len: length of the corresponding indexing node * @zbranch: array of znode branches (@c->fanout elements) + * + * Note! The @lnum, @offs, and @len fields are not really needed - we have them + * only for internal consistency check. They could be removed to save some RAM. */ struct ubifs_znode { struct ubifs_znode *parent; @@ -765,9 +780,9 @@ struct ubifs_znode { int child_cnt; int iip; int alt; -#ifdef CONFIG_UBIFS_FS_DEBUG - int lnum, offs, len; -#endif + int lnum; + int offs; + int len; struct ubifs_zbranch zbranch[]; }; @@ -898,6 +913,8 @@ struct ubifs_budget_req { * @dnext: next orphan to delete * @inum: inode number * @new: %1 => added since the last commit, otherwise %0 + * @cmt: %1 => commit pending, otherwise %0 + * @del: %1 => delete pending, otherwise %0 */ struct ubifs_orphan { struct rb_node rb; @@ -906,7 +923,9 @@ struct ubifs_orphan { struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; - int new; + unsigned new:1; + unsigned cmt:1; + unsigned del:1; }; /** @@ -929,6 +948,40 @@ struct ubifs_mount_opts { unsigned int compr_type:2; }; +/** + * struct ubifs_budg_info - UBIFS budgeting information. + * @idx_growth: amount of bytes budgeted for index growth + * @data_growth: amount of bytes budgeted for cached data + * @dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but + * which still have to be taken into account because the index + * has not been committed so far + * @old_idx_sz: size of index on flash + * @min_idx_lebs: minimum number of LEBs required for the index + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * @page_budget: budget for a page (constant, nenver changed after mount) + * @inode_budget: budget for an inode (constant, nenver changed after mount) + * @dent_budget: budget for a directory entry (constant, nenver changed after + * mount) + */ +struct ubifs_budg_info { + long long idx_growth; + long long data_growth; + long long dd_growth; + long long uncommitted_idx; + unsigned long long old_idx_sz; + int min_idx_lebs; + unsigned int nospace:1; + unsigned int nospace_rp:1; + int page_budget; + int inode_budget; + int dent_budget; +}; + struct ubifs_debug_info; /** @@ -972,6 +1025,7 @@ struct ubifs_debug_info; * @cmt_wq: wait queue to sleep on if the log is full and a commit is running * * @big_lpt: flag that LPT is too big to write whole during commit + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads @@ -1003,6 +1057,11 @@ struct ubifs_debug_info; * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu * @bu: pre-allocated bulk-read information * + * @write_reserve_mutex: protects @write_reserve_buf + * @write_reserve_buf: on the write path we allocate memory, which might + * sometimes be unavailable, in 
which case we use this + * write reserve buffer + * * @log_lebs: number of logical eraseblocks in the log * @log_bytes: log size in bytes * @log_last: last LEB of the log @@ -1024,7 +1083,12 @@ struct ubifs_debug_info; * * @min_io_size: minimal input/output unit size * @min_io_shift: number of bits in @min_io_size minus one + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) + * @max_write_shift: number of bits in @max_write_size minus one * @leb_size: logical eraseblock size in bytes + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks * @half_leb_size: half LEB size * @idx_leb_size: how many bytes of an LEB are effectively available when it is * used to store indexing nodes (@leb_size - @max_idx_node_sz) @@ -1039,32 +1103,14 @@ struct ubifs_debug_info; * @dirty_zn_cnt: number of dirty znodes * @clean_zn_cnt: number of clean znodes * - * @budg_idx_growth: amount of bytes budgeted for index growth - * @budg_data_growth: amount of bytes budgeted for cached data - * @budg_dd_growth: amount of bytes budgeted for cached data that will make - * other data dirty - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, - * but which still have to be taken into account because - * the index has not been committed so far - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, - * @nospace, and @nospace_rp; - * @min_idx_lebs: minimum number of LEBs required for the index - * @old_idx_sz: size of index on flash + * @space_lock: protects @bi and @lst + * @lst: lprops statistics + * @bi: budgeting information * @calc_idx_sz: temporary variable which is used to calculate new index size * (contains accurate new index size at end of TNC commit start) - * @lst: lprops statistics - * @nospace: non-zero if the file-system does not have flash space (used as - * optimization) - * @nospace_rp: the same as @nospace, but additionally means that even reserved - * pool is full - * - * @page_budget: budget for a page - * @inode_budget: budget for an inode - * @dent_budget: budget for a directory entry * * @ref_node_alsz: size of the LEB reference node aligned to the min. 
flash - * I/O unit + * I/O unit * @mst_node_alsz: master node aligned size * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary @@ -1150,6 +1196,8 @@ struct ubifs_debug_info; * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1166,22 +1214,21 @@ struct ubifs_debug_info; * @rp_uid: reserved pool user ID * @rp_gid: reserved pool group ID * - * @empty: if the UBI device is empty - * @replay_tree: temporary tree used during journal replay + * @empty: %1 if the UBI device is empty + * @need_recovery: %1 if the file-system needs recovery + * @replaying: %1 during journal replay + * @mounting: %1 while mounting + * @probing: %1 while attempting to mount if MS_SILENT mount flag is set + * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay * @cs_sqnum: sequence number of first node in the log (commit start node) * @replay_sqnum: sequence number of node currently being replayed - * @need_recovery: file-system needs recovery - * @replaying: set to %1 during journal replay * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W * mode * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted * FS to R/W mode * @size_tree: inode size information for recovery - * @remounting_rw: set while re-mounting from R/O mode to R/W mode - * @always_chk_crc: always check CRCs (while mounting and remounting to R/W - * mode) * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information @@ -1221,6 +1268,7 @@ struct ubifs_info { wait_queue_head_t cmt_wq; unsigned int big_lpt:1; + unsigned int space_fixup:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; unsigned int default_compr:2; @@ -1250,6 +1298,9 @@ struct ubifs_info { struct mutex bu_mutex; struct bu_info bu; + struct mutex write_reserve_mutex; + void *write_reserve_buf; + int log_lebs; long long log_bytes; int log_last; @@ -1271,7 +1322,10 @@ struct ubifs_info { int min_io_size; int min_io_shift; + int max_write_size; + int max_write_shift; int leb_size; + int leb_start; int half_leb_size; int idx_leb_size; int leb_cnt; @@ -1285,21 +1339,10 @@ struct ubifs_info { atomic_long_t dirty_zn_cnt; atomic_long_t clean_zn_cnt; - long long budg_idx_growth; - long long budg_data_growth; - long long budg_dd_growth; - long long budg_uncommitted_idx; spinlock_t space_lock; - int min_idx_lebs; - unsigned long long old_idx_sz; - unsigned long long calc_idx_sz; struct ubifs_lp_stats lst; - unsigned int nospace:1; - unsigned int nospace_rp:1; - - int page_budget; - int inode_budget; - int dent_budget; + struct ubifs_budg_info bi; + unsigned long long calc_idx_sz; int ref_node_alsz; int mst_node_alsz; @@ -1385,6 +1428,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; @@ -1398,28 +1442,26 @@ struct ubifs_info { long long rp_size; long long report_rp_size; - uid_t rp_uid; - gid_t rp_gid; + kuid_t rp_uid; + kgid_t 
rp_gid; /* The below fields are used only during mounting and re-mounting */ - int empty; - struct rb_root replay_tree; + unsigned int empty:1; + unsigned int need_recovery:1; + unsigned int replaying:1; + unsigned int mounting:1; + unsigned int remounting_rw:1; + unsigned int probing:1; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; unsigned long long replay_sqnum; - int need_recovery; - int replaying; struct list_head unclean_leb_list; struct ubifs_mst_node *rcvrd_mst_node; struct rb_root size_tree; - int remounting_rw; - int always_chk_crc; struct ubifs_mount_opts mount_opts; -#ifdef CONFIG_UBIFS_FS_DEBUG struct ubifs_debug_info *dbg; -#endif }; extern struct list_head ubifs_infos; @@ -1438,16 +1480,23 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg); +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len); +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len); +int ubifs_leb_unmap(struct ubifs_info *c, int lnum); +int ubifs_leb_map(struct ubifs_info *c, int lnum); +int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); -int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, - int dtype); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs); int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs); int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int lnum, int offs); int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, - int offs, int dtype); + int offs); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); @@ -1586,7 +1635,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); @@ -1605,6 +1657,7 @@ int ubifs_write_master(struct ubifs_info *c); int ubifs_read_superblock(struct ubifs_info *c); struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); +int ubifs_fixup_free_space(struct ubifs_info *c); /* replay.c */ int ubifs_validate_entry(struct ubifs_info *c, @@ -1689,12 +1742,12 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, int datasync); +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, - int mode); + umode_t mode); int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ 
-1713,11 +1766,11 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index c74400f88fe..5e0a63b1b0d 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -56,6 +56,7 @@ */ #include "ubifs.h" +#include <linux/fs.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> @@ -79,9 +80,8 @@ enum { SECURITY_XATTR, }; -static const struct inode_operations none_inode_operations; -static const struct address_space_operations none_address_operations; -static const struct file_operations none_file_operations; +static const struct inode_operations empty_iops; +static const struct file_operations empty_fops; /** * create_xattr - create an extended attribute. @@ -130,20 +130,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, } /* Re-define all operations to be "nothing" */ - inode->i_mapping->a_ops = &none_address_operations; - inode->i_op = &none_inode_operations; - inode->i_fop = &none_file_operations; + inode->i_mapping->a_ops = &empty_aops; + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; ui = ubifs_inode(inode); ui->xattr = 1; ui->flags |= UBIFS_XATTR_FL; - ui->data = kmalloc(size, GFP_NOFS); + ui->data = kmemdup(value, size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = size; @@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, return err; kfree(ui->data); - ui->data = kmalloc(size, GFP_NOFS); + ui->data = kmemdup(value, size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = size; @@ -300,13 +298,13 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_dent_node *xent; union ubifs_key key; int err, type; - dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, + host->i_ino, dentry, size); ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) @@ -363,14 +361,14 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_inode *ui; 
struct ubifs_dent_node *xent; union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, + host->i_ino, dentry, size); err = check_namespace(&nm); if (err < 0) @@ -401,8 +399,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { - dbg_err("buffer size %zd, xattr len %d", - size, ui->data_len); + ubifs_err("buffer size %zd, xattr len %d", + size, ui->data_len); err = -ERANGE; goto out_iput; } @@ -428,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) int err, len, written = 0; struct qstr nm = { .name = NULL }; - dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, - dentry->d_name.len, dentry->d_name.name, size); + dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, + dentry, size); len = host_ui->xattr_names + host_ui->xattr_cnt; if (!buffer) @@ -526,13 +524,13 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode, *host = dentry->d_inode; struct ubifs_info *c = host->i_sb->s_fs_info; - struct qstr nm = { .name = name, .len = strlen(name) }; + struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_dent_node *xent; union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%.*s')", name, - host->i_ino, dentry->d_name.len, dentry->d_name.name); + dbg_gen("xattr '%s', ino %lu ('%pd')", name, + host->i_ino, dentry); ubifs_assert(mutex_is_locked(&host->i_mutex)); err = check_namespace(&nm); @@ -558,10 +556,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) } ubifs_assert(inode->i_nlink == 1); - inode->i_nlink = 0; + clear_nlink(inode); err = remove_xattr(c, host, inode, &nm); if (err) - inode->i_nlink = 1; + set_nlink(inode, 1); /* If @i_nlink is 0, 'iput()' will delete the inode */ iput(inode); |
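The xattr hunks above fold the open-coded kmalloc()+memcpy() pairs in create_xattr() and change_xattr() into kmemdup(). A minimal sketch of the resulting pattern, with the error path reduced to a bare return for brevity:

	ui->data = kmemdup(value, size, GFP_NOFS);
	if (!ui->data)
		return -ENOMEM;		/* was: kmalloc(size, GFP_NOFS) followed by memcpy() */
	inode->i_size = ui->ui_size = size;
	ui->data_len = size;

kmemdup() fails only when the allocation fails, so the single NULL check replaces both the old allocation check and the separate copy step.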
