aboutsummaryrefslogtreecommitdiff
path: root/fs/ubifs/journal.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs/journal.c')
-rw-r--r--fs/ubifs/journal.c224
1 files changed, 149 insertions, 75 deletions
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 283155abe5f..0e045e75abd 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
*/
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{
- int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
+ int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
/*
@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* better to try to allocate space at the ends of eraseblocks. This is
* what the squeeze parameter does.
*/
+ ubifs_assert(!c->ro_media && !c->ro_mount);
squeeze = (jhead == BASEHD);
again:
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- if (c->ro_media) {
+ if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
@@ -139,16 +140,9 @@ again:
* Write buffer wasn't seek'ed or there is no enough space - look for an
* LEB with some empty space.
*/
- lnum = ubifs_find_free_space(c, len, &free, squeeze);
- if (lnum >= 0) {
- /* Found an LEB, add it to the journal head */
- offs = c->leb_size - free;
- err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
- if (err)
- goto out_return;
- /* A new bud was successfully allocated and added to the log */
+ lnum = ubifs_find_free_space(c, len, &offs, squeeze);
+ if (lnum >= 0)
goto out;
- }
err = lnum;
if (err != -ENOSPC)
@@ -159,7 +153,7 @@ again:
* some. But the write-buffer mutex has to be unlocked because
* GC also takes it.
*/
- dbg_jnl("no free space jhead %d, run GC", jhead);
+ dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
mutex_unlock(&wbuf->io_mutex);
lnum = ubifs_garbage_collect(c, 0);
@@ -174,7 +168,8 @@ again:
* because we dropped @wbuf->io_mutex, so try once
* again.
*/
- dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
+ dbg_jnl("GC couldn't make a free LEB for jhead %s",
+ dbg_jhead(jhead));
if (retries++ < 2) {
dbg_jnl("retry (%d)", retries);
goto again;
@@ -185,13 +180,13 @@ again:
}
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
+ dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
avail = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum != -1 && avail >= len) {
/*
* Someone else has switched the journal head and we have
- * enough space now. This happens when more then one process is
+ * enough space now. This happens when more than one process is
* trying to write to the same journal head at the same time.
*/
dbg_jnl("return LEB %d back, already have LEB %d:%d",
@@ -202,13 +197,24 @@ again:
return 0;
}
- err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
- if (err)
- goto out_return;
offs = 0;
out:
- err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
+ /*
+ * Make sure we synchronize the write-buffer before we add the new bud
+ * to the log. Otherwise we may have a power cut after the log
+ * reference node for the last bud (@lnum) is written but before the
+ * write-buffer data are written to the next-to-last bud
+ * (@wbuf->lnum). And the effect would be that the recovery would see
+ * that there is corruption in the next-to-last bud.
+ */
+ err = ubifs_wbuf_sync_nolock(wbuf);
+ if (err)
+ goto out_return;
+ err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
+ if (err)
+ goto out_return;
+ err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs);
if (err)
goto out_unlock;
@@ -256,7 +262,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
ubifs_prepare_node(c, node, len, 0);
return ubifs_wbuf_write_nolock(wbuf, node, len);
@@ -286,7 +293,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
err = ubifs_wbuf_write_nolock(wbuf, buf, len);
if (err)
@@ -377,11 +385,9 @@ out:
if (err == -ENOSPC) {
/* This are some budgeting problems, print useful information */
down_write(&c->commit_sem);
- spin_lock(&c->space_lock);
- dbg_dump_stack();
- dbg_dump_budg(c);
- spin_unlock(&c->space_lock);
- dbg_dump_lprops(c);
+ dump_stack();
+ ubifs_dump_budg(c, &c->bi);
+ ubifs_dump_lprops(c);
cmt_retries = dbg_check_lprops(c);
up_write(&c->commit_sem);
}
@@ -447,13 +453,11 @@ static int get_dent_type(int mode)
* @ino: buffer in which to pack inode node
* @inode: inode to pack
* @last: indicates the last node of the group
- * @last_reference: non-zero if this is a deletion inode
*/
static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
- const struct inode *inode, int last,
- int last_reference)
+ const struct inode *inode, int last)
{
- int data_len = 0;
+ int data_len = 0, last_reference = !inode->i_nlink;
struct ubifs_inode *ui = ubifs_inode(inode);
ino->ch.node_type = UBIFS_INO_NODE;
@@ -465,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ino->uid = cpu_to_le32(inode->i_uid);
- ino->gid = cpu_to_le32(inode->i_gid);
+ ino->uid = cpu_to_le32(i_uid_read(inode));
+ ino->gid = cpu_to_le32(i_gid_read(inode));
ino->mode = cpu_to_le32(inode->i_mode);
ino->flags = cpu_to_le32(ui->flags);
ino->size = cpu_to_le64(ui->ui_size);
@@ -596,9 +600,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
ubifs_prep_grp_node(c, dent, dlen, 0);
ino = (void *)dent + aligned_dlen;
- pack_inode(c, ino, inode, 0, last_reference);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + aligned_ilen;
- pack_inode(c, ino, dir, 1, 0);
+ pack_inode(c, ino, dir, 1);
if (last_reference) {
err = ubifs_add_orphan(c, inode->i_ino);
@@ -606,6 +610,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
release_head(c, BASEHD);
goto out_finish;
}
+ ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
@@ -664,6 +669,7 @@ out_free:
out_release:
release_head(c, BASEHD);
+ kfree(dent);
out_ro:
ubifs_ro_mode(c, err);
if (last_reference)
@@ -688,23 +694,33 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
{
struct ubifs_data_node *data;
int err, lnum, offs, compr_type, out_len;
- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
+ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
struct ubifs_inode *ui = ubifs_inode(inode);
- dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
- key_block(c, key), len, DBGKEY(key));
+ dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
+ (unsigned long)key_inum(c, key), key_block(c, key), len);
ubifs_assert(len <= UBIFS_BLOCK_SIZE);
- data = kmalloc(dlen, GFP_NOFS);
- if (!data)
- return -ENOMEM;
+ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
+ if (!data) {
+ /*
+ * Fall-back to the write reserve buffer. Note, we might be
+ * currently on the memory reclaim path, when the kernel is
+ * trying to free some memory by writing out dirty pages. The
+ * write reserve buffer helps us to guarantee that we are
+ * always able to write the data.
+ */
+ allocated = 0;
+ mutex_lock(&c->write_reserve_mutex);
+ data = c->write_reserve_buf;
+ }
data->ch.node_type = UBIFS_DATA_NODE;
key_write(c, key, &data->key);
data->size = cpu_to_le32(len);
zero_data_node_unused(data);
- if (!(ui->flags && UBIFS_COMPR_FL))
+ if (!(ui->flags & UBIFS_COMPR_FL))
/* Compression is disabled for this inode */
compr_type = UBIFS_COMPR_NONE;
else
@@ -733,7 +749,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
goto out_ro;
finish_reservation(c);
- kfree(data);
+ if (!allocated)
+ mutex_unlock(&c->write_reserve_mutex);
+ else
+ kfree(data);
return 0;
out_release:
@@ -742,7 +761,10 @@ out_ro:
ubifs_ro_mode(c, err);
finish_reservation(c);
out_free:
- kfree(data);
+ if (!allocated)
+ mutex_unlock(&c->write_reserve_mutex);
+ else
+ kfree(data);
return err;
}
@@ -750,30 +772,25 @@ out_free:
* ubifs_jnl_write_inode - flush inode to the journal.
* @c: UBIFS file-system description object
* @inode: inode to flush
- * @deletion: inode has been deleted
*
* This function writes inode @inode to the journal. If the inode is
* synchronous, it also synchronizes the write-buffer. Returns zero in case of
* success and a negative error code in case of failure.
*/
-int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
- int deletion)
+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
- int err, len, lnum, offs, sync = 0;
+ int err, lnum, offs;
struct ubifs_ino_node *ino;
struct ubifs_inode *ui = ubifs_inode(inode);
+ int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
- dbg_jnl("ino %lu%s", inode->i_ino,
- deletion ? " (last reference)" : "");
- if (deletion)
- ubifs_assert(inode->i_nlink == 0);
+ dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
- len = UBIFS_INO_NODE_SZ;
/*
* If the inode is being deleted, do not write the attached data. No
* need to synchronize the write-buffer either.
*/
- if (!deletion) {
+ if (!last_reference) {
len += ui->data_len;
sync = IS_SYNC(inode);
}
@@ -786,7 +803,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 1, deletion);
+ pack_inode(c, ino, inode, 1);
err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
if (err)
goto out_release;
@@ -795,7 +812,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
inode->i_ino);
release_head(c, BASEHD);
- if (deletion) {
+ if (last_reference) {
err = ubifs_tnc_remove_ino(c, inode->i_ino);
if (err)
goto out_ro;
@@ -828,6 +845,65 @@ out_free:
}
/**
+ * ubifs_jnl_delete_inode - delete an inode.
+ * @c: UBIFS file-system description object
+ * @inode: inode to delete
+ *
+ * This function deletes inode @inode which includes removing it from orphans,
+ * deleting it from TNC and, in some cases, writing a deletion inode to the
+ * journal.
+ *
+ * When regular file inodes are unlinked or a directory inode is removed, the
+ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
+ * direntry to the media, and adds the inode to orphans. After this, when the
+ * last reference to this inode has been dropped, this function is called. In
+ * general, it has to write one more deletion inode to the media, because if
+ * a commit happened between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
+ * anymore, and in fact it might not be on the flash anymore, because it might
+ * have been garbage-collected already. And for optimization reasons UBIFS does
+ * not read the orphan area if it has been unmounted cleanly, so it would have
+ * no indication in the journal that there is a deleted inode which has to be
+ * removed from TNC.
+ *
+ * However, if there was no commit between 'ubifs_jnl_update()' and
+ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
+ * inode to the media for the second time. And this is quite a typical case.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
+{
+ int err;
+ struct ubifs_inode *ui = ubifs_inode(inode);
+
+ ubifs_assert(inode->i_nlink == 0);
+
+ if (ui->del_cmtno != c->cmt_no)
+ /* A commit happened for sure */
+ return ubifs_jnl_write_inode(c, inode);
+
+ down_read(&c->commit_sem);
+ /*
+ * Check commit number again, because the first test has been done
+ * without @c->commit_sem, so a commit might have happened.
+ */
+ if (ui->del_cmtno != c->cmt_no) {
+ up_read(&c->commit_sem);
+ return ubifs_jnl_write_inode(c, inode);
+ }
+
+ err = ubifs_tnc_remove_ino(c, inode->i_ino);
+ if (err)
+ ubifs_ro_mode(c, err);
+ else
+ ubifs_delete_orphan(c, inode->i_ino);
+ up_read(&c->commit_sem);
+ return err;
+}
+
+/**
* ubifs_jnl_rename - rename a directory entry.
* @c: UBIFS file-system description object
* @old_dir: parent inode of directory entry to rename
@@ -857,10 +933,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
int move = (old_dir != new_dir);
struct ubifs_inode *uninitialized_var(new_ui);
- dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
- old_dentry->d_name.len, old_dentry->d_name.name,
- old_dir->i_ino, new_dentry->d_name.len,
- new_dentry->d_name.name, new_dir->i_ino);
+ dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu",
+ old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino);
ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
@@ -917,16 +991,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
p = (void *)dent2 + aligned_dlen2;
if (new_inode) {
- pack_inode(c, p, new_inode, 0, last_reference);
+ pack_inode(c, p, new_inode, 0);
p += ALIGN(ilen, 8);
}
if (!move)
- pack_inode(c, p, old_dir, 1, 0);
+ pack_inode(c, p, old_dir, 1);
else {
- pack_inode(c, p, old_dir, 0, 0);
+ pack_inode(c, p, old_dir, 0);
p += ALIGN(plen, 8);
- pack_inode(c, p, new_dir, 1, 0);
+ pack_inode(c, p, new_dir, 1);
}
if (last_reference) {
@@ -935,6 +1009,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
release_head(c, BASEHD);
goto out_finish;
}
+ new_ui->del_cmtno = c->cmt_no;
}
err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
@@ -1074,7 +1149,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
ino_t inum = inode->i_ino;
unsigned int blk;
- dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
+ dbg_jnl("ino %lu, size %lld -> %lld",
+ (unsigned long)inum, old_size, new_size);
ubifs_assert(!ui->data_len);
ubifs_assert(S_ISREG(inode->i_mode));
ubifs_assert(mutex_is_locked(&ui->ui_mutex));
@@ -1098,7 +1174,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
blk = new_size >> UBIFS_BLOCK_SHIFT;
data_key_init(c, &key, inum, blk);
- dbg_jnl("last block key %s", DBGKEY(&key));
+ dbg_jnlk(&key, "last block key ");
err = ubifs_tnc_lookup(c, &key, dn);
if (err == -ENOENT)
dlen = 0; /* Not found (so it is a hole) */
@@ -1131,7 +1207,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, inode, 0, 0);
+ pack_inode(c, ino, inode, 0);
ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
if (dlen)
ubifs_prep_grp_node(c, dn, dlen, 1);
@@ -1164,7 +1240,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
data_key_init(c, &key, inum, blk);
bit = old_size & (UBIFS_BLOCK_SIZE - 1);
- blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
+ blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1);
data_key_init(c, &to_key, inum, blk);
err = ubifs_tnc_remove_range(c, &key, &to_key);
@@ -1189,7 +1265,6 @@ out_free:
return err;
}
-#ifdef CONFIG_UBIFS_FS_XATTR
/**
* ubifs_jnl_delete_xattr - delete an extended attribute.
@@ -1251,9 +1326,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
ubifs_prep_grp_node(c, xent, xlen, 0);
ino = (void *)xent + aligned_xlen;
- pack_inode(c, ino, inode, 0, 1);
+ pack_inode(c, ino, inode, 0);
ino = (void *)ino + UBIFS_INO_NODE_SZ;
- pack_inode(c, ino, host, 1, 0);
+ pack_inode(c, ino, host, 1);
err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
if (!sync && !err)
@@ -1310,7 +1385,7 @@ out_ro:
* @host: host inode
*
* This function writes the updated version of an extended attribute inode and
- * the host inode tho the journal (to the base head). The host inode is written
+ * the host inode to the journal (to the base head). The host inode is written
* after the extended attribute inode in order to guarantee that the extended
* attribute will be flushed when the inode is synchronized by 'fsync()' and
* consequently, the write-buffer is synchronized. This function returns zero
@@ -1320,7 +1395,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
const struct inode *host)
{
int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
- struct ubifs_inode *host_ui = ubifs_inode(inode);
+ struct ubifs_inode *host_ui = ubifs_inode(host);
struct ubifs_ino_node *ino;
union ubifs_key key;
int sync = IS_DIRSYNC(host);
@@ -1344,8 +1419,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
if (err)
goto out_free;
- pack_inode(c, ino, host, 0, 0);
- pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
+ pack_inode(c, ino, host, 0);
+ pack_inode(c, (void *)ino + aligned_len1, inode, 1);
err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
if (!sync && !err) {
@@ -1384,4 +1459,3 @@ out_free:
return err;
}
-#endif /* CONFIG_UBIFS_FS_XATTR */