aboutsummaryrefslogtreecommitdiff
path: root/fs/ubifs/tnc.c
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-14 19:08:37 +0300
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-15 17:35:15 +0300
commit1e51764a3c2ac05a23a22b2a95ddee4d9bffb16d (patch)
tree919debdd48aef9eee9ff0e8f465ef2649325b993 /fs/ubifs/tnc.c
parente56a99d5a42dcb91e622ae7a0289d8fb2ddabffb (diff)
UBIFS: add new flash file system
This is a new flash file system. See http://www.linux-mtd.infradead.org/doc/ubifs.html Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Diffstat (limited to 'fs/ubifs/tnc.c')
-rw-r--r--fs/ubifs/tnc.c2956
1 files changed, 2956 insertions, 0 deletions
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
new file mode 100644
index 00000000000..e909f4a9644
--- /dev/null
+++ b/fs/ubifs/tnc.c
@@ -0,0 +1,2956 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ * Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements TNC (Tree Node Cache) which caches indexing nodes of
+ * the UBIFS B-tree.
+ *
+ * At the moment the locking rules of the TNC tree are quite simple and
+ * straightforward. We just have a mutex and lock it when we traverse the
+ * tree. If a znode is not in memory, we read it from flash while still having
+ * the mutex locked.
+ */
+
+#include <linux/crc32.h>
+#include "ubifs.h"
+
+/*
+ * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
+ * @NAME_LESS: name corresponding to the first argument is less than second
+ * @NAME_MATCHES: names match
+ * @NAME_GREATER: name corresponding to the second argument is greater than
+ * first
+ * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
+ *
+ * These constants were introduce to improve readability.
+ */
+enum {
+ NAME_LESS = 0,
+ NAME_MATCHES = 1,
+ NAME_GREATER = 2,
+ NOT_ON_MEDIA = 3,
+};
+
+/**
+ * insert_old_idx - record an index node obsoleted since the last commit start.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of obsoleted index node
+ * @offs: offset of obsoleted index node
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ *
+ * For recovery, there must always be a complete intact version of the index on
+ * flash at all times. That is called the "old index". It is the index as at the
+ * time of the last successful commit. Many of the index nodes in the old index
+ * may be dirty, but they must not be erased until the next successful commit
+ * (at which point that index becomes the old index).
+ *
+ * That means that the garbage collection and the in-the-gaps method of
+ * committing must be able to determine if an index node is in the old index.
+ * Most of the old index nodes can be found by looking up the TNC using the
+ * 'lookup_znode()' function. However, some of the old index nodes may have
+ * been deleted from the current index or may have been changed so much that
+ * they cannot be easily found. In those cases, an entry is added to an RB-tree.
+ * That is what this function does. The RB-tree is ordered by LEB number and
+ * offset because they uniquely identify the old index node.
+ */
+static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
+{
+ struct ubifs_old_idx *old_idx, *o;
+ struct rb_node **p, *parent = NULL;
+
+ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
+ if (unlikely(!old_idx))
+ return -ENOMEM;
+ old_idx->lnum = lnum;
+ old_idx->offs = offs;
+
+ p = &c->old_idx.rb_node;
+ while (*p) {
+ parent = *p;
+ o = rb_entry(parent, struct ubifs_old_idx, rb);
+ if (lnum < o->lnum)
+ p = &(*p)->rb_left;
+ else if (lnum > o->lnum)
+ p = &(*p)->rb_right;
+ else if (offs < o->offs)
+ p = &(*p)->rb_left;
+ else if (offs > o->offs)
+ p = &(*p)->rb_right;
+ else {
+ ubifs_err("old idx added twice!");
+ kfree(old_idx);
+ return 0;
+ }
+ }
+ rb_link_node(&old_idx->rb, parent, p);
+ rb_insert_color(&old_idx->rb, &c->old_idx);
+ return 0;
+}
+
+/**
+ * insert_old_idx_znode - record a znode obsoleted since last commit start.
+ * @c: UBIFS file-system description object
+ * @znode: znode of obsoleted index node
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ */
+int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode)
+{
+ if (znode->parent) {
+ struct ubifs_zbranch *zbr;
+
+ zbr = &znode->parent->zbranch[znode->iip];
+ if (zbr->len)
+ return insert_old_idx(c, zbr->lnum, zbr->offs);
+ } else
+ if (c->zroot.len)
+ return insert_old_idx(c, c->zroot.lnum,
+ c->zroot.offs);
+ return 0;
+}
+
+/**
+ * ins_clr_old_idx_znode - record a znode obsoleted since last commit start.
+ * @c: UBIFS file-system description object
+ * @znode: znode of obsoleted index node
+ *
+ * Returns %0 on success, and a negative error code on failure.
+ */
+static int ins_clr_old_idx_znode(struct ubifs_info *c,
+ struct ubifs_znode *znode)
+{
+ int err;
+
+ if (znode->parent) {
+ struct ubifs_zbranch *zbr;
+
+ zbr = &znode->parent->zbranch[znode->iip];
+ if (zbr->len) {
+ err = insert_old_idx(c, zbr->lnum, zbr->offs);
+ if (err)
+ return err;
+ zbr->lnum = 0;
+ zbr->offs = 0;
+ zbr->len = 0;
+ }
+ } else
+ if (c->zroot.len) {
+ err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
+ if (err)
+ return err;
+ c->zroot.lnum = 0;
+ c->zroot.offs = 0;
+ c->zroot.len = 0;
+ }
+ return 0;
+}
+
+/**
+ * destroy_old_idx - destroy the old_idx RB-tree.
+ * @c: UBIFS file-system description object
+ *
+ * During start commit, the old_idx RB-tree is used to avoid overwriting index
+ * nodes that were in the index last commit but have since been deleted. This
+ * is necessary for recovery i.e. the old index must be kept intact until the
+ * new index is successfully written. The old-idx RB-tree is used for the
+ * in-the-gaps method of writing index nodes and is destroyed every commit.
+ */
+void destroy_old_idx(struct ubifs_info *c)
+{
+ struct rb_node *this = c->old_idx.rb_node;
+ struct ubifs_old_idx *old_idx;
+
+ while (this) {
+ if (this->rb_left) {
+ this = this->rb_left;
+ continue;
+ } else if (this->rb_right) {
+ this = this->rb_right;
+ continue;
+ }
+ old_idx = rb_entry(this, struct ubifs_old_idx, rb);
+ this = rb_parent(this);
+ if (this) {
+ if (this->rb_left == &old_idx->rb)
+ this->rb_left = NULL;
+ else
+ this->rb_right = NULL;
+ }
+ kfree(old_idx);
+ }
+ c->old_idx = RB_ROOT;
+}
+
+/**
+ * copy_znode - copy a dirty znode.
+ * @c: UBIFS file-system description object
+ * @znode: znode to copy
+ *
+ * A dirty znode being committed may not be changed, so it is copied.
+ */
+static struct ubifs_znode *copy_znode(struct ubifs_info *c,
+ struct ubifs_znode *znode)
+{
+ struct ubifs_znode *zn;
+
+ zn = kmalloc(c->max_znode_sz, GFP_NOFS);
+ if (unlikely(!zn))
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(zn, znode, c->max_znode_sz);
+ zn->cnext = NULL;
+ __set_bit(DIRTY_ZNODE, &zn->flags);
+ __clear_bit(COW_ZNODE, &zn->flags);
+
+ ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
+ __set_bit(OBSOLETE_ZNODE, &znode->flags);
+
+ if (znode->level != 0) {
+ int i;
+ const int n = zn->child_cnt;
+
+ /* The children now have new parent */
+ for (i = 0; i < n; i++) {
+ struct ubifs_zbranch *zbr = &zn->zbranch[i];
+
+ if (zbr->znode)
+ zbr->znode->parent = zn;
+ }
+ }
+
+ atomic_long_inc(&c->dirty_zn_cnt);
+ return zn;
+}
+
+/**
+ * add_idx_dirt - add dirt due to a dirty znode.
+ * @c: UBIFS file-system description object
+ * @lnum: LEB number of index node
+ * @dirt: size of index node
+ *
+ * This function updates lprops dirty space and the new size of the index.
+ */
+static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
+{
+ c->calc_idx_sz -= ALIGN(dirt, 8);
+ return ubifs_add_dirt(c, lnum, dirt);
+}
+
+/**
+ * dirty_cow_znode - ensure a znode is not being committed.
+ * @c: UBIFS file-system description object
+ * @zbr: branch of znode to check
+ *
+ * Returns dirtied znode on success or negative error code on failure.
+ */
+static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
+ struct ubifs_zbranch *zbr)
+{
+ struct ubifs_znode *znode = zbr->znode;
+ struct ubifs_znode *zn;
+ int err;
+
+ if (!test_bit(COW_ZNODE, &znode->flags)) {
+ /* znode is not being committed */
+ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
+ atomic_long_inc(&c->dirty_zn_cnt);
+ atomic_long_dec(&c->clean_zn_cnt);
+ atomic_long_dec(&ubifs_clean_zn_cnt);
+ err = add_idx_dirt(c, zbr->lnum, zbr->len);
+ if (unlikely(err))
+ return ERR_PTR(err);
+ }
+ return znode;
+ }
+
+ zn = copy_znode(c, znode);
+ if (unlikely(IS_ERR(zn)))
+ return zn;
+
+ if (zbr->len) {
+ err = insert_old_idx(c, zbr->lnum, zbr->offs);
+ if (unlikely(err))
+ return ERR_PTR(err);
+ err = add_idx_dirt(c, zbr->lnum, zbr->len);
+ } else
+ err = 0;
+
+ zbr->znode = zn;
+ zbr->lnum = 0;
+ zbr->offs = 0;
+ zbr->len = 0;
+
+ if (unlikely(err))
+ return ERR_PTR(err);
+ return zn;
+}
+
+/**
+ * lnc_add - add a leaf node to the leaf node cache.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of leaf node
+ * @node: leaf node
+ *
+ * Leaf nodes are non-index nodes directory entry nodes or data nodes. The
+ * purpose of the leaf node cache is to save re-reading the same leaf node over
+ * and over again. Most things are cached by VFS, however the file system must
+ * cache directory entries for readdir and for resolving hash collisions. The
+ * present implementation of the leaf node cache is extremely simple, and
+ * allows for error returns that are not used but that may be needed if a more
+ * complex implementation is created.
+ *
+ * Note, this function does not add the @node object to LNC directly, but
+ * allocates a copy of the object and adds the copy to LNC. The reason for this
+ * is that @node has been allocated outside of the TNC subsystem and will be
+ * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC
+ * may be changed at any time, e.g. freed by the shrinker.
+ */
+static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+ const void *node)
+{
+ int err;
+ void *lnc_node;
+ const struct ubifs_dent_node *dent = node;
+
+ ubifs_assert(!zbr->leaf);
+ ubifs_assert(zbr->len != 0);
+ ubifs_assert(is_hash_key(c, &zbr->key));
+
+ err = ubifs_validate_entry(c, dent);
+ if (err) {
+ dbg_dump_stack();
+ dbg_dump_node(c, dent);
+ return err;
+ }
+
+ lnc_node = kmalloc(zbr->len, GFP_NOFS);
+ if (!lnc_node)
+ /* We don't have to have the cache, so no error */
+ return 0;
+
+ memcpy(lnc_node, node, zbr->len);
+ zbr->leaf = lnc_node;
+ return 0;
+}
+
+ /**
+ * lnc_add_directly - add a leaf node to the leaf-node-cache.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of leaf node
+ * @node: leaf node
+ *
+ * This function is similar to 'lnc_add()', but it does not create a copy of
+ * @node but inserts @node to TNC directly.
+ */
+static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+ void *node)
+{
+ int err;
+
+ ubifs_assert(!zbr->leaf);
+ ubifs_assert(zbr->len != 0);
+
+ err = ubifs_validate_entry(c, node);
+ if (err) {
+ dbg_dump_stack();
+ dbg_dump_node(c, node);
+ return err;
+ }
+
+ zbr->leaf = node;
+ return 0;
+}
+
+/**
+ * lnc_free - remove a leaf node from the leaf node cache.
+ * @zbr: zbranch of leaf node
+ * @node: leaf node
+ */
+static void lnc_free(struct ubifs_zbranch *zbr)
+{
+ if (!zbr->leaf)
+ return;
+ kfree(zbr->leaf);
+ zbr->leaf = NULL;
+}
+
+/**
+ * tnc_read_node_nm - read a "hashed" leaf node.
+ * @c: UBIFS file-system description object
+ * @zbr: key and position of the node
+ * @node: node is returned here
+ *
+ * This function reads a "hashed" node defined by @zbr from the leaf node cache
+ * (in it is there) or from the hash media, in which case the node is also
+ * added to LNC. Returns zero in case of success or a negative negative error
+ * code in case of failure.
+ */
+static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+ void *node)
+{
+ int err;
+
+ ubifs_assert(is_hash_key(c, &zbr->key));
+
+ if (zbr->leaf) {
+ /* Read from the leaf node cache */
+ ubifs_assert(zbr->len != 0);
+ memcpy(node, zbr->leaf, zbr->len);
+ return 0;
+ }
+
+ err = ubifs_tnc_read_node(c, zbr, node);
+ if (err)
+ return err;
+
+ /* Add the node to the leaf node cache */
+ err = lnc_add(c, zbr, node);
+ return err;
+}
+
+/**
+ * try_read_node - read a node if it is a node.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to read to
+ * @type: node type
+ * @len: node length (not aligned)
+ * @lnum: LEB number of node to read
+ * @offs: offset of node to read
+ *
+ * This function tries to read a node of known type and length, checks it and
+ * stores it in @buf. This function returns %1 if a node is present and %0 if
+ * a node is not present. A negative error code is returned for I/O errors.
+ * This function performs that same function as ubifs_read_node except that
+ * it does not require that there is actually a node present and instead
+ * the return code indicates if a node was read.
+ */
+static int try_read_node(const struct ubifs_info *c, void *buf, int type,
+ int len, int lnum, int offs)
+{
+ int err, node_len;
+ struct ubifs_ch *ch = buf;
+ uint32_t crc, node_crc;
+
+ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+
+ err = ubi_read(c->ubi, lnum, buf, offs, len);
+ if (err) {
+ ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
+ type, lnum, offs, err);
+ return err;
+ }
+
+ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
+ return 0;
+
+ if (ch->node_type != type)
+ return 0;
+
+ node_len = le32_to_cpu(ch->len);
+ if (node_len != len)
+ return 0;
+
+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+ node_crc = le32_to_cpu(ch->crc);
+ if (crc != node_crc)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * fallible_read_node - try to read a leaf node.
+ * @c: UBIFS file-system description object
+ * @key: key of node to read
+ * @zbr: position of node
+ * @node: node returned
+ *
+ * This function tries to read a node and returns %1 if the node is read, %0
+ * if the node is not present, and a negative error code in the case of error.
+ */
+static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
+ struct ubifs_zbranch *zbr, void *node)
+{
+ int ret;
+
+ dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
+
+ ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
+ zbr->offs);
+ if (ret == 1) {
+ union ubifs_key node_key;
+ struct ubifs_dent_node *dent = node;
+
+ /* All nodes have key in the same place */
+ key_read(c, &dent->key, &node_key);
+ if (keys_cmp(c, key, &node_key) != 0)
+ ret = 0;
+ }
+ if (ret == 0)
+ dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
+ zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
+ return ret;
+}
+
+/**
+ * matches_name - determine if a direntry or xattr entry matches a given name.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of dent
+ * @nm: name to match
+ *
+ * This function checks if xentry/direntry referred by zbranch @zbr matches name
+ * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
+ * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
+ * of failure, a negative error code is returned.
+ */
+static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
+ const struct qstr *nm)
+{
+ struct ubifs_dent_node *dent;
+ int nlen, err;
+
+ /* If possible, match against the dent in the leaf node cache */
+ if (!zbr->leaf) {
+ dent = kmalloc(zbr->len, GFP_NOFS);
+ if (!dent)
+ return -ENOMEM;
+
+ err = ubifs_tnc_read_node(c, zbr, dent);
+ if (err)
+ goto out_free;
+
+ /* Add the node to the leaf node cache */
+ err = lnc_add_directly(c, zbr, dent);
+ if (err)
+ goto out_free;
+ } else
+ dent = zbr->leaf;
+
+ nlen = le16_to_cpu(dent->nlen);
+ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
+ if (err == 0) {
+ if (nlen == nm->len)
+ return NAME_MATCHES;
+ else if (nlen < nm->len)
+ return NAME_LESS;
+ else
+ return NAME_GREATER;
+ } else if (err < 0)
+ return NAME_LESS;
+ else
+ return NAME_GREATER;
+
+out_free:
+ kfree(dent);
+ return err;
+}
+
+/**
+ * get_znode - get a TNC znode that may not be loaded yet.
+ * @c: UBIFS file-system description object
+ * @znode: parent znode
+ * @n: znode branch slot number
+ *
+ * This function returns the znode or a negative error code.
+ */
+static struct ubifs_znode *get_znode(struct ubifs_info *c,
+ struct ubifs_znode *znode, int n)
+{
+ struct ubifs_zbranch *zbr;
+
+ zbr = &znode->zbranch[n];
+ if (zbr->znode)
+ znode = zbr->znode;
+ else
+ znode = ubifs_load_znode(c, zbr, znode, n);
+ return znode;
+}
+
+/**
+ * tnc_next - find next TNC entry.
+ * @c: UBIFS file-system description object
+ * @zn: znode is passed and returned here
+ * @n: znode branch slot number is passed and returned here
+ *
+ * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
+ * no next entry, or a negative error code otherwise.
+ */
+static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
+{
+ struct ubifs_znode *znode = *zn;
+ int nn = *n;
+
+ nn += 1;
+ if (nn < znode->child_cnt) {
+ *n = nn;
+ return 0;
+ }
+ while (1) {
+ struct ubifs_znode *zp;
+
+ zp = znode->parent;
+ if (!zp)
+ return -ENOENT;
+ nn = znode->iip + 1;
+ znode = zp;
+ if (nn < znode->child_cnt) {
+ znode = get_znode(c, znode, nn);
+ if (IS_ERR(znode))
+ return PTR_ERR(znode);
+ while (znode->level != 0) {
+ znode = get_znode(c, znode, 0);
+ if (IS_ERR(znode))
+ return PTR_ERR(znode);
+ }
+ nn = 0;
+ break;
+ }
+ }
+ *zn = znode;
+ *n = nn;
+ return 0;
+}
+
+/**
+ * tnc_prev - find previous TNC entry.
+ * @c: UBIFS file-system description object
+ * @zn: znode is returned here
+ * @n: znode branch slot number is passed and returned here
+ *
+ * This function returns %0 if the previous TNC entry is found, %-ENOENT if
+ * there is no next entry, or a negative error code otherwise.
+ */
+static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
+{
+ struct ubifs_znode *znode = *zn;
+ int nn = *n;
+
+ if (nn > 0) {
+ *n = nn - 1;
+ return 0;
+ }
+ while (1) {
+ struct ubifs_znode *zp;
+
+ zp = znode->parent;
+ if (!zp)
+ return -ENOENT;
+ nn = znode->iip - 1;
+ znode = zp;
+ if (nn >= 0) {
+ znode = get_znode(c, znode, nn);
+ if (IS_ERR(znode))
+ return PTR_ERR(znode);
+ while (znode->level != 0) {
+ nn = znode->child_cnt - 1;
+ znode = get_znode(c, znode, nn);
+ if (IS_ERR(znode))
+ return PTR_ERR(znode);
+ }
+ nn = znode->child_cnt - 1;
+ break;
+ }
+ }
+ *zn = znode;
+ *n = nn;
+ return 0;
+}
+
+/**
+ * resolve_collision - resolve a collision.
+ * @c: UBIFS file-system description object
+ * @key: key of a directory or extended attribute entry
+ * @zn: znode is returned here
+ * @n: zbranch number is passed and returned here
+ * @nm: name of the entry
+ *
+ * This function is called for "hashed" keys to make sure that the found key
+ * really corresponds to the looked up node (directory or extended attribute
+ * entry). It returns %1 and sets @zn and @n if the collision is resolved.
+ * %0 is returned if @nm is not found and @zn and @n are set to the previous
+ * entry, i.e. to the entry after which @nm could follow if it were in TNC.
+ * This means that @n may be set to %-1 if the leftmost key in @zn is the
+ * previous one. A negative error code is returned on failures.
+ */
+static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
+ struct ubifs_znode **zn, int *n,
+ const struct qstr *nm)
+{
+ int err;
+
+ err = matches_name(c, &(*zn)->zbranch[*n], nm);
+ if (unlikely(err < 0))
+ return err;
+ if (err == NAME_MATCHES)
+ return 1;
+
+ if (err == NAME_GREATER) {
+ /* Look left */
+ while (1) {
+ err = tnc_prev(c, zn, n);
+ if (err == -ENOENT) {
+ ubifs_assert(*n == 0);
+ *n = -1;
+ return 0;
+ }
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
+ /*
+ * We have found the branch after which we would
+ * like to insert, but inserting in this znode
+ * may still be wrong. Consider the following 3
+ * znodes, in the case where we are resolving a
+ * collision with Key2.
+ *
+ * znode zp
+ * ----------------------
+ * level 1 | Key0 | Key1 |
+ * -----------------------
+ * | |
+ * znode za | | znode zb
+ * ------------ ------------
+ * level 0 | Key0 | | Key2 |
+ * ------------ ------------
+ *
+ * The lookup finds Key2 in znode zb. Lets say
+ * there is no match and the name is greater so
+ * we look left. When we find Key0, we end up
+ * here. If we return now, we will insert into
+ * znode za at slot n = 1. But that is invalid
+ * according to the parent's keys. Key2 must
+ * be inserted into znode zb.
+ *
+ * Note, this problem is not relevant for the
+ * case when we go right, because
+ * 'tnc_insert()' would correct the parent key.
+ */
+ if (*n == (*zn)->child_cnt - 1) {
+ err = tnc_next(c, zn, n);
+ if (err) {
+ /* Should be impossible */
+ ubifs_assert(0);
+ if (err == -ENOENT)
+ err = -EINVAL;
+ return err;
+ }
+ ubifs_assert(*n == 0);
+ *n = -1;
+ }
+ return 0;
+ }
+ err = matches_name(c, &(*zn)->zbranch[*n], nm);
+ if (err < 0)
+ return err;
+ if (err == NAME_LESS)
+ return 0;
+ if (err == NAME_MATCHES)
+ return 1;
+ ubifs_assert(err == NAME_GREATER);
+ }
+ } else {
+ int nn = *n;
+ struct ubifs_znode *znode = *zn;
+
+ /* Look right */
+ while (1) {
+ err = tnc_next(c, &znode, &nn);
+ if (err == -ENOENT)
+ return 0;
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &znode->zbranch[nn].key, key))
+ return 0;
+ err = matches_name(c, &znode->zbranch[nn], nm);
+ if (err < 0)
+ return err;
+ if (err == NAME_GREATER)
+ return 0;
+ *zn = znode;
+ *n = nn;
+ if (err == NAME_MATCHES)
+ return 1;
+ ubifs_assert(err == NAME_LESS);
+ }
+ }
+}
+
+/**
+ * fallible_matches_name - determine if a dent matches a given name.
+ * @c: UBIFS file-system description object
+ * @zbr: zbranch of dent
+ * @nm: name to match
+ *
+ * This is a "fallible" version of 'matches_name()' function which does not
+ * panic if the direntry/xentry referred by @zbr does not exist on the media.
+ *
+ * This function checks if xentry/direntry referred by zbranch @zbr matches name
+ * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr
+ * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA
+ * if xentry/direntry referred by @zbr does not exist on the media. A negative
+ * error code is returned in case of failure.
+ */
+static int fallible_matches_name(struct ubifs_info *c,
+ struct ubifs_zbranch *zbr,
+ const struct qstr *nm)
+{
+ struct ubifs_dent_node *dent;
+ int nlen, err;
+
+ /* If possible, match against the dent in the leaf node cache */
+ if (!zbr->leaf) {
+ dent = kmalloc(zbr->len, GFP_NOFS);
+ if (!dent)
+ return -ENOMEM;
+
+ err = fallible_read_node(c, &zbr->key, zbr, dent);
+ if (err < 0)
+ goto out_free;
+ if (err == 0) {
+ /* The node was not present */
+ err = NOT_ON_MEDIA;
+ goto out_free;
+ }
+ ubifs_assert(err == 1);
+
+ err = lnc_add_directly(c, zbr, dent);
+ if (err)
+ goto out_free;
+ } else
+ dent = zbr->leaf;
+
+ nlen = le16_to_cpu(dent->nlen);
+ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
+ if (err == 0) {
+ if (nlen == nm->len)
+ return NAME_MATCHES;
+ else if (nlen < nm->len)
+ return NAME_LESS;
+ else
+ return NAME_GREATER;
+ } else if (err < 0)
+ return NAME_LESS;
+ else
+ return NAME_GREATER;
+
+out_free:
+ kfree(dent);
+ return err;
+}
+
+/**
+ * fallible_resolve_collision - resolve a collision even if nodes are missing.
+ * @c: UBIFS file-system description object
+ * @key: key
+ * @zn: znode is returned here
+ * @n: branch number is passed and returned here
+ * @nm: name of directory entry
+ * @adding: indicates caller is adding a key to the TNC
+ *
+ * This is a "fallible" version of the 'resolve_collision()' function which
+ * does not panic if one of the nodes referred to by TNC does not exist on the
+ * media. This may happen when replaying the journal if a deleted node was
+ * Garbage-collected and the commit was not done. A branch that refers to a node
+ * that is not present is called a dangling branch. The following are the return
+ * codes for this function:
+ * o if @nm was found, %1 is returned and @zn and @n are set to the found
+ * branch;
+ * o if we are @adding and @nm was not found, %0 is returned;
+ * o if we are not @adding and @nm was not found, but a dangling branch was
+ * found, then %1 is returned and @zn and @n are set to the dangling branch;
+ * o a negative error code is returned in case of failure.
+ */
+static int fallible_resolve_collision(struct ubifs_info *c,
+ const union ubifs_key *key,
+ struct ubifs_znode **zn, int *n,
+ const struct qstr *nm, int adding)
+{
+ struct ubifs_znode *o_znode = NULL, *znode = *zn;
+ int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
+
+ cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
+ if (unlikely(cmp < 0))
+ return cmp;
+ if (cmp == NAME_MATCHES)
+ return 1;
+ if (cmp == NOT_ON_MEDIA) {
+ o_znode = znode;
+ o_n = nn;
+ /*
+ * We are unlucky and hit a dangling branch straight away.
+ * Now we do not really know where to go to find the needed
+ * branch - to the left or to the right. Well, let's try left.
+ */
+ unsure = 1;
+ } else if (!adding)
+ unsure = 1; /* Remove a dangling branch wherever it is */
+
+ if (cmp == NAME_GREATER || unsure) {
+ /* Look left */
+ while (1) {
+ err = tnc_prev(c, zn, n);
+ if (err == -ENOENT) {
+ ubifs_assert(*n == 0);
+ *n = -1;
+ break;
+ }
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
+ /* See comments in 'resolve_collision()' */
+ if (*n == (*zn)->child_cnt - 1) {
+ err = tnc_next(c, zn, n);
+ if (err) {
+ /* Should be impossible */
+ ubifs_assert(0);
+ if (err == -ENOENT)
+ err = -EINVAL;
+ return err;
+ }
+ ubifs_assert(*n == 0);
+ *n = -1;
+ }
+ break;
+ }
+ err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm);
+ if (err < 0)
+ return err;
+ if (err == NAME_MATCHES)
+ return 1;
+ if (err == NOT_ON_MEDIA) {
+ o_znode = *zn;
+ o_n = *n;
+ continue;
+ }
+ if (!adding)
+ continue;
+ if (err == NAME_LESS)
+ break;
+ else
+ unsure = 0;
+ }
+ }
+
+ if (cmp == NAME_LESS || unsure) {
+ /* Look right */
+ *zn = znode;
+ *n = nn;
+ while (1) {
+ err = tnc_next(c, &znode, &nn);
+ if (err == -ENOENT)
+ break;
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &znode->zbranch[nn].key, key))
+ break;
+ err = fallible_matches_name(c, &znode->zbranch[nn], nm);
+ if (err < 0)
+ return err;
+ if (err == NAME_GREATER)
+ break;
+ *zn = znode;
+ *n = nn;
+ if (err == NAME_MATCHES)
+ return 1;
+ if (err == NOT_ON_MEDIA) {
+ o_znode = znode;
+ o_n = nn;
+ }
+ }
+ }
+
+ /* Never match a dangling branch when adding */
+ if (adding || !o_znode)
+ return 0;
+
+ dbg_mnt("dangling match LEB %d:%d len %d %s",
+ o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
+ o_znode->zbranch[o_n].len, DBGKEY(key));
+ *zn = o_znode;
+ *n = o_n;
+ return 1;
+}
+
+/**
+ * matches_position - determine if a zbranch matches a given position.
+ * @zbr: zbranch of dent
+ * @lnum: LEB number of dent to match
+ * @offs: offset of dent to match
+ *
+ * This function returns %1 if @lnum:@offs matches, and %0 otherwise.
+ */
+static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs)
+{
+ if (zbr->lnum == lnum && zbr->offs == offs)
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * resolve_collision_directly - resolve a collision directly.
+ * @c: UBIFS file-system description object
+ * @key: key of directory entry
+ * @zn: znode is passed and returned here
+ * @n: zbranch number is passed and returned here
+ * @lnum: LEB number of dent node to match
+ * @offs: offset of dent node to match
+ *
+ * This function is used for "hashed" keys to make sure the found directory or
+ * extended attribute entry node is what was looked for. It is used when the
+ * flash address of the right node is known (@lnum:@offs) which makes it much
+ * easier to resolve collisions (no need to read entries and match full
+ * names). This function returns %1 and sets @zn and @n if the collision is
+ * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the
+ * previous directory entry. Otherwise a negative error code is returned.
+ */
+static int resolve_collision_directly(struct ubifs_info *c,
+ const union ubifs_key *key,
+ struct ubifs_znode **zn, int *n,
+ int lnum, int offs)
+{
+ struct ubifs_znode *znode;
+ int nn, err;
+
+ znode = *zn;
+ nn = *n;
+ if (matches_position(&znode->zbranch[nn], lnum, offs))
+ return 1;
+
+ /* Look left */
+ while (1) {
+ err = tnc_prev(c, &znode, &nn);
+ if (err == -ENOENT)
+ break;
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &znode->zbranch[nn].key, key))
+ break;
+ if (matches_position(&znode->zbranch[nn], lnum, offs)) {
+ *zn = znode;
+ *n = nn;
+ return 1;
+ }
+ }
+
+ /* Look right */
+ znode = *zn;
+ nn = *n;
+ while (1) {
+ err = tnc_next(c, &znode, &nn);
+ if (err == -ENOENT)
+ return 0;
+ if (err < 0)
+ return err;
+ if (keys_cmp(c, &znode->zbranch[nn].key, key))
+ return 0;
+ *zn = znode;
+ *n = nn;
+ if (matches_position(&znode->zbranch[nn], lnum, offs))
+ return 1;
+ }
+}
+
+/**
+ * dirty_cow_bottom_up - dirty a znode and its ancestors.
+ * @c: UBIFS file-system description object
+ * @znode: znode to dirty
+ *
+ * If we do not have a unique key that resides in a znode, then we cannot
+ * dirty that znode from the top down (i.e. by using lookup_level0_dirty)
+ * This function records the path back to the last dirty ancestor, and then
+ * dirties the znodes on that path.
+ */
+static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
+ struct ubifs_znode *znode)
+{
+ struct ubifs_znode *zp;
+ int *path = c->bottom_up_buf, p = 0;
+
+ ubifs_assert(c->zroot.znode);
+ ubifs_assert(znode);
+ if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
+ kfree(c->bottom_up_buf);
+ c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int),
+ GFP_NOFS);
+ if (!c->bottom_up_buf)
+ return ERR_PTR(-ENOMEM);
+ path = c->bottom_up_buf;
+ }
+ if (c->zroot.znode->level) {
+ /* Go up until parent is dirty */
+ while (1) {
+ int n;
+
+ zp = znode->parent;
+ if (!zp)
+ break;
+ n = znode->iip;
+ ubifs_assert(p < c->zroot.znode->level);
+ path[p++] = n;
+ if (!zp->cnext && ubifs_zn_dirty(znode))
+ break;
+ znode = zp;
+ }
+ }
+
+ /* Come back down, dirtying as we go */
+ while (1) {
+ struct ubifs_zbranch *zbr;
+
+ zp = znode->parent;
+ if (zp) {
+ ubifs_assert(path[p - 1] >= 0);
+ ubifs_assert(path[p - 1] < zp->child_cnt);
+ zbr = &zp->zbranch[path[--p]];
+ znode = dirty_cow_znode(c, zbr);
+ } else {
+ ubifs_assert(znode == c->zroot.znode);
+ znode = dirty_cow_znode(c, &c->zroot);
+ }
+ if (unlikely(IS_ERR(znode)) || !p)
+ break;
+ ubifs_assert(path[p - 1] >= 0);
+ ubifs_assert(path[p - 1] < znode->child_cnt);
+ znode = znode->zbranch[path[p - 1]].znode;
+ }
+
+ return znode;
+}
+
+/**
+ * ubifs_lookup_level0 - search for zero-level znode.
+ * @c: UBIFS file-system description object
+ * @key: key to lookup
+ * @zn: znode is returned here
+ * @n: znode branch slot number is returned here
+ *
+ * This function looks up the TNC tree and search for zero-level znode which
+ * refers key @key. The found zero-level znode is returned in @zn. There are 3
+ * cases:
+ * o exact match, i.e. the found zero-level znode contains key @key, then %1