diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-06 15:00:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-06 15:00:19 -0700 |
commit | e0724bf6e4a1f2e678d2b2aab01cae22e17862f0 (patch) | |
tree | 559a8fa8e7a92f8ae0e0a27d4e71f408fa7cec62 /fs/ubifs | |
parent | 38d9aefb5ce8f26358b0d5cd933cfa9e267105b1 (diff) | |
parent | de0975781a1a8bc92e07eb7681d10ef9bb5e6df9 (diff) |
Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6
* 'linux-next' of git://git.infradead.org/ubifs-2.6:
UBIFS: fix recovery bug
UBIFS: add R/O compatibility
UBIFS: fix compiler warnings
UBIFS: fully sort GCed nodes
UBIFS: fix commentaries
UBIFS: introduce a helpful variable
UBIFS: use KERN_CONT
UBIFS: fix lprops committing bug
UBIFS: fix bogus assertion
UBIFS: fix bug where page is marked uptodate when out of space
UBIFS: amend key_hash return value
UBIFS: improve find function interface
UBIFS: list usage cleanup
UBIFS: fix dbg_chk_lpt_sz()
Diffstat (limited to 'fs/ubifs')
-rw-r--r-- | fs/ubifs/budget.c | 37 | ||||
-rw-r--r-- | fs/ubifs/debug.c | 6 | ||||
-rw-r--r-- | fs/ubifs/file.c | 16 | ||||
-rw-r--r-- | fs/ubifs/find.c | 12 | ||||
-rw-r--r-- | fs/ubifs/gc.c | 428 | ||||
-rw-r--r-- | fs/ubifs/journal.c | 7 | ||||
-rw-r--r-- | fs/ubifs/key.h | 6 | ||||
-rw-r--r-- | fs/ubifs/log.c | 5 | ||||
-rw-r--r-- | fs/ubifs/lpt_commit.c | 34 | ||||
-rw-r--r-- | fs/ubifs/recovery.c | 70 | ||||
-rw-r--r-- | fs/ubifs/replay.c | 2 | ||||
-rw-r--r-- | fs/ubifs/sb.c | 36 | ||||
-rw-r--r-- | fs/ubifs/shrinker.c | 6 | ||||
-rw-r--r-- | fs/ubifs/super.c | 37 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 2 | ||||
-rw-r--r-- | fs/ubifs/ubifs-media.h | 30 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 13 |
17 files changed, 482 insertions, 265 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index f393620890e..af1914462f0 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) } /** - * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. + * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. * @c: UBIFS file-system description object * - * This function calculates and returns the number of eraseblocks which should - * be kept for index usage. + * This function calculates and returns the number of LEBs which should be kept + * for index usage. */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + int idx_lebs; long long idx_size; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - /* And make sure we have thrice the index size of space reserved */ - idx_size = idx_size + (idx_size << 1); - + idx_size += idx_size << 1; /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. */ - idx_size += eff_leb_size - 1; - idx_lebs = div_u64(idx_size, eff_leb_size); + idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. @@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c) * do_budget_space - reserve flash space for index and data growth. * @c: UBIFS file-system description object * - * This function makes sure UBIFS has enough free eraseblocks for index growth - * and data. + * This function makes sure UBIFS has enough free LEBs for index growth and + * data. * * When budgeting index space, UBIFS reserves thrice as many LEBs as the index * would take if it was consolidated and written to the flash. This guarantees * that the "in-the-gaps" commit method always succeeds and UBIFS will always * be able to commit dirty index. So this function basically adds amount of * budgeted index space to the size of the current index, multiplies this by 3, - * and makes sure this does not exceed the amount of free eraseblocks. + * and makes sure this does not exceed the amount of free LEBs. * * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might * be large, because UBIFS does not do any index consolidation as long as * there is free space. IOW, the index may take a lot of LEBs, but the LEBs * will contain a lot of dirt. - * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be - * consolidated to take up to @c->min_idx_lebs LEBs. + * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->min_idx_lebs LEBs. * * This function returns zero in case of success, and %-ENOSPC in case of * failure. @@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alignment, wastage at the end of eraseblocks, etc), it cannot report real - * amount of free flash space it has (well, because not all dirty space is - * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectations about what free space is. Users seem to - * accustomed to assume that if the file-system reports N bytes of free space, - * they would be able to fit a file of N bytes to the FS. This almost works for + * alignment, wastage at the end of LEBs, etc), it cannot report real amount of + * free flash space it has (well, because not all dirty space is reclaimable, + * UBIFS does not actually know the real amount). If UBIFS did so, it would + * bread user expectations about what free space is. Users seem to accustomed + * to assume that if the file-system reports N bytes of free space, they would + * be able to fit a file of N bytes to the FS. This almost works for * traditional file-systems, because they have way less overhead than UBIFS. * So, to keep users happy, UBIFS tries to take the overhead into account. */ diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index e975bd82f38..ce2cd834361 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) "bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - printk("%c", dent->name[i]); + printk(KERN_CONT "%c", dent->name[i]); } - printk("\n"); + printk(KERN_CONT "\n"); break; } @@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) /* * Make sure the last key in our znode is less or - * equivalent than the the key in zbranch which goes + * equivalent than the key in the zbranch which goes * after our pointing zbranch. */ cmp = keys_cmp(c, max, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0ff89fe71e5..6d34dc7e33e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + int skipped_read = 0; struct page *page; ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); @@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further @@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, * the media. */ SetPageChecked(page); - else { + skipped_read = 1; + } else { err = do_readpage(page); if (err) { unlock_page(page); @@ -470,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) { ubifs_assert(err == -ENOSPC); /* + * If we skipped reading the page because we were going to + * write all of it, then it is not up to date. + */ + if (skipped_read) { + ClearPageChecked(page); + ClearPageUptodate(page); + } + /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the * request. Write-back cannot be done now, while we have the @@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len) * whole index and correct all inode sizes, which is long an unacceptable. * * To prevent situations like this, UBIFS writes pages back only if they are - * within last synchronized inode size, i.e. the the size which has been + * within the last synchronized inode size, i.e. the size which has been * written to the flash media last time. Otherwise, UBIFS forces inode * write-back, thus making sure the on-flash inode contains current inode size, * and then keeps writing pages back. diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 717d79c97c5..1d54383d126 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * ubifs_find_free_space - find a data LEB with free space. * @c: the UBIFS file-system description object * @min_space: minimum amount of required free space - * @free: contains amount of free space in the LEB on exit + * @offs: contains offset of where free space starts on exit * @squeeze: whether to try to find space in a non-empty LEB first * * This function looks for an LEB with at least @min_space bytes of free space. @@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, * failed to find a LEB with @min_space bytes of free space and other a negative * error codes in case of failure. */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze) { const struct ubifs_lprops *lprops; @@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, spin_unlock(&c->space_lock); } - *free = lprops->free; + *offs = c->leb_size - lprops->free; ubifs_release_lprops(c); - if (*free == c->leb_size) { + if (*offs == 0) { /* * Ensure that empty LEBs have been unmapped. They may not have * been, for example, because of an unclean unmount. Also @@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, return err; } - dbg_find("found LEB %d, free %d", lnum, *free); - ubifs_assert(*free >= min_space); + dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); + ubifs_assert(*offs <= c->leb_size - min_space); return lnum; out: diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index a711d33b3d3..f0f5f15d384 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -47,7 +47,7 @@ * have to waste large pieces of free space at the end of LEB B, because nodes * from LEB A would not fit. And the worst situation is when all nodes are of * maximum size. So dark watermark is the amount of free + dirty space in LEB - * which are guaranteed to be reclaimable. If LEB has less space, the GC migh + * which are guaranteed to be reclaimable. If LEB has less space, the GC might * be unable to reclaim it. So, LEBs with free + dirty greater than dark * watermark are "good" LEBs from GC's point of few. The other LEBs are not so * good, and GC takes extra care when moving them. @@ -57,14 +57,6 @@ #include "ubifs.h" /* - * GC tries to optimize the way it fit nodes to available space, and it sorts - * nodes a little. The below constants are watermarks which define "large", - * "medium", and "small" nodes. - */ -#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) -#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ - -/* * GC may need to move more than one LEB to make progress. The below constants * define "soft" and "hard" limits on the number of LEBs the garbage collector * may move. @@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) } /** - * joinup - bring data nodes for an inode together. - * @c: UBIFS file-system description object - * @sleb: describes scanned LEB - * @inum: inode number - * @blk: block number - * @data: list to which to add data nodes + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function * - * This function looks at the first few nodes in the scanned LEB @sleb and adds - * them to @data if they are data nodes from @inum and have a larger block - * number than @blk. This function returns %0 on success and a negative error - * code on failure. + * This function has been implemented by Mark J Roberts <mjr@znex.org>. It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. + * + * The comparison function @cmp is supposed to return a negative value if @a is + * than @b, and a positive value if @a is greater than @b. If @a and @b are + * equivalent, then it does not matter what this function returns. */ -static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, - unsigned int blk, struct list_head *data) +static void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) { - int err, cnt = 6, lnum = sleb->lnum, offs; - struct ubifs_scan_node *snod, *tmp; - union ubifs_key *key; + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? NULL : q->next; + if (!q) + break; + } - list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { - key = &snod->key; - if (key_inum(c, key) == inum && - key_type(c, key) == UBIFS_DATA_KEY && - key_block(c, key) > blk) { - offs = snod->offs; - err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); - if (err < 0) - return err; - list_del(&snod->list); - if (err) { - list_add_tail(&snod->list, data); - blk = key_block(c, key); - } else - kfree(snod); - cnt = 6; - } else if (--cnt == 0) + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) break; + + insize *= 2; } - return 0; + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; } /** - * move_nodes - move nodes. + * data_nodes_cmp - compare 2 data nodes. + * @priv: UBIFS file-system description object + * @a: first data node + * @a: second data node + * + * This function compares data nodes @a and @b. Returns %1 if @a has greater + * inode or block number, and %-1 otherwise. + */ +int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); + ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); + + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + unsigned int blka = key_block(c, &sa->key); + unsigned int blkb = key_block(c, &sb->key); + + if (blka <= blkb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/* + * nondata_nodes_cmp - compare 2 non-data nodes. + * @priv: UBIFS file-system description object + * @a: first node + * @a: second node + * + * This function compares nodes @a and @b. It makes sure that inode nodes go + * first and sorted by length in descending order. Directory entry nodes go + * after inode nodes and are sorted in ascending hash valuer order. + */ +int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + int typea, typeb; + ino_t inuma, inumb; + struct ubifs_info *c = priv; + struct ubifs_scan_node *sa, *sb; + + cond_resched(); + sa = list_entry(a, struct ubifs_scan_node, list); + sb = list_entry(b, struct ubifs_scan_node, list); + typea = key_type(c, &sa->key); + typeb = key_type(c, &sb->key); + ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); + + /* Inodes go before directory entries */ + if (typea == UBIFS_INO_KEY) { + if (typeb == UBIFS_INO_KEY) + return sb->len - sa->len; + return -1; + } + if (typeb == UBIFS_INO_KEY) + return 1; + + ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma == inumb) { + uint32_t hasha = key_hash(c, &sa->key); + uint32_t hashb = key_hash(c, &sb->key); + + if (hasha <= hashb) + return -1; + } else if (inuma <= inumb) + return -1; + + return 1; +} + +/** + * sort_nodes - sort nodes for GC. * @c: UBIFS file-system description object - * @sleb: describes nodes to move + * @sleb: describes nodes to sort and contains the result on exit + * @nondata: contains non-data nodes on exit + * @min: minimum node size is returned here * - * This function moves valid nodes from data LEB described by @sleb to the GC - * journal head. The obsolete nodes are dropped. + * This function sorts the list of inodes to garbage collect. First of all, it + * kills obsolete nodes and separates data and non-data nodes to the + * @sleb->nodes and @nondata lists correspondingly. + * + * Data nodes are then sorted in block number order - this is important for + * bulk-read; data nodes with lower inode number go before data nodes with + * higher inode number, and data nodes with lower block number go before data + * nodes with higher block number; * - * When moving nodes we have to deal with classical bin-packing problem: the - * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", - * where the nodes in the @sleb->nodes list are the elements which should be - * fit optimally to the bins. This function uses the "first fit decreasing" - * strategy, although it does not really sort the nodes but just split them on - * 3 classes - large, medium, and small, so they are roughly sorted. + * Non-data nodes are sorted as follows. + * o First go inode nodes - they are sorted in descending length order. + * o Then go directory entry nodes - they are sorted in hash order, which + * should supposedly optimize 'readdir()'. Direntry nodes with lower parent + * inode number go before direntry nodes with higher parent inode number, + * and direntry nodes with lower name hash values go before direntry nodes + * with higher name hash values. * - * This function returns zero in case of success, %-EAGAIN if commit is - * required, and other negative error codes in case of other failures. + * This function returns zero in case of success and a negative error code in + * case of failure. */ -static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct list_head *nondata, int *min) { struct ubifs_scan_node *snod, *tmp; - struct list_head data, large, medium, small; - struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; - int avail, err, min = INT_MAX; - unsigned int blk = 0; - ino_t inum = 0; - INIT_LIST_HEAD(&data); - INIT_LIST_HEAD(&large); - INIT_LIST_HEAD(&medium); - INIT_LIST_HEAD(&small); + *min = INT_MAX; - while (!list_empty(&sleb->nodes)) { - struct list_head *lst = sleb->nodes.next; - - snod = list_entry(lst, struct ubifs_scan_node, list); + /* Separate data nodes and non-data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + int err; ubifs_assert(snod->type != UBIFS_IDX_NODE); ubifs_assert(snod->type != UBIFS_REF_NODE); @@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, snod->offs, 0); if (err < 0) - goto out; + return err; - list_del(lst); if (!err) { /* The node is obsolete, remove it from the list */ + list_del(&snod->list); kfree(snod); continue; } - /* - * Sort the list of nodes so that data nodes go first, large - * nodes go second, and small nodes go last. - */ - if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { - if (inum != key_inum(c, &snod->key)) { - if (inum) { - /* - * Try to move data nodes from the same - * inode together. - */ - err = joinup(c, sleb, inum, blk, &data); - if (err) - goto out; - } - inum = key_inum(c, &snod->key); - blk = key_block(c, &snod->key); - } - list_add_tail(lst, &data); - } else if (snod->len > MEDIUM_NODE_WM) - list_add_tail(lst, &large); - else if (snod->len > SMALL_NODE_WM) - list_add_tail(lst, &medium); - else - list_add_tail(lst, &small); - - /* And find the smallest node */ - if (snod->len < min) - min = snod->len; + if (snod->len < *min) + *min = snod->len; + + if (key_type(c, &snod->key) != UBIFS_DATA_KEY) + list_move_tail(&snod->list, nondata); } - /* - * Join the tree lists so that we'd have one roughly sorted list - * ('large' will be the head of the joined list). - */ - list_splice(&data, &large); - list_splice(&medium, large.prev); - list_splice(&small, large.prev); + /* Sort data and non-data nodes */ + list_sort(c, &sleb->nodes, &data_nodes_cmp); + list_sort(c, nondata, &nondata_nodes_cmp); + return 0; +} + +/** + * move_node - move a node. + * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * @snod: the mode to move + * @wbuf: write-buffer to move node to + * + * This function moves node @snod to @wbuf, changes TNC correspondingly, and + * destroys @snod. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) +{ + int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; + + cond_resched(); + err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); + if (err) + return err; + + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, + snod->offs, new_lnum, new_offs, + snod->len); + list_del(&snod->list); + kfree(snod); + return err; +} + +/** + * move_nodes - move nodes. + * @c: UBIFS file-system description object + * @sleb: describes the LEB to move nodes from + * + * This function moves valid nodes from data LEB described by @sleb to the GC + * journal head. This function returns zero in case of success, %-EAGAIN if + * commit is required, and other negative error codes in case of other + * failures. + */ +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) +{ + int err, min; + LIST_HEAD(nondata); + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; if (wbuf->lnum == -1) { /* @@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) */ err = switch_gc_head(c); if (err) - goto out; + return err; } + err = sort_nodes(c, sleb, &nondata, &min); + if (err) + goto out; + /* Write nodes to their new location. Use the first-fit strategy */ while (1) { - avail = c->leb_size - wbuf->offs - wbuf->used; - list_for_each_entry_safe(snod, tmp, &large, list) { - int new_lnum, new_offs; + int avail; + struct ubifs_scan_node *snod, *tmp; + + /* Move data nodes */ + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; + if (snod->len > avail) + /* + * Do not skip data nodes in order to optimize + * bulk-read. + */ + break; + + err = move_node(c, sleb, snod, wbuf); + if (err) + goto out; + } + /* Move non-data nodes */ + list_for_each_entry_safe(snod, tmp, &nondata, list) { + avail = c->leb_size - wbuf->offs - wbuf->used; if (avail < min) break; - if (snod->len > avail) - /* This node does not fit */ + if (snod->len > avail) { + /* + * Keep going only if this is an inode with + * some data. Otherwise stop and switch the GC + * head. IOW, we assume that data-less inode + * nodes and direntry nodes are roughly of the + * same size. + */ + if (key_type(c, &snod->key) == UBIFS_DENT_KEY || + snod->len == UBIFS_INO_NODE_SZ) + break; continue; + } - cond_resched(); - - new_lnum = wbuf->lnum; - new_offs = wbuf->offs + wbuf->used; - err = ubifs_wbuf_write_nolock(wbuf, snod->node, - snod->len); + err = move_node(c, sleb, snod, wbuf); if (err) goto out; - err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, - snod->offs, new_lnum, new_offs, - snod->len); - if (err) - goto out; - - avail = c->leb_size - wbuf->offs - wbuf->used; - list_del(&snod->list); - kfree(snod); } - if (list_empty(&large)) + if (list_empty(&sleb->nodes) && list_empty(&nondata)) break; /* @@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) return 0; out: - list_for_each_entry_safe(snod, tmp, &large, list) { - list_del(&snod->list); - kfree(snod); - } + list_splice_tail(&nondata, &sleb->nodes); return err; } diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a11ca0958a2..64b5f3a309f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) */ static int reserve_space(struct ubifs_info *c, int jhead, int len) { - int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; + int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; /* @@ -139,10 +139,9 @@ again: * Write buffer wasn't seek'ed or there is no enough space - look for an * LEB with some empty space. */ - lnum = ubifs_find_free_space(c, len, &free, squeeze); + lnum = ubifs_find_free_space(c, len, &offs, squeeze); if (lnum >= 0) { /* Found an LEB, add it to the journal head */ - offs = c->leb_size - free; err = ubifs_add_bud_to_log(c, jhead, lnum, offs); if (err) goto out_return; @@ -1366,7 +1365,7 @@ out_ro: * @host: host inode * * This function writes the updated version of an extended attribute inode and - * the host inode tho the journal (to the base head). The host inode is written + * the host inode to the journal (to the base head). The host inode is written * after the extended attribute inode in order to guarantee that the extended * attribute will be flushed when the inode is synchronized by 'fsync()' and * consequently, the write-buffer is synchronized. This function returns zero diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430a258..5fa27ea031b 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) * @c: UBIFS file-system description object * @key: the key to get hash from */ -static inline int key_hash(const struct ubifs_info *c, - const union ubifs_key *key) +static inline uint32_t key_hash(const struct ubifs_info *c, + const union ubifs_key *key) { return key->u32[1] & UBIFS_S_KEY_HASH_MASK; } @@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, * @c: UBIFS file-system description object * @k: the key to get hash from */ -static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) { const union ubifs_key *key = k; diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 3e0aa736755..56e33772a1e 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) } /* - * Make sure the the amount of space in buds will not exceed + * Make sure the amount of space in buds will not exceed the * 'c->max_bud_bytes' limit, because we want to guarantee mount time * limits. * @@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) bud->jhead, c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); - list_del(&bud->list); /* * If the commit does not finish, the recovery will need * to replay the journal, in which case the old buds @@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) * commit i.e. do not allow them to be garbage * collected. */ - list_add(&bud->list, &c->old_buds); + list_move(&bud->list, &c->old_buds); } } spin_unlock(&c->buds_lock); diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 3216a1f277f..8cbfb824802 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) while (offs + len > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->lsave_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) if (offs + c->ltab_sz > c->leb_size) { alen = ALIGN(offs, c->min_io_size); upd_ltab(c, lnum, c->leb_size - alen, alen - offs); - dbg_chk_lpt_sz(c, 2, alen - offs); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = alloc_lpt_leb(c, &lnum); if (err) goto no_space; @@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c) alen, UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 4, alen - wlen); } - dbg_chk_lpt_sz(c, 2, 0); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_space; - offs = 0; - from = 0; + offs = from = 0; ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); err = ubifs_leb_unmap(c, lnum); @@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c) UBI_SHORTTERM); if (err) return err; - dbg_chk_lpt_sz(c, 2, alen - wlen); + dbg_chk_lpt_sz(c, 2, c->leb_size - offs); err = realloc_lpt_leb(c, &lnum); if (err) goto no_spac |