diff options
Diffstat (limited to 'fs/ubifs/ubifs.h')
| -rw-r--r-- | fs/ubifs/ubifs.h | 526 |
1 files changed, 338 insertions, 188 deletions
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e4f89f27182..c1f71fe17cc 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -20,8 +20,6 @@ * Adrian Hunter */ -/* Implementation version 0.7 */ - #ifndef __UBIFS_H__ #define __UBIFS_H__ @@ -30,6 +28,7 @@ #include <linux/fs.h> #include <linux/err.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -43,16 +42,24 @@ #define UBIFS_VERSION 1 /* Normal UBIFS messages */ -#define ubifs_msg(fmt, ...) \ - printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) /* UBIFS error messages */ -#define ubifs_err(fmt, ...) \ - printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ +#define ubifs_err(fmt, ...) \ + pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ __func__, ##__VA_ARGS__) /* UBIFS warning messages */ -#define ubifs_warn(fmt, ...) \ - printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__) +#define ubifs_warn(fmt, ...) \ + pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) +/* + * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description + * object as an argument. + */ +#define ubifs_errc(c, fmt, ...) \ + do { \ + if (!(c)->probing) \ + ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) /* UBIFS file system VFS magic number */ #define UBIFS_SUPER_MAGIC 0x24051905 @@ -65,6 +72,14 @@ #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL #define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL +/* + * Minimum amount of LEBs reserved for the index. At present the index needs at + * least 2 LEBs: one for the index head and one for in-the-gaps method (which + * currently does not cater for the index head and so excludes it from + * consideration). + */ +#define MIN_INDEX_LEBS 2 + /* Minimum amount of data UBIFS writes to the flash */ #define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) @@ -77,9 +92,6 @@ #define INUM_WARN_WATERMARK 0xFFF00000 #define INUM_WATERMARK 0xFFFFFF00 -/* Largest key size supported in this implementation */ -#define CUR_MAX_KEY_LEN UBIFS_SK_LEN - /* Maximum number of entries in each LPT (LEB category) heap */ #define LPT_HEAP_SZ 256 @@ -89,8 +101,9 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout (5 secs) */ -#define DEFAULT_WBUF_TIMEOUT (5 * HZ) +/* Write-buffer synchronization timeout interval in seconds */ +#define WBUF_TIMEOUT_SOFTLIMIT 3 +#define WBUF_TIMEOUT_HARDLIMIT 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -98,12 +111,10 @@ /* Number of non-data journal heads */ #define NONDATA_JHEADS_CNT 2 -/* Garbage collector head */ -#define GCHD 0 -/* Base journal head number */ -#define BASEHD 1 -/* First "general purpose" journal head */ -#define DATAHD 2 +/* Shorter names for journal head numbers for internal usage */ +#define GCHD UBIFS_GC_HEAD +#define BASEHD UBIFS_BASE_HEAD +#define DATAHD UBIFS_DATA_HEAD /* 'No change' value for 'ubifs_change_lp()' */ #define LPROPS_NC 0x80000001 @@ -113,8 +124,12 @@ * in TNC. However, when replaying, it is handy to introduce fake "truncation" * keys for truncation nodes because the code becomes simpler. So we define * %UBIFS_TRUN_KEY type. + * + * But otherwise, out of the journal reply scope, the truncation keys are + * invalid. */ -#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT /* * How much a directory entry/extended attribute entry adds to the parent/host @@ -141,9 +156,18 @@ */ #define WORST_COMPR_FACTOR 2 +/* + * How much memory is needed for a buffer where we comress a data node. + */ +#define COMPRESSED_DATA_NODE_BUF_SZ \ + (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) + /* Maximum expected tree height for use by bottom_up_buf */ #define BOTTOM_UP_HEIGHT 64 +/* Maximum number of data nodes to bulk-read */ +#define UBIFS_MAX_BULK_READ 32 + /* * Lockdep classes for UBIFS inode @ui_mutex. */ @@ -211,14 +235,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * COW_CNODE: cnode is being committed and must be copied before writing * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished + * so it can (and must) be freed when the commit is finished + * COW_CNODE: cnode is being committed and must be copied before writing */ enum { DIRTY_CNODE = 0, - COW_CNODE = 1, - OBSOLETE_CNODE = 2, + OBSOLETE_CNODE = 1, + COW_CNODE = 2, }; /* @@ -258,10 +282,10 @@ struct ubifs_old_idx { /* The below union makes it easier to deal with keys */ union ubifs_key { - uint8_t u8[CUR_MAX_KEY_LEN]; - uint32_t u32[CUR_MAX_KEY_LEN/4]; - uint64_t u64[CUR_MAX_KEY_LEN/8]; - __le32 j32[CUR_MAX_KEY_LEN/4]; + uint8_t u8[UBIFS_SK_LEN]; + uint32_t u32[UBIFS_SK_LEN/4]; + uint64_t u64[UBIFS_SK_LEN/8]; + __le32 j32[UBIFS_SK_LEN/4]; }; /** @@ -322,15 +346,18 @@ struct ubifs_gced_idx_leb { * struct ubifs_inode - UBIFS in-memory inode description. * @vfs_inode: VFS inode description object * @creat_sqnum: sequence number at time of creation + * @del_cmtno: commit number corresponding to the time the inode was deleted, + * protected by @c->commit_sem; * @xattr_size: summarized size of all extended attributes in bytes * @xattr_cnt: count of extended attributes this inode has * @xattr_names: sum of lengths of all extended attribute names belonging to * this inode * @dirty: non-zero if the inode is dirty * @xattr: non-zero if this is an extended attribute inode + * @bulk_read: non-zero if bulk-read should be used * @ui_mutex: serializes inode write-back with the rest of VFS operations, - * serializes "clean <-> dirty" state changes, protects @dirty, - * @ui_size, and @xattr_size + * serializes "clean <-> dirty" state changes, serializes bulk-read, + * protects @dirty, @bulk_read, @ui_size, and @xattr_size * @ui_lock: protects @synced_i_size * @synced_i_size: synchronized size of inode, i.e. the value of inode size * currently stored on the flash; used only for regular file @@ -338,6 +365,8 @@ struct ubifs_gced_idx_leb { * @ui_size: inode size used by UBIFS when writing to flash * @flags: inode flags (@UBIFS_COMPR_FL, etc) * @compr_type: default compression type used for this inode + * @last_page_read: page number of last page read (for bulk read) + * @read_in_a_row: number of consecutive pages read in a row (for bulk read) * @data_len: length of the data attached to the inode * @data: inode's data * @@ -365,25 +394,29 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock - * with 'ubifs_writepage()' (see file.c). All the other inode fields are - * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would + * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields + * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. */ struct ubifs_inode { struct inode vfs_inode; unsigned long long creat_sqnum; + unsigned long long del_cmtno; unsigned int xattr_size; unsigned int xattr_cnt; unsigned int xattr_names; unsigned int dirty:1; unsigned int xattr:1; + unsigned int bulk_read:1; + unsigned int compr_type:2; struct mutex ui_mutex; spinlock_t ui_lock; loff_t synced_i_size; loff_t ui_size; int flags; - int compr_type; + pgoff_t last_page_read; + pgoff_t read_in_a_row; int data_len; void *data; }; @@ -408,9 +441,9 @@ struct ubifs_unclean_leb { * LEB properties flags. * * LPROPS_UNCAT: not categorized - * LPROPS_DIRTY: dirty > 0, not index - * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index - * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index + * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index + * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken @@ -463,8 +496,8 @@ struct ubifs_lprops { struct ubifs_lpt_lprops { int free; int dirty; - unsigned tgc : 1; - unsigned cmt : 1; + unsigned tgc:1; + unsigned cmt:1; }; /** @@ -472,24 +505,26 @@ struct ubifs_lpt_lprops { * @empty_lebs: number of empty LEBs * @taken_empty_lebs: number of taken LEBs * @idx_lebs: number of indexing LEBs - * @total_free: total free space in bytes - * @total_dirty: total dirty space in bytes - * @total_used: total used space in bytes (includes only data LEBs) - * @total_dead: total dead space in bytes (includes only data LEBs) - * @total_dark: total dark space in bytes (includes only data LEBs) + * @total_free: total free space in bytes (includes all LEBs) + * @total_dirty: total dirty space in bytes (includes all LEBs) + * @total_used: total used space in bytes (does not include index LEBs) + * @total_dead: total dead space in bytes (does not include index LEBs) + * @total_dark: total dark space in bytes (does not include index LEBs) + * + * The @taken_empty_lebs field counts the LEBs that are in the transient state + * of having been "taken" for use but not yet written to. @taken_empty_lebs is + * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be + * used by itself (in which case 'unused_lebs' would be a better name). In the + * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained + * by GC, but unlike other empty LEBs that are "taken", it may not be written + * straight away (i.e. before the next commit start or unmount), so either + * @gc_lnum must be specially accounted for, or the current approach followed + * i.e. count it under @taken_empty_lebs. * - * N.B. total_dirty and total_used are different to other total_* fields, - * because they account _all_ LEBs, not just data LEBs. + * @empty_lebs includes @taken_empty_lebs. * - * 'taken_empty_lebs' counts the LEBs that are in the transient state of having - * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed - * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used - * by itself (in which case 'unused_lebs' would be a better name). In the case - * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, - * but unlike other empty LEBs that are 'taken', it may not be written straight - * away (i.e. before the next commit start or unmount), so either gc_lnum must - * be specially accounted for, or the current approach followed i.e. count it - * under 'taken_empty_lebs'. + * @total_used, @total_dead and @total_dark fields do not account indexing + * LEBs. */ struct ubifs_lp_stats { int empty_lebs; @@ -622,17 +657,19 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @offs: write-buffer offset in this logical eraseblock * @avail: number of bytes available in the write-buffer * @used: number of used bytes in the write-buffer - * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, - * %UBI_UNKNOWN) + * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep * up by 'mutex_lock_nested()). * @sync_callback: write-buffer synchronization callback * @io_mutex: serializes write-buffer I/O * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes * fields + * @softlimit: soft write-buffer timeout interval + * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit + * and @softlimit + @delta) * @timer: write-buffer timer - * @timeout: timer expire interval in jiffies - * @need_sync: it is set if its timer expired and needs sync + * @no_timer: non-zero if this write-buffer does not have a timer + * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf * @@ -653,14 +690,16 @@ struct ubifs_wbuf { int offs; int avail; int used; - int dtype; + int size; int jhead; int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); struct mutex io_mutex; spinlock_t lock; - struct timer_list timer; - int timeout; - int need_sync; + ktime_t softlimit; + unsigned long long delta; + struct hrtimer timer; + unsigned int no_timer:1; + unsigned int need_sync:1; int next_ino; ino_t *inodes; }; @@ -685,20 +724,22 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. * @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. */ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. * @key: key * @znode: znode address in memory - * @lnum: LEB number of the indexing node - * @offs: offset of the indexing node within @lnum + * @lnum: LEB number of the target node (indexing node or data node) + * @offs: target node offset within @lnum * @len: target node length */ struct ubifs_zbranch { @@ -726,6 +767,9 @@ struct ubifs_zbranch { * @offs: offset of the corresponding indexing node * @len: length of the corresponding indexing node * @zbranch: array of znode branches (@c->fanout elements) + * + * Note! The @lnum, @offs, and @len fields are not really needed - we have them + * only for internal consistency check. They could be removed to save some RAM. */ struct ubifs_znode { struct ubifs_znode *parent; @@ -736,13 +780,35 @@ struct ubifs_znode { int child_cnt; int iip; int alt; -#ifdef CONFIG_UBIFS_FS_DEBUG - int lnum, offs, len; -#endif + int lnum; + int offs; + int len; struct ubifs_zbranch zbranch[]; }; /** + * struct bu_info - bulk-read information. + * @key: first data node key + * @zbranch: zbranches of data nodes to bulk read + * @buf: buffer to read into + * @buf_len: buffer length + * @gc_seq: GC sequence number to detect races with GC + * @cnt: number of data nodes for bulk read + * @blk_cnt: number of data blocks including holes + * @oef: end of file reached + */ +struct bu_info { + union ubifs_key key; + struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; + void *buf; + int buf_len; + int gc_seq; + int cnt; + int blk_cnt; + int eof; +}; + +/** * struct ubifs_node_range - node length range description data structure. * @len: fixed node length * @min_len: minimum possible node length @@ -779,7 +845,7 @@ struct ubifs_compressor { /** * struct ubifs_budget_req - budget requirements of an operation. * - * @fast: non-zero if the budgeting should try to aquire budget quickly and + * @fast: non-zero if the budgeting should try to acquire budget quickly and * should not try to call write-back * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields * have to be re-calculated @@ -805,21 +871,31 @@ struct ubifs_compressor { * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made * dirty by the re-name operation. + * + * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to + * make sure the amount of inode data which contribute to @new_ino_d and + * @dirtied_ino_d fields are aligned. */ struct ubifs_budget_req { unsigned int fast:1; unsigned int recalculate:1; +#ifndef UBIFS_DEBUG unsigned int new_page:1; unsigned int dirtied_page:1; unsigned int new_dent:1; unsigned int mod_dent:1; unsigned int new_ino:1; unsigned int new_ino_d:13; -#ifndef UBIFS_DEBUG unsigned int dirtied_ino:4; unsigned int dirtied_ino_d:15; #else /* Not bit-fields to check for overflows */ + unsigned int new_page; + unsigned int dirtied_page; + unsigned int new_dent; + unsigned int mod_dent; + unsigned int new_ino; + unsigned int new_ino_d; unsigned int dirtied_ino; unsigned int dirtied_ino_d; #endif @@ -837,6 +913,8 @@ struct ubifs_budget_req { * @dnext: next orphan to delete * @inum: inode number * @new: %1 => added since the last commit, otherwise %0 + * @cmt: %1 => commit pending, otherwise %0 + * @del: %1 => delete pending, otherwise %0 */ struct ubifs_orphan { struct rb_node rb; @@ -845,29 +923,80 @@ struct ubifs_orphan { struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; - int new; + unsigned new:1; + unsigned cmt:1; + unsigned del:1; }; /** * struct ubifs_mount_opts - UBIFS-specific mount options information. * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + * (%0 default, %1 disabe, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + * superblock compressor, %1 - override and use compressor + * specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + * (%UBIFS_COMPR_NONE, etc) */ struct ubifs_mount_opts { unsigned int unmount_mode:2; + unsigned int bulk_read:2; + unsigned int chk_data_crc:2; + unsigned int override_compr:1; + unsigned int compr_type:2; +}; + +/** + * struct ubifs_budg_info - UBIFS budgeting information. + * @idx_growth: amount of bytes budgeted for index growth + * @data_growth: amount of bytes budgeted for cached data + * @dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but + * which still have to be taken into account because the index + * has not been committed so far + * @old_idx_sz: size of index on flash + * @min_idx_lebs: minimum number of LEBs required for the index + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * @page_budget: budget for a page (constant, nenver changed after mount) + * @inode_budget: budget for an inode (constant, nenver changed after mount) + * @dent_budget: budget for a directory entry (constant, nenver changed after + * mount) + */ +struct ubifs_budg_info { + long long idx_growth; + long long data_growth; + long long dd_growth; + long long uncommitted_idx; + unsigned long long old_idx_sz; + int min_idx_lebs; + unsigned int nospace:1; + unsigned int nospace_rp:1; + int page_budget; + int inode_budget; + int dent_budget; }; +struct ubifs_debug_info; + /** * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). * @vfs_sb: VFS @struct super_block object - * @bdi: backing device info object to make VFS happy and disable readahead + * @bdi: backing device info object to make VFS happy and disable read-ahead * * @highest_inum: highest used inode number - * @vfs_gen: VFS inode generation counter * @max_sqnum: current global sequence number - * @cmt_no: commit number (last successfully completed commit) - * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters + * @cmt_no: commit number of the last successfully completed commit, protected + * by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters * @fmt_version: UBIFS on-flash format version + * @ro_compat_version: R/O compatibility version * @uuid: UUID from super block * * @lhead_lnum: log head logical eraseblock number @@ -894,13 +1023,14 @@ struct ubifs_mount_opts { * @cmt_state: commit state * @cs_lock: commit state lock * @cmt_wq: wait queue to sleep on if the log is full and a commit is running - * @fast_unmount: do not run journal commit before un-mounting + * * @big_lpt: flag that LPT is too big to write whole during commit - * @check_lpt_free: flag that indicates LPT GC may be needed - * @nospace: non-zero if the file-system does not have flash space (used as - * optimization) - * @nospace_rp: the same as @nospace, but additionally means that even reserved - * pool is full + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + * recovery) + * @bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @rw_incompat: the media is not R/W compatible * * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @calc_idx_sz @@ -918,13 +1048,20 @@ struct ubifs_mount_opts { * @ileb_nxt: next pre-allocated index LEBs * @old_idx: tree of index nodes obsoleted since the last commit start * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs * * @mst_node: master node * @mst_offs: offset of valid master node * @mst_mutex: protects the master node area, @mst_node, and @mst_offs * + * @max_bu_buf_len: maximum bulk-read buffer length + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * + * @write_reserve_mutex: protects @write_reserve_buf + * @write_reserve_buf: on the write path we allocate memory, which might + * sometimes be unavailable, in which case we use this + * write reserve buffer + * * @log_lebs: number of logical eraseblocks in the log * @log_bytes: log size in bytes * @log_last: last LEB of the log @@ -937,7 +1074,6 @@ struct ubifs_mount_opts { * @main_lebs: count of LEBs in the main area * @main_first: first LEB of the main area * @main_bytes: main area size in bytes - * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * * @key_hash_type: type of the key hash * @key_hash: direntry key hash function @@ -947,43 +1083,42 @@ struct ubifs_mount_opts { * * @min_io_size: minimal input/output unit size * @min_io_shift: number of bits in @min_io_size minus one + * @max_write_size: maximum amount of bytes the underlying flash can write at a + * time (MTD write buffer size) + * @max_write_shift: number of bits in @max_write_size minus one * @leb_size: logical eraseblock size in bytes + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) * @leb_cnt: count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks * @old_leb_cnt: count of logical eraseblocks before re-size * @ro_media: the underlying UBI volume is read-only + * @ro_mount: the file-system was mounted as read-only + * @ro_error: UBIFS switched to R/O mode because an error happened * * @dirty_pg_cnt: number of dirty pages (not used) * @dirty_zn_cnt: number of dirty znodes * @clean_zn_cnt: number of clean znodes * - * @budg_idx_growth: amount of bytes budgeted for index growth - * @budg_data_growth: amount of bytes budgeted for cached data - * @budg_dd_growth: amount of bytes budgeted for cached data that will make - * other data dirty - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, - * but which still have to be taken into account because - * the index has not been committed so far - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; - * @min_idx_lebs: minimum number of LEBs required for the index - * @old_idx_sz: size of index on flash + * @space_lock: protects @bi and @lst + * @lst: lprops statistics + * @bi: budgeting information * @calc_idx_sz: temporary variable which is used to calculate new index size * (contains accurate new index size at end of TNC commit start) - * @lst: lprops statistics - * - * @page_budget: budget for a page - * @inode_budget: budget for an inode - * @dent_budget: budget for a directory entry * * @ref_node_alsz: size of the LEB reference node aligned to the min. flash - * I/O unit + * I/O unit * @mst_node_alsz: master node aligned size * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary * @max_inode_sz: maximum possible inode size in bytes * @max_znode_sz: size of znode in bytes + * + * @leb_overhead: how many bytes are wasted in an LEB when it is filled with + * data nodes of maximum size - used in free space reporting * @dead_wm: LEB dead space watermark * @dark_wm: LEB dark space watermark * @block_cnt: count of 4KiB blocks on the FS @@ -1017,6 +1152,8 @@ struct ubifs_mount_opts { * @sbuf: a buffer of LEB size used by GC and replay for scanning * @idx_gc: list of index LEBs that have been garbage collected * @idx_gc_cnt: number of elements on the idx_gc list + * @gc_seq: incremented for every non-index LEB garbage collected + * @gced_lnum: last non-index LEB that was garbage collected * * @infos_list: links all 'ubifs_info' objects * @umount_mutex: serializes shrinker and un-mount @@ -1045,6 +1182,7 @@ struct ubifs_mount_opts { * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab * @dirty_nn_cnt: number of dirty nnodes * @dirty_pn_cnt: number of dirty pnodes + * @check_lpt_free: flag that indicates LPT GC may be needed * @lpt_sz: LPT size * @lpt_nod_buf: buffer for an on-flash nnode or pnode * @lpt_buf: buffer of LEB size used by LPT @@ -1055,9 +1193,11 @@ struct ubifs_mount_opts { * previous commit start * @uncat_list: list of un-categorized LEBs * @empty_list: list of empty LEBs - * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) - * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1074,40 +1214,35 @@ struct ubifs_mount_opts { * @rp_uid: reserved pool user ID * @rp_gid: reserved pool group ID * - * @empty: if the UBI device is empty - * @replay_tree: temporary tree used during journal replay + * @empty: %1 if the UBI device is empty + * @need_recovery: %1 if the file-system needs recovery + * @replaying: %1 during journal replay + * @mounting: %1 while mounting + * @probing: %1 while attempting to mount if MS_SILENT mount flag is set + * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay * @cs_sqnum: sequence number of first node in the log (commit start node) * @replay_sqnum: sequence number of node currently being replayed - * @need_recovery: file-system needs recovery - * @replaying: set to %1 during journal replay - * @unclean_leb_list: LEBs to recover when mounting ro to rw - * @rcvrd_mst_node: recovered master node to write when mounting ro to rw + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W + * mode + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted + * FS to R/W mode * @size_tree: inode size information for recovery - * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) * @mount_opts: UBIFS-specific mount options * - * @dbg_buf: a buffer of LEB size used for debugging purposes - * @old_zroot: old index root - used by 'dbg_check_old_index()' - * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' - * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * @dbg: debugging-related information */ struct ubifs_info { struct super_block *vfs_sb; struct backing_dev_info bdi; ino_t highest_inum; - unsigned int vfs_gen; unsigned long long max_sqnum; unsigned long long cmt_no; spinlock_t cnt_lock; int fmt_version; + int ro_compat_version; unsigned char uuid[16]; int lhead_lnum; @@ -1131,11 +1266,13 @@ struct ubifs_info { int cmt_state; spinlock_t cs_lock; wait_queue_head_t cmt_wq; - unsigned int fast_unmount:1; + unsigned int big_lpt:1; - unsigned int check_lpt_free:1; - unsigned int nospace:1; - unsigned int nospace_rp:1; + unsigned int space_fixup:1; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; + unsigned int rw_incompat:1; struct mutex tnc_mutex; struct ubifs_zbranch zroot; @@ -1152,15 +1289,18 @@ struct ubifs_info { int ileb_nxt; struct rb_root old_idx; int *bottom_up_buf; -#ifdef CONFIG_UBIFS_FS_DEBUG - int new_ihead_lnum; - int new_ihead_offs; -#endif struct ubifs_mst_node *mst_node; int mst_offs; struct mutex mst_mutex; + int max_bu_buf_len; + struct mutex bu_mutex; + struct bu_info bu; + + struct mutex write_reserve_mutex; + void *write_reserve_buf; + int log_lebs; long long log_bytes; int log_last; @@ -1173,7 +1313,6 @@ struct ubifs_info { int main_lebs; int main_first; long long main_bytes; - int default_compr; uint8_t key_hash_type; uint32_t (*key_hash)(const char *str, int len); @@ -1183,30 +1322,27 @@ struct ubifs_info { int min_io_size; int min_io_shift; + int max_write_size; + int max_write_shift; int leb_size; + int leb_start; int half_leb_size; + int idx_leb_size; int leb_cnt; int max_leb_cnt; int old_leb_cnt; - int ro_media; + unsigned int ro_media:1; + unsigned int ro_mount:1; + unsigned int ro_error:1; atomic_long_t dirty_pg_cnt; atomic_long_t dirty_zn_cnt; atomic_long_t clean_zn_cnt; - long long budg_idx_growth; - long long budg_data_growth; - long long budg_dd_growth; - long long budg_uncommitted_idx; spinlock_t space_lock; - int min_idx_lebs; - unsigned long long old_idx_sz; - unsigned long long calc_idx_sz; struct ubifs_lp_stats lst; - - int page_budget; - int inode_budget; - int dent_budget; + struct ubifs_budg_info bi; + unsigned long long calc_idx_sz; int ref_node_alsz; int mst_node_alsz; @@ -1214,6 +1350,8 @@ struct ubifs_info { int max_idx_node_sz; long long max_inode_sz; int max_znode_sz; + + int leb_overhead; int dead_wm; int dark_wm; int block_cnt; @@ -1247,6 +1385,8 @@ struct ubifs_info { void *sbuf; struct list_head idx_gc; int idx_gc_cnt; + int gc_seq; + int gced_lnum; struct list_head infos_list; struct mutex umount_mutex; @@ -1275,6 +1415,7 @@ struct ubifs_info { int lpt_drty_flgs; int dirty_nn_cnt; int dirty_pn_cnt; + int check_lpt_free; long long lpt_sz; void *lpt_nod_buf; void *lpt_buf; @@ -1287,6 +1428,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; @@ -1300,64 +1442,63 @@ struct ubifs_info { long long rp_size; long long report_rp_size; - uid_t rp_uid; - gid_t rp_gid; + kuid_t rp_uid; + kgid_t rp_gid; /* The below fields are used only during mounting and re-mounting */ - int empty; - struct rb_root replay_tree; + unsigned int empty:1; + unsigned int need_recovery:1; + unsigned int replaying:1; + unsigned int mounting:1; + unsigned int remounting_rw:1; + unsigned int probing:1; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; unsigned long long replay_sqnum; - int need_recovery; - int replaying; struct list_head unclean_leb_list; struct ubifs_mst_node *rcvrd_mst_node; struct rb_root size_tree; - int remounting_rw; struct ubifs_mount_opts mount_opts; -#ifdef CONFIG_UBIFS_FS_DEBUG - void *dbg_buf; - struct ubifs_zbranch old_zroot; - int old_zroot_level; - unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; -#endif + struct ubifs_debug_info *dbg; }; extern struct list_head ubifs_infos; extern spinlock_t ubifs_infos_lock; extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; -extern struct super_operations ubifs_super_operations; -extern struct address_space_operations ubifs_file_address_operations; -extern struct file_operations ubifs_file_operations; -extern struct inode_operations ubifs_file_inode_operations; -extern struct file_operations ubifs_dir_operations; -extern struct inode_operations ubifs_dir_inode_operations; -extern struct inode_operations ubifs_symlink_inode_operations; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ +void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg); +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len); +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len); +int ubifs_leb_unmap(struct ubifs_info *c, int lnum); +int ubifs_leb_map(struct ubifs_info *c, int lnum); +int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); -int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, - int dtype); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs); int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs); int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int lnum, int offs); int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, - int offs, int dtype); + int offs); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet); + int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); int ubifs_io_init(struct ubifs_info *c); @@ -1369,7 +1510,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); /* scan.c */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf); + int offs, void *sbuf, int quiet); void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, int offs, int quiet); @@ -1399,8 +1540,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, int deletion, int xent); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); -int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, - int last_reference); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, @@ -1423,13 +1564,15 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); -long long ubifs_budg_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space_nolock(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, int squeeze); int ubifs_find_free_leb_for_idx(struct ubifs_info *c); int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, @@ -1440,8 +1583,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); /* tnc.c */ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode **zn, int *n); -int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, - void *node); int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, void *node, const struct qstr *nm); int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, @@ -1471,6 +1612,8 @@ void destroy_old_idx(struct ubifs_info *c); int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, int lnum, int offs); int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); +int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); +int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); /* tnc_misc.c */ struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, @@ -1492,7 +1635,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); +unsigned long ubifs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long ubifs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); @@ -1511,6 +1657,7 @@ int ubifs_write_master(struct ubifs_info *c); int ubifs_read_superblock(struct ubifs_info *c); struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); +int ubifs_fixup_free_space(struct ubifs_info *c); /* replay.c */ int ubifs_validate_entry(struct ubifs_info *c, @@ -1531,6 +1678,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); int ubifs_orphan_start_commit(struct ubifs_info *c); int ubifs_orphan_end_commit(struct ubifs_info *c); int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); +int ubifs_clear_orphans(struct ubifs_info *c); /* lpt.c */ int ubifs_calc_lpt_geom(struct ubifs_info *c); @@ -1559,6 +1707,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); +/* Needed only in debugging code in lpt_commit.c */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode); /* lpt_commit.c */ int ubifs_lpt_start_commit(struct ubifs_info *c); @@ -1567,13 +1718,11 @@ int ubifs_lpt_post_commit(struct ubifs_info *c); void ubifs_lpt_free(struct ubifs_info *c, int wr_only); /* lprops.c */ -void ubifs_get_lprops(struct ubifs_info *c); const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, int free, int dirty, int flags, int idx_gc_cnt); -void ubifs_release_lprops(struct ubifs_info *c); -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, int cat); void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, @@ -1590,14 +1739,15 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); +int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, - int mode); + umode_t mode); int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -1616,11 +1766,11 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); @@ -1636,7 +1786,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* compressor.c */ int __init ubifs_compressors_init(void); -void __exit ubifs_compressors_exit(void); +void ubifs_compressors_exit(void); void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type); int ubifs_decompress(const void *buf, int len, void *out, int *out_len, |
