diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-20 13:54:51 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-20 13:54:52 -0800 |
commit | a13eea6bd9ee62ceacfc5243d54c84396bc86cb4 (patch) | |
tree | 46192468880c144f2b367deb5188267866ee1fac | |
parent | fcc16882ac4532aaa644bff444f0c5d6228ba71e (diff) | |
parent | 6666e6aa9f36b2bfd6b30072c07b34f2a24becf1 (diff) |
Merge tag 'for-3.8-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull new F2FS filesystem from Jaegeuk Kim:
"Introduce a new file system, Flash-Friendly File System (F2FS), to
Linux 3.8.
Highlights:
- Add initial f2fs source codes
- Fix an endian conversion bug
- Fix build failures on random configs
- Fix the power-off-recovery routine
- Minor cleanup, coding style, and typos patches"
From the Kconfig help text:
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
addressing the fundamental issues in LFS, which are snowball effect
of wandering tree and high cleaning overhead.
Since flash-based storages show different characteristics according to
the internal geometry or flash memory management schemes aka FTL, F2FS
and tools support various parameters not only for configuring on-disk
layout, but also for selecting allocation and cleaning algorithms.
and there's an article by Neil Brown about it on lwn.net:
http://lwn.net/Articles/518988/
* tag 'for-3.8-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (36 commits)
f2fs: fix tracking parent inode number
f2fs: cleanup the f2fs_bio_alloc routine
f2fs: introduce accessor to retrieve number of dentry slots
f2fs: remove redundant call to f2fs_put_page in delete entry
f2fs: make use of GFP_F2FS_ZERO for setting gfp_mask
f2fs: rewrite f2fs_bio_alloc to make it simpler
f2fs: fix a typo in f2fs documentation
f2fs: remove unused variable
f2fs: move error condition for mkdir at proper place
f2fs: remove unneeded initialization
f2fs: check read only condition before beginning write out
f2fs: remove unneeded memset from init_once
f2fs: show error in case of invalid mount arguments
f2fs: fix the compiler warning for uninitialized use of variable
f2fs: resolve build failures
f2fs: adjust kernel coding style
f2fs: fix endian conversion bugs reported by sparse
f2fs: remove unneeded version.h header file from f2fs.h
f2fs: update the f2fs document
f2fs: update Kconfig and Makefile
...
-rw-r--r-- | Documentation/filesystems/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 421 | ||||
-rw-r--r-- | fs/Kconfig | 1 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/f2fs/Kconfig | 53 | ||||
-rw-r--r-- | fs/f2fs/Makefile | 7 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 414 | ||||
-rw-r--r-- | fs/f2fs/acl.h | 57 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 794 | ||||
-rw-r--r-- | fs/f2fs/data.c | 702 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 361 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 672 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 1083 | ||||
-rw-r--r-- | fs/f2fs/file.c | 636 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 742 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 117 | ||||
-rw-r--r-- | fs/f2fs/hash.c | 97 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 268 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 503 | ||||
-rw-r--r-- | fs/f2fs/node.c | 1764 | ||||
-rw-r--r-- | fs/f2fs/node.h | 353 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 375 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 1791 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 618 | ||||
-rw-r--r-- | fs/f2fs/super.c | 657 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 440 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 145 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 413 | ||||
-rw-r--r-- | include/uapi/linux/magic.h | 1 |
29 files changed, 13488 insertions, 0 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 7b52ba7bf32..8042050eb26 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -50,6 +50,8 @@ ext4.txt - info, mount options and specifications for the Ext4 filesystem. files.txt - info on file management in the Linux kernel. +f2fs.txt + - info and mount options for the F2FS filesystem. fuse.txt - info on the Filesystem in User SpacE including mount options. gfs2.txt diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt new file mode 100644 index 00000000000..8fbd8b46ee3 --- /dev/null +++ b/Documentation/filesystems/f2fs.txt @@ -0,0 +1,421 @@ +================================================================================ +WHAT IS Flash-Friendly File System (F2FS)? +================================================================================ + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from the sketch in the design level. + +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristic +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The file system formatting tool, "mkfs.f2fs", is available from the following +git tree: +>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: +>> linux-f2fs-devel@lists.sourceforge.net + +================================================================================ +BACKGROUND AND DESIGN ISSUES +================================================================================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26–52. + +Wandering Tree Problem +---------------------- +In LFS, when a file data is updated and written to the end of log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called as wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +as a cleaning process. + +The process consists of three operations as follows. +1. A victim segment is selected through referencing segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies to users. And also definitely, it should reduce the +amount of valid data to be moved, and move them quickly as well. + +================================================================================ +KEY FEATURES +================================================================================ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, “node”, that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the “node” + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +================================================================================ +MOUNT OPTIONS +================================================================================ + +background_gc_off Turn off cleaning operations, namely garbage collection, + triggered in background when I/O subsystem is idle. +disable_roll_forward Disable the roll-forward recovery routine +discard Issue discard/TRIM commands when a segment is cleaned. +no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + does not aware of cold files such as media files. + +================================================================================ +DEBUGFS ENTRIES +================================================================================ + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +================================================================================ +USAGE +================================================================================ + +1. Download userland tools and compile them. + +2. Skip, if f2fs was compiled statically inside kernel. + Otherwise, insert the f2fs.ko module. + # insmod f2fs.ko + +3. Create a directory trying to mount + # mkdir /mnt/f2fs + +4. Format the block device, and then mount as f2fs + # mkfs.f2fs -l label /dev/block_device + # mount -t f2fs /dev/block_device /mnt/f2fs + +Format options +-------------- +-l [label] : Give a volume label, up to 256 unicode name. +-a [0 or 1] : Split start location of each area for heap-based allocation. + 1 is set by default, which performs this. +-o [int] : Set overprovision ratio in percent over volume size. + 5 is set by default. +-s [int] : Set the number of segments per section. + 1 is set by default. +-z [int] : Set the number of sections per zone. + 1 is set by default. +-e [str] : Set basic extension list. e.g. "mp3,gif,mov" + +================================================================================ +DESIGN +================================================================================ + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below. + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Node | Segment | Segment | | + | Superblock | Checkpoint | Address | Info. | Summary | Main | + | (SB) | (CP) | Table (NAT) | Table (SIT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + : It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + : It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Node Address Table (NAT) + : It is composed of a block address table for all the node blocks stored in + Main area. + +- Segment Information Table (SIT) + : It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Segment Summary Area (SSA) + : It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + : It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below. + + +--------+----------+---------+ + | CP | NAT | SIT | + +--------+----------+---------+ + . . . . + . . . . + . . . . + +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | NAT #0 | NAT #1 | SIT #0 | SIT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . + +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +---------------------- +A : bucket +B : block +N : MAX_DIR_HASH_DEPTH +---------------------- + +level #0 | A(2B) + | +level #1 | A(2B) - A(2B) + | +level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . +level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . +level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by, + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. In this way, F2FS scans hash tables in +each levels incrementally from 1 to N. In each levels F2FS needs to scan only +one bucket determined by the following equation, which shows O(log(# of files)) +complexity. + + bucket number to scan in level #n = (hash value) % (# of buckets in level #n) + +In the case of file creation, F2FS finds empty consecutive slots that cover the +file name. F2FS searches the empty slots in the hash tables of whole levels from +1 to N in the same way as the lookup operation. + +The following figure shows an example of two cases holding children. + --------------> Dir <-------------- + | | + child child + + child - child [hole] - child + + child - child - child [hole] - [hole] - child + + Case 1: Case 2: + Number of children = 6, Number of children = 3, + File size = 7 File size = 7 + +Default Block Allocation +------------------------ + +At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node +and Hot/Warm/Cold data. + +- Hot node contains direct node blocks of directories. +- Warm node contains direct node blocks except hot node blocks. +- Cold node contains indirect node blocks +- Hot data contains dentry blocks +- Warm data contains data blocks except hot and cold data blocks +- Cold data contains multimedia data or migrated data blocks + +LFS has two schemes for free space management: threaded log and copy-and-compac- +tion. The copy-and-compaction scheme which is known as cleaning, is well-suited +for devices showing very good sequential write performance, since free segments +are served all the time for writing new data. However, it suffers from cleaning +overhead under high utilization. Contrarily, the threaded log scheme suffers +from random writes, but no cleaning process is needed. F2FS adopts a hybrid +scheme where the copy-and-compaction scheme is adopted by default, but the +policy is dynamically changed to the threaded log scheme according to the file +system status. + +In order to align F2FS with underlying flash-based storage, F2FS allocates a +segment in a unit of section. F2FS expects that the section size would be the +same as the unit size of garbage collection in FTL. Furthermore, with respect +to the mapping granularity in FTL, F2FS allocates each section of the active +logs from different zones as much as possible, since FTL can write the data in +the active logs into one allocation unit according to its mapping granularity. + +Cleaning process +---------------- + +F2FS does cleaning both on demand and in the background. On-demand cleaning is +triggered when there are not enough free segments to serve VFS calls. Background +cleaner is operated by a kernel thread, and triggers the cleaning job when the +system is idle. + +F2FS supports two victim selection policies: greedy and cost-benefit algorithms. +In the greedy algorithm, F2FS selects a victim segment having the smallest number +of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment +according to the segment age and the number of valid blocks in order to address +log block thrashing problem in the greedy algorithm. F2FS adopts the greedy +algorithm for on-demand cleaner, while background cleaner adopts cost-benefit +algorithm. + +In order to identify whether the data in the victim segment are valid or not, +F2FS manages a bitmap. Each bit represents the validity of a block, and the +bitmap is composed of a bit stream covering whole blocks in main area. diff --git a/fs/Kconfig b/fs/Kconfig index eaff24a1950..cfe512fd1ca 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -220,6 +220,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" +source "fs/f2fs/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 1d7af79288a..9d53192236f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_OCFS2_FS) += ocfs2/ obj-$(CONFIG_BTRFS_FS) += btrfs/ obj-$(CONFIG_GFS2_FS) += gfs2/ +obj-$(CONFIG_F2FS_FS) += f2fs/ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig new file mode 100644 index 00000000000..fd27e7e6326 --- /dev/null +++ b/fs/f2fs/Kconfig @@ -0,0 +1,53 @@ +config F2FS_FS + tristate "F2FS filesystem support (EXPERIMENTAL)" + depends on BLOCK + help + F2FS is based on Log-structured File System (LFS), which supports + versatile "flash-friendly" features. The design has been focused on + addressing the fundamental issues in LFS, which are snowball effect + of wandering tree and high cleaning overhead. + + Since flash-based storages show different characteristics according to + the internal geometry or flash memory management schemes aka FTL, F2FS + and tools support various parameters not only for configuring on-disk + layout, but also for selecting allocation and cleaning algorithms. + + If unsure, say N. + +config F2FS_STAT_FS + bool "F2FS Status Information" + depends on F2FS_FS && DEBUG_FS + default y + help + /sys/kernel/debug/f2fs/ contains information about all the partitions + mounted as f2fs. Each file shows the whole f2fs information. + + /sys/kernel/debug/f2fs/status includes: + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +config F2FS_FS_XATTR + bool "F2FS extended attributes" + depends on F2FS_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + +config F2FS_FS_POSIX_ACL + bool "F2FS Access Control Lists" + depends on F2FS_FS_XATTR + select FS_POSIX_ACL + default y + help + Posix Access Control Lists (ACLs) support permissions for users and + gourps beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile new file mode 100644 index 00000000000..27a0820340b --- /dev/null +++ b/fs/f2fs/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_F2FS_FS) += f2fs.o + +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o +f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o +f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o +f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c new file mode 100644 index 00000000000..fed74d193ff --- /dev/null +++ b/fs/f2fs/acl.c @@ -0,0 +1,414 @@ +/* + * fs/f2fs/acl.c + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * Portions of this code from linux/fs/ext2/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/f2fs_fs.h> +#include "f2fs.h" +#include "xattr.h" +#include "acl.h" + +#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) + +static inline size_t f2fs_acl_size(int count) +{ + if (count <= 4) { + return sizeof(struct f2fs_acl_header) + + count * sizeof(struct f2fs_acl_entry_short); + } else { + return sizeof(struct f2fs_acl_header) + + 4 * sizeof(struct f2fs_acl_entry_short) + + (count - 4) * sizeof(struct f2fs_acl_entry); + } +} + +static inline int f2fs_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(struct f2fs_acl_header); + s = size - 4 * sizeof(struct f2fs_acl_entry_short); + if (s < 0) { + if (size % sizeof(struct f2fs_acl_entry_short)) + return -1; + return size / sizeof(struct f2fs_acl_entry_short); + } else { + if (s % sizeof(struct f2fs_acl_entry)) + return -1; + return s / sizeof(struct f2fs_acl_entry) + 4; + } +} + +static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) +{ + int i, count; + struct posix_acl *acl; + struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value; + struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); + const char *end = value + size; + + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) + return ERR_PTR(-EINVAL); + + count = f2fs_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + + if ((char *)entry > end) + goto fail; + + acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl->a_entries[i].e_id = ACL_UNDEFINED_ID; + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + + case ACL_USER: + acl->a_entries[i].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_GROUP: + acl->a_entries[i].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + default: + goto fail; + } + } + if ((char *)entry != end) + goto fail; + return acl; +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + struct f2fs_acl_header *f2fs_acl; + struct f2fs_acl_entry *entry; + int i; + + f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * + sizeof(struct f2fs_acl_entry), GFP_KERNEL); + if (!f2fs_acl) + return ERR_PTR(-ENOMEM); + + f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION); + entry = (struct f2fs_acl_entry *)(f2fs_acl + 1); + + for (i = 0; i < acl->a_count; i++) { + + entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm); + + switch (acl->a_entries[i].e_tag) { + case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, + acl->a_entries[i].e_uid)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_GROUP: + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, + acl->a_entries[i].e_gid)); + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry)); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + entry = (struct f2fs_acl_entry *)((char *)entry + + sizeof(struct f2fs_acl_entry_short)); + break; + default: + goto fail; + } + } + *size = f2fs_acl_size(acl->a_count); + return (void *)f2fs_acl; + +fail: + kfree(f2fs_acl); + return ERR_PTR(-EINVAL); +} + +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; + void *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(sbi, POSIX_ACL)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + if (type == ACL_TYPE_ACCESS) + name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; + + retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = f2fs_getxattr(inode, name_index, "", value, retval); + } + + if (retval < 0) { + if (retval == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(retval); + } else { + acl = f2fs_acl_from_disk(value, retval); + } + kfree(value); + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode_info *fi = F2FS_I(inode); + int name_index; |