aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 15:18:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 15:18:49 -0800
commit31aeb6c815549948571eec988ad9728c27d7a68d (patch)
treee155438be253924ebb1b792182e406947369b3eb /fs
parentc40f6f8bbc4cbd2902671aacd587400ddca62627 (diff)
parentfc55584175589b70f4c30cb594f09f4bd6ad5d40 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-linus: MAINTAINERS: squashfs entry Squashfs: documentation Squashfs: initrd support Squashfs: Kconfig entry Squashfs: Makefiles Squashfs: header files Squashfs: block operations Squashfs: cache operations Squashfs: uid/gid lookup operations Squashfs: fragment block operations Squashfs: export operations Squashfs: super block operations Squashfs: symlink operations Squashfs: regular file operations Squashfs: directory readdir operations Squashfs: directory lookup operations Squashfs: inode operations
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig52
-rw-r--r--fs/Makefile1
-rw-r--r--fs/squashfs/Makefile8
-rw-r--r--fs/squashfs/block.c274
-rw-r--r--fs/squashfs/cache.c412
-rw-r--r--fs/squashfs/dir.c235
-rw-r--r--fs/squashfs/export.c155
-rw-r--r--fs/squashfs/file.c502
-rw-r--r--fs/squashfs/fragment.c98
-rw-r--r--fs/squashfs/id.c94
-rw-r--r--fs/squashfs/inode.c346
-rw-r--r--fs/squashfs/namei.c242
-rw-r--r--fs/squashfs/squashfs.h90
-rw-r--r--fs/squashfs/squashfs_fs.h381
-rw-r--r--fs/squashfs/squashfs_fs_i.h45
-rw-r--r--fs/squashfs/squashfs_fs_sb.h76
-rw-r--r--fs/squashfs/super.c440
-rw-r--r--fs/squashfs/symlink.c118
18 files changed, 3569 insertions, 0 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 02cff86af1b..51307b0fdf0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -932,6 +932,58 @@ config CRAMFS
If unsure, say N.
+config SQUASHFS
+ tristate "SquashFS 4.0 - Squashed file system support"
+ depends on BLOCK
+ select ZLIB_INFLATE
+ help
+ Saying Y here includes support for SquashFS 4.0 (a Compressed
+ Read-Only File System). Squashfs is a highly compressed read-only
+ filesystem for Linux. It uses zlib compression to compress both
+ files, inodes and directories. Inodes in the system are very small
+ and all blocks are packed to minimise data overhead. Block sizes
+ greater than 4K are supported up to a maximum of 1 Mbytes (default
+ block size 128K). SquashFS 4.0 supports 64 bit filesystems and files
+ (larger than 4GB), full uid/gid information, hard links and
+ timestamps.
+
+ Squashfs is intended for general read-only filesystem use, for
+ archival use (i.e. in cases where a .tar.gz file may be used), and in
+ embedded systems where low overhead is needed. Further information
+ and tools are available from http://squashfs.sourceforge.net.
+
+ If you want to compile this as a module ( = code which can be
+ inserted in and removed from the running kernel whenever you want),
+ say M here and read <file:Documentation/modules.txt>. The module
+ will be called squashfs. Note that the root file system (the one
+ containing the directory /) cannot be compiled as a module.
+
+ If unsure, say N.
+
+config SQUASHFS_EMBEDDED
+
+ bool "Additional option for memory-constrained systems"
+ depends on SQUASHFS
+ default n
+ help
+ Saying Y here allows you to specify cache size.
+
+ If unsure, say N.
+
+config SQUASHFS_FRAGMENT_CACHE_SIZE
+ int "Number of fragments cached" if SQUASHFS_EMBEDDED
+ depends on SQUASHFS
+ default "3"
+ help
+ By default SquashFS caches the last 3 fragments read from
+ the filesystem. Increasing this amount may mean SquashFS
+ has to re-read fragments less often from disk, at the expense
+ of extra system memory. Decreasing this amount will mean
+ SquashFS uses less memory at the expense of extra reads from disk.
+
+ Note there must be at least one cached fragment. Anything
+ much more than three will probably not make much difference.
+
config VXFS_FS
tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index bc4e14df108..38bc735c67a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_JBD) += jbd/
obj-$(CONFIG_JBD2) += jbd2/
obj-$(CONFIG_EXT2_FS) += ext2/
obj-$(CONFIG_CRAMFS) += cramfs/
+obj-$(CONFIG_SQUASHFS) += squashfs/
obj-y += ramfs/
obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
obj-$(CONFIG_CODA_FS) += coda/
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
new file mode 100644
index 00000000000..8258cf9a031
--- /dev/null
+++ b/fs/squashfs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux squashfs routines.
+#
+
+obj-$(CONFIG_SQUASHFS) += squashfs.o
+squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
+squashfs-y += namei.o super.o symlink.o
+#squashfs-y += squashfs2_0.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
new file mode 100644
index 00000000000..c837dfc2b3c
--- /dev/null
+++ b/fs/squashfs/block.c
@@ -0,0 +1,274 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * block.c
+ */
+
+/*
+ * This file implements the low-level routines to read and decompress
+ * datablocks and metadata blocks.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Read the metadata block length, this is stored in the first two
+ * bytes of the metadata block.
+ */
+static struct buffer_head *get_block_length(struct super_block *sb,
+ u64 *cur_index, int *offset, int *length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head *bh;
+
+ bh = sb_bread(sb, *cur_index);
+ if (bh == NULL)
+ return NULL;
+
+ if (msblk->devblksize - *offset == 1) {
+ *length = (unsigned char) bh->b_data[*offset];
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *length |= (unsigned char) bh->b_data[0] << 8;
+ *offset = 1;
+ } else {
+ *length = (unsigned char) bh->b_data[*offset] |
+ (unsigned char) bh->b_data[*offset + 1] << 8;
+ *offset += 2;
+ }
+
+ return bh;
+}
+
+
+/*
+ * Read and decompress a metadata block or datablock. Length is non-zero
+ * if a datablock is being read (the size is stored elsewhere in the
+ * filesystem), otherwise the length is obtained from the first two bytes of
+ * the metadata block. A bit in the length field indicates if the block
+ * is stored uncompressed in the filesystem (usually because compression
+ * generated a larger block - this does occasionally happen with zlib).
+ */
+int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
+ int length, u64 *next_index, int srclength)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, compressed, b = 0, k = 0, page = 0, avail;
+
+
+ bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
+ sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
+ return -ENOMEM;
+
+ if (length) {
+ /*
+ * Datablock.
+ */
+ bytes = -offset;
+ compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+ if (next_index)
+ *next_index = index + length;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
+ index, compressed ? "" : "un", length, srclength);
+
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto read_failure;
+
+ for (b = 0; bytes < length; b++, cur_index++) {
+ bh[b] = sb_getblk(sb, cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b, bh);
+ } else {
+ /*
+ * Metadata block.
+ */
+ if ((index + 2) > msblk->bytes_used)
+ goto read_failure;
+
+ bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+ if (bh[0] == NULL)
+ goto read_failure;
+ b = 1;
+
+ bytes = msblk->devblksize - offset;
+ compressed = SQUASHFS_COMPRESSED(length);
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (next_index)
+ *next_index = index + length + 2;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
+ compressed ? "" : "un", length);
+
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto block_release;
+
+ for (; bytes < length; b++) {
+ bh[b] = sb_getblk(sb, ++cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b - 1, bh + 1);
+ }
+
+ if (compressed) {
+ int zlib_err = 0, zlib_init = 0;
+
+ /*
+ * Uncompress block.
+ */
+
+ mutex_lock(&msblk->read_data_mutex);
+
+ msblk->stream.avail_out = 0;
+ msblk->stream.avail_in = 0;
+
+ bytes = length;
+ do {
+ if (msblk->stream.avail_in == 0 && k < b) {
+ avail = min(bytes, msblk->devblksize - offset);
+ bytes -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
+ if (avail == 0) {
+ offset = 0;
+ put_bh(bh[k++]);
+ continue;
+ }
+
+ msblk->stream.next_in = bh[k]->b_data + offset;
+ msblk->stream.avail_in = avail;
+ offset = 0;
+ }
+
+ if (msblk->stream.avail_out == 0) {
+ msblk->stream.next_out = buffer[page++];
+ msblk->stream.avail_out = PAGE_CACHE_SIZE;
+ }
+
+ if (!zlib_init) {
+ zlib_err = zlib_inflateInit(&msblk->stream);
+ if (zlib_err != Z_OK) {
+ ERROR("zlib_inflateInit returned"
+ " unexpected result 0x%x,"
+ " srclength %d\n", zlib_err,
+ srclength);
+ goto release_mutex;
+ }
+ zlib_init = 1;
+ }
+
+ zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
+
+ if (msblk->stream.avail_in == 0 && k < b)
+ put_bh(bh[k++]);
+ } while (zlib_err == Z_OK);
+
+ if (zlib_err != Z_STREAM_END) {
+ ERROR("zlib_inflate returned unexpected result"
+ " 0x%x, srclength %d, avail_in %d,"
+ " avail_out %d\n", zlib_err, srclength,
+ msblk->stream.avail_in,
+ msblk->stream.avail_out);
+ goto release_mutex;
+ }
+
+ zlib_err = zlib_inflateEnd(&msblk->stream);
+ if (zlib_err != Z_OK) {
+ ERROR("zlib_inflateEnd returned unexpected result 0x%x,"
+ " srclength %d\n", zlib_err, srclength);
+ goto release_mutex;
+ }
+ length = msblk->stream.total_out;
+ mutex_unlock(&msblk->read_data_mutex);
+ } else {
+ /*
+ * Block is uncompressed.
+ */
+ int i, in, pg_offset = 0;
+
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
+
+ for (bytes = length; k < b; k++) {
+ in = min(bytes, msblk->devblksize - offset);
+ bytes -= in;
+ while (in) {
+ if (pg_offset == PAGE_CACHE_SIZE) {
+ page++;
+ pg_offset = 0;
+ }
+ avail = min_t(int, in, PAGE_CACHE_SIZE -
+ pg_offset);
+ memcpy(buffer[page] + pg_offset,
+ bh[k]->b_data + offset, avail);
+ in -= avail;
+ pg_offset += avail;
+ offset += avail;
+ }
+ offset = 0;
+ put_bh(bh[k]);
+ }
+ }
+
+ kfree(bh);
+ return length;
+
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
+
+block_release:
+ for (; k < b; k++)
+ put_bh(bh[k]);
+
+read_failure:
+ ERROR("sb_bread failed reading block 0x%llx\n", cur_index);
+ kfree(bh);
+ return -EIO;
+}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
new file mode 100644
index 00000000000..f29eda16d25
--- /dev/null
+++ b/fs/squashfs/cache.c
@@ -0,0 +1,412 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * cache.c
+ */
+
+/*
+ * Blocks in Squashfs are compressed. To avoid repeatedly decompressing
+ * recently accessed data Squashfs uses two small metadata and fragment caches.
+ *
+ * This file implements a generic cache implementation used for both caches,
+ * plus functions layered ontop of the generic cache implementation to
+ * access the metadata and fragment caches.
+ *
+ * To avoid out of memory and fragmentation isssues with vmalloc the cache
+ * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
+ *
+ * It should be noted that the cache is not used for file datablocks, these
+ * are decompressed and cached in the page-cache in the normal way. The
+ * cache is only used to temporarily cache fragment and metadata blocks
+ * which have been read as as a result of a metadata (i.e. inode or
+ * directory) or fragment access. Because metadata and fragments are packed
+ * together into blocks (to gain greater compression) the read of a particular
+ * piece of metadata or fragment will retrieve other metadata/fragments which
+ * have been packed with it, these because of locality-of-reference may be read
+ * in the near future. Temporarily caching them ensures they are available for
+ * near future access without requiring an additional read and decompress.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/zlib.h>
+#include <linux/pagemap.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Look-up block in cache, and increment usage count. If not in cache, read
+ * and decompress it from disk.
+ */
+struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
+ struct squashfs_cache *cache, u64 block, int length)
+{
+ int i, n;
+ struct squashfs_cache_entry *entry;
+
+ spin_lock(&cache->lock);
+
+ while (1) {
+ for (i = 0; i < cache->entries; i++)
+ if (cache->entry[i].block == block)
+ break;
+
+ if (i == cache->entries) {
+ /*
+ * Block not in cache, if all cache entries are used
+ * go to sleep waiting for one to become available.
+ */
+ if (cache->unused == 0) {
+ cache->num_waiters++;
+ spin_unlock(&cache->lock);
+ wait_event(cache->wait_queue, cache->unused);
+ spin_lock(&cache->lock);
+ cache->num_waiters--;
+ continue;
+ }
+
+ /*
+ * At least one unused cache entry. A simple
+ * round-robin strategy is used to choose the entry to
+ * be evicted from the cache.
+ */
+ i = cache->next_blk;
+ for (n = 0; n < cache->entries; n++) {
+ if (cache->entry[i].refcount == 0)
+ break;
+ i = (i + 1) % cache->entries;
+ }
+
+ cache->next_blk = (i + 1) % cache->entries;
+ entry = &cache->entry[i];
+
+ /*
+ * Initialise choosen cache entry, and fill it in from
+ * disk.
+ */
+ cache->unused--;
+ entry->block = block;
+ entry->refcount = 1;
+ entry->pending = 1;
+ entry->num_waiters = 0;
+ entry->error = 0;
+ spin_unlock(&cache->lock);
+
+ entry->length = squashfs_read_data(sb, entry->data,
+ block, length, &entry->next_index,
+ cache->block_size);
+
+ spin_lock(&cache->lock);
+
+ if (entry->length < 0)
+ entry->error = entry->length;
+
+ entry->pending = 0;
+
+ /*
+ * While filling this entry one or more other processes
+ * have looked it up in the cache, and have slept
+ * waiting for it to become available.
+ */
+ if (entry->num_waiters) {
+ spin_unlock(&cache->lock);
+ wake_up_all(&entry->wait_queue);
+ } else
+ spin_unlock(&cache->lock);
+
+ goto out;
+ }
+
+ /*
+ * Block already in cache. Increment refcount so it doesn't
+ * get reused until we're finished with it, if it was
+ * previously unused there's one less cache entry available
+ * for reuse.
+ */
+ entry = &cache->entry[i];
+ if (entry->refcount == 0)
+ cache->unused--;
+ entry->refcount++;
+
+ /*
+ * If the entry is currently being filled in by another process
+ * go to sleep waiting for it to become available.
+ */
+ if (entry->pending) {
+ entry->num_waiters++;
+ spin_unlock(&cache->lock);
+ wait_event(entry->wait_queue, !entry->pending);
+ } else
+ spin_unlock(&cache->lock);
+
+ goto out;
+ }
+
+out:
+ TRACE("Got %s %d, start block %lld, refcount %d, error %d\n",
+ cache->name, i, entry->block, entry->refcount, entry->error);
+
+ if (entry->error)
+ ERROR("Unable to read %s cache entry [%llx]\n", cache->name,
+ block);
+ return entry;
+}
+
+
+/*
+ * Release cache entry, once usage count is zero it can be reused.
+ */
+void squashfs_cache_put(struct squashfs_cache_entry *entry)
+{
+ struct squashfs_cache *cache = entry->cache;
+
+ spin_lock(&cache->lock);
+ entry->refcount--;
+ if (entry->refcount == 0) {
+ cache->unused++;
+ /*
+ * If there's any processes waiting for a block to become
+ * available, wake one up.
+ */
+ if (cache->num_waiters) {
+ spin_unlock(&cache->lock);
+ wake_up(&cache->wait_queue);
+ return;
+ }
+ }
+ spin_unlock(&cache->lock);
+}
+
+/*
+ * Delete cache reclaiming all kmalloced buffers.
+ */
+void squashfs_cache_delete(struct squashfs_cache *cache)
+{
+ int i, j;
+
+ if (cache == NULL)
+ return;
+
+ for (i = 0; i < cache->entries; i++) {
+ if (cache->entry[i].data) {
+ for (j = 0; j < cache->pages; j++)
+ kfree(cache->entry[i].data[j]);
+ kfree(cache->entry[i].data);
+ }
+ }
+
+ kfree(cache->entry);
+ kfree(cache);
+}
+
+
+/*
+ * Initialise cache allocating the specified number of entries, each of
+ * size block_size. To avoid vmalloc fragmentation issues each entry
+ * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers.
+ */
+struct squashfs_cache *squashfs_cache_init(char *name, int entries,
+ int block_size)
+{
+ int i, j;
+ struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+
+ if (cache == NULL) {
+ ERROR("Failed to allocate %s cache\n", name);
+ return NULL;
+ }
+
+ cache->entry = kcalloc(entries, sizeof(*(cache->entry)), GFP_KERNEL);
+ if (cache->entry == NULL) {
+ ERROR("Failed to allocate %s cache\n", name);
+ goto cleanup;
+ }
+
+ cache->next_blk = 0;
+ cache->unused = entries;
+ cache->entries = entries;
+ cache->block_size = block_size;
+ cache->pages = block_size >> PAGE_CACHE_SHIFT;
+ cache->name = name;
+ cache->num_waiters = 0;
+ spin_lock_init(&cache->lock);
+ init_waitqueue_head(&cache->wait_queue);
+
+ for (i = 0; i < entries; i++) {
+ struct squashfs_cache_entry *entry = &cache->entry[i];
+
+ init_waitqueue_head(&cache->entry[i].wait_queue);
+ entry->cache = cache;
+ entry->block = SQUASHFS_INVALID_BLK;
+ entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
+ if (entry->data == NULL) {
+ ERROR("Failed to allocate %s cache entry\n", name);
+ goto cleanup;
+ }
+
+ for (j = 0; j < cache->pages; j++) {
+ entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (entry->data[j] == NULL) {
+ ERROR("Failed to allocate %s buffer\n", name);
+ goto cleanup;
+ }
+ }
+ }
+
+ return cache;
+
+cleanup:
+ squashfs_cache_delete(cache);
+ return NULL;
+}
+
+
+/*
+ * Copy upto length bytes from cache entry to buffer starting at offset bytes
+ * into the cache entry. If there's not length bytes then copy the number of
+ * bytes available. In all cases return the number of bytes copied.
+ */
+int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
+ int offset, int length)
+{
+ int remaining = length;
+
+ if (length == 0)
+ return 0;
+ else if (buffer == NULL)
+ return min(length, entry->length - offset);
+
+ while (offset < entry->length) {
+ void *buff = entry->data[offset / PAGE_CACHE_SIZE]
+ + (offset % PAGE_CACHE_SIZE);
+ int bytes = min_t(int, entry->length - offset,
+ PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
+
+ if (bytes >= remaining) {
+ memcpy(buffer, buff, remaining);
+ remaining = 0;
+ break;
+ }
+
+ memcpy(buffer, buff, bytes);
+ buffer += bytes;
+ remaining -= bytes;
+ offset += bytes;
+ }
+
+ return length - remaining;
+}
+
+
+/*
+ * Read length bytes from metadata position <block, offset> (block is the
+ * start of the compressed block on disk, and offset is the offset into
+ * the block once decompressed). Data is packed into consecutive blocks,
+ * and length bytes may require reading more than one block.
+ */
+int squashfs_read_metadata(struct super_block *sb, void *buffer,
+ u64 *block, int *offset, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int bytes, copied = length;
+ struct squashfs_cache_entry *entry;
+
+ TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
+
+ while (length) {
+ entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
+ if (entry->error)
+ return entry->error;
+ else if (*offset >= entry->length)
+ return -EIO;
+
+ bytes = squashfs_copy_data(buffer, entry, *offset, length);
+ if (buffer)
+ buffer += bytes;
+ length -= bytes;
+ *offset += bytes;
+
+ if (*offset == entry->length) {
+ *block = entry->next_index;
+ *offset = 0;
+ }
+
+ squashfs_cache_put(entry);
+ }
+
+ return copied;
+}
+
+
+/*
+ * Look-up in the fragmment cache the fragment located at <start_block> in the
+ * filesystem. If necessary read and decompress it from disk.
+ */
+struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *sb,
+ u64 start_block, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+ return squashfs_cache_get(sb, msblk->fragment_cache, start_block,
+ length);
+}
+
+
+/*
+ * Read and decompress the datablock located at <start_block> in the
+ * filesystem. The cache is used here to avoid duplicating locking and
+ * read/decompress code.
+ */
+struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
+ u64 start_block, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+ return squashfs_cache_get(sb, msblk->read_page, start_block, length);
+}
+
+
+/*
+ * Read a filesystem table (uncompressed sequence of bytes) from disk
+ */
+int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
+ int length)
+{
+ int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ int i, res;
+ void **data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+ data[i] = buffer;
+ res = squashfs_read_data(sb, data, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length);
+ kfree(data);
+ return res;
+}
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
new file mode 100644
index 00000000000..566b0eaed86
--- /dev/null
+++ b/fs/squashfs/dir.c
@@ -0,0 +1,235 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * dir.c
+ */
+
+/*
+ * This file implements code to read directories from disk.
+ *
+ * See namei.c for a description of directory organisation on disk.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+static const unsigned char squashfs_filetype_table[] = {
+ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
+};
+
+/*
+ * Lookup offset (f_pos) in the directory index, returning the
+ * metadata block containing it.
+ *
+ * If we get an error reading the index then return the part of the index
+ * (if any) we have managed to read - the index isn't essential, just
+ * quicker.
+ */
+static int get_dir_index_using_offset(struct super_block *sb,
+ u64 *next_block, int *next_offset, u64 index_start, int index_offset,
+ int i_count, u64 f_pos)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int err, i, index, length = 0;
+ struct squashfs_dir_index dir_index;
+
+ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
+ i_count, f_pos);
+
+ /*
+ * Translate from external f_pos to the internal f_pos. This
+ * is offset by 3 because we invent "." and ".." entries which are
+ * not actually stored in the directory.
+ */
+ if (f_pos < 3)
+ return f_pos;
+ f_pos -= 3;
+
+ for (i = 0; i < i_count; i++) {
+ err = squashfs_read_metadata(sb, &dir_index, &index_start,
+ &index_offset, sizeof(dir_index));
+ if (err < 0)
+ break;
+
+ index = le32_to_cpu(dir_index.index);
+ if (index > f_pos)
+ /*
+ * Found the index we're looking for.
+ */
+ break;
+
+ err = squashfs_read_metadata(sb, NULL, &index_start,
+ &index_offset, le32_to_cpu(dir_index.size) + 1);
+ if (err < 0)
+ break;
+
+ length = index;
+ *next_block = le32_to_cpu(dir_index.start_block) +
+ msblk->directory_table;
+ }
+
+ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+
+ /*
+ * Translate back from internal f_pos to external f_pos.
+ */
+ return length + 3;
+}
+
+
+static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ u64 block = squashfs_i(inode)->start + msblk->directory_table;
+ int offset = squashfs_i(inode)->offset, length = 0, dir_count, size,
+ type, err;
+ unsigned int inode_number;
+ struct squashfs_dir_header dirh;
+ struct squashfs_dir_entry *dire;
+
+ TRACE("Entered squashfs_readdir [%llx:%x]\n", block, offset);
+
+ dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
+ if (dire == NULL) {
+ ERROR("Failed to allocate squashfs_dir_entry\n");
+ goto finish;
+ }
+
+ /*
+ * Return "." and ".." entries as the first two filenames in the
+ * directory. To maximise compression these two entries are not
+ * stored in the directory, and so we invent them here.
+ *
+ * It also means that the external f_pos is offset by 3 from the
+ * on-disk directory f_pos.
+ */
+ while (file->f_pos < 3) {
+ char *name;
+ int i_ino;
+
+ if (file->f_pos == 0) {
+ name = ".";
+ size = 1;
+ i_ino = inode->i_ino;
+ } else {
+ name = "..";
+ size = 2;
+ i_ino = squashfs_i(inode)->parent;
+ }
+
+ TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n",
+ dirent, name, size, file->f_pos, i_ino,
+ squashfs_filetype_table[1]);
+
+ if (filldir(dirent, name, size, file->f_pos, i_ino,
+ squashfs_filetype_table[1]) < 0) {
+ TRACE("Filldir returned less than 0\n");
+ goto finish;
+ }
+
+ file->f_pos += size;
+ }
+
+ length = get_dir_index_using_offset(inode->i_sb, &block, &offset,
+ squashfs_i(inode)->dir_idx_start,
+ squashfs_i(inode)->dir_idx_offset,
+ squashfs_i(inode)->dir_idx_cnt,
+ file->f_pos);
+
+ while (length < i_size_read(inode)) {
+ /*
+ * Read directory header
+ */
+ err = squashfs_read_metadata(inode->i_sb, &dirh, &block,
+ &offset, sizeof(dirh));
+ if (err < 0)
+ goto failed_read;
+
+ length += sizeof(dirh);
+
+ dir_count = le32_to_cpu(dirh.count) + 1;
+ while (dir_count--) {
+ /*
+ * Read directory entry.
+ */
+ err = squashfs_read_metadata(inode->i_sb, dire, &block,
+ &offset, sizeof(*dire));
+ if (err < 0)
+ goto failed_read;
+
+ size = le16_to_cpu(dire->size) + 1;
+
+ err = squashfs_read_metadata(inode->i_sb, dire->name,
+ &block, &offset, size);
+ if (err < 0)
+ goto failed_read;
+
+ length += sizeof(*dire) + size;
+
+ if (file->f_pos >= length)
+ continue;
+
+ dire->name[size] = '\0';
+ inode_number = le32_to_cpu(dirh.inode_number) +
+ ((short) le16_to_cpu(dire->inode_number));
+ type = le16_to_cp