diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-30 18:44:44 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-30 18:44:44 -0800 |
commit | aa2e7100e38880db7907cb2b7ec6267b2b243771 (patch) | |
tree | 67f9d2479365398c07833d3fc4f794861f7da5b1 /drivers | |
parent | 2def2ef2ae5f3990aabdbe8a755911902707d268 (diff) | |
parent | 7c094fd698de2f333fa39b6da213f880d40b9bfe (diff) |
Merge branch 'akpm' (patches from Andrew Morton)
Merge misc fixes from Andrew Morton:
"A few hotfixes and various leftovers which were awaiting other merges.
Mainly movement of zram into mm/"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (25 commits)
memcg: fix mutex not unlocked on memcg_create_kmem_cache fail path
Documentation/filesystems/vfs.txt: update file_operations documentation
mm, oom: base root bonus on current usage
mm: don't lose the SOFT_DIRTY flag on mprotect
mm/slub.c: fix page->_count corruption (again)
mm/mempolicy.c: fix mempolicy printing in numa_maps
zram: remove zram->lock in read path and change it with mutex
zram: remove workqueue for freeing removed pending slot
zram: introduce zram->tb_lock
zram: use atomic operation for stat
zram: remove unnecessary free
zram: delay pending free request in read path
zram: fix race between reset and flushing pending work
zsmalloc: add maintainers
zram: add zram maintainers
zsmalloc: add copyright
zram: add copyright
zram: remove old private project comment
zram: promote zram from staging
zsmalloc: move it under mm
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/zram/Kconfig (renamed from drivers/staging/zram/Kconfig) | 1 | ||||
-rw-r--r-- | drivers/block/zram/Makefile (renamed from drivers/staging/zram/Makefile) | 0 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c (renamed from drivers/staging/zram/zram_drv.c) | 128 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.h (renamed from drivers/staging/zram/zram_drv.h) | 32 | ||||
-rw-r--r-- | drivers/net/phy/mdio_bus.c | 1 | ||||
-rw-r--r-- | drivers/staging/Kconfig | 4 | ||||
-rw-r--r-- | drivers/staging/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/zram/zram.txt | 77 | ||||
-rw-r--r-- | drivers/staging/zsmalloc/Kconfig | 24 | ||||
-rw-r--r-- | drivers/staging/zsmalloc/Makefile | 3 | ||||
-rw-r--r-- | drivers/staging/zsmalloc/zsmalloc-main.c | 1106 | ||||
-rw-r--r-- | drivers/staging/zsmalloc/zsmalloc.h | 50 | ||||
-rw-r--r-- | drivers/video/backlight/lcd.c | 2 |
15 files changed, 59 insertions, 1374 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 9ffa90c6201..014a1cfc41c 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -108,6 +108,8 @@ source "drivers/block/paride/Kconfig" source "drivers/block/mtip32xx/Kconfig" +source "drivers/block/zram/Kconfig" + config BLK_CPQ_DA tristate "Compaq SMART2 support" depends on PCI && VIRT_TO_BUS && 0 diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 816d979c326..02b688d1438 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +obj-$(CONFIG_ZRAM) += zram/ nvme-y := nvme-core.o nvme-scsi.o skd-y := skd_main.o diff --git a/drivers/staging/zram/Kconfig b/drivers/block/zram/Kconfig index 983314c4134..3450be85039 100644 --- a/drivers/staging/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -14,7 +14,6 @@ config ZRAM disks and maybe many more. See zram.txt for more information. - Project home: <https://compcache.googlecode.com/> config ZRAM_DEBUG bool "Compressed RAM block device debug support" diff --git a/drivers/staging/zram/Makefile b/drivers/block/zram/Makefile index cb0f9ced6a9..cb0f9ced6a9 100644 --- a/drivers/staging/zram/Makefile +++ b/drivers/block/zram/Makefile diff --git a/drivers/staging/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 108f2733106..011e55d820b 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. 
@@ -9,7 +10,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #define KMSG_COMPONENT "zram" @@ -104,7 +104,7 @@ static ssize_t zero_pages_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->stats.pages_zero); + return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); } static ssize_t orig_data_size_show(struct device *dev, @@ -113,7 +113,7 @@ static ssize_t orig_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); + (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } static ssize_t compr_data_size_show(struct device *dev, @@ -140,6 +140,7 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +/* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -228,6 +229,8 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } + rwlock_init(&meta->tb_lock); + mutex_init(&meta->buffer_lock); return meta; free_table: @@ -280,6 +283,7 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } +/* NOTE: caller should hold meta->tb_lock with write-side */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -293,21 +297,21 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - zram->stats.pages_zero--; + atomic_dec(&zram->stats.pages_zero); } return; } if (unlikely(size > max_zpage_size)) - zram->stats.bad_compress--; + atomic_dec(&zram->stats.bad_compress); zs_free(meta->mem_pool, handle); if (size <= PAGE_SIZE / 2) - zram->stats.good_compress--; + 
atomic_dec(&zram->stats.good_compress); atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - zram->stats.pages_stored--; + atomic_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -319,20 +323,26 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) size_t clen = PAGE_SIZE; unsigned char *cmem; struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; + unsigned long handle; + u16 size; + + read_lock(&meta->tb_lock); + handle = meta->table[index].handle; + size = meta->table[index].size; if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); clear_page(mem); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (meta->table[index].size == PAGE_SIZE) + if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, meta->table[index].size, - mem, &clen); + ret = lzo1x_decompress_safe(cmem, size, mem, &clen); zs_unmap_object(meta->mem_pool, handle); + read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. 
*/ if (unlikely(ret != LZO_E_OK)) { @@ -353,11 +363,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; + read_lock(&meta->tb_lock); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); handle_zero_page(bvec); return 0; } + read_unlock(&meta->tb_lock); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -400,6 +413,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + bool locked = false; page = bvec->bv_page; src = meta->compress_buffer; @@ -419,6 +433,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } + mutex_lock(&meta->buffer_lock); + locked = true; user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -433,25 +449,18 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); - - zram->stats.pages_zero++; zram_set_flag(meta, index, ZRAM_ZERO); + write_unlock(&zram->meta->tb_lock); + + atomic_inc(&zram->stats.pages_zero); ret = 0; goto out; } - /* - * zram_slot_free_notify could miss free so that let's - * double check. 
- */ - if (unlikely(meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO))) - zram_free_page(zram, index); - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); - if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; @@ -464,7 +473,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (unlikely(clen > max_zpage_size)) { - zram->stats.bad_compress++; + atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; src = NULL; if (is_partial_io(bvec)) @@ -494,18 +503,22 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); meta->table[index].handle = handle; meta->table[index].size = clen; + write_unlock(&zram->meta->tb_lock); /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); - zram->stats.pages_stored++; + atomic_inc(&zram->stats.pages_stored); if (clen <= PAGE_SIZE / 2) - zram->stats.good_compress++; + atomic_inc(&zram->stats.good_compress); out: + if (locked) + mutex_unlock(&meta->buffer_lock); if (is_partial_io(bvec)) kfree(uncmem); @@ -514,36 +527,15 @@ out: return ret; } -static void handle_pending_slot_free(struct zram *zram) -{ - struct zram_slot_free *free_rq; - - spin_lock(&zram->slot_free_lock); - while (zram->slot_free_rq) { - free_rq = zram->slot_free_rq; - zram->slot_free_rq = free_rq->next; - zram_free_page(zram, free_rq->index); - kfree(free_rq); - } - spin_unlock(&zram->slot_free_lock); -} - static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { int ret; - if (rw == READ) { - down_read(&zram->lock); - handle_pending_slot_free(zram); + if (rw == READ) ret = zram_bvec_read(zram, bvec, index, offset, bio); - up_read(&zram->lock); - } else { - down_write(&zram->lock); - handle_pending_slot_free(zram); + else ret = 
zram_bvec_write(zram, bvec, index, offset); - up_write(&zram->lock); - } return ret; } @@ -553,8 +545,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) size_t index; struct zram_meta *meta; - flush_work(&zram->free_work); - down_write(&zram->init_lock); if (!zram->init_done) { up_write(&zram->init_lock); @@ -762,40 +752,19 @@ error: bio_io_error(bio); } -static void zram_slot_free(struct work_struct *work) -{ - struct zram *zram; - - zram = container_of(work, struct zram, free_work); - down_write(&zram->lock); - handle_pending_slot_free(zram); - up_write(&zram->lock); -} - -static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) -{ - spin_lock(&zram->slot_free_lock); - free_rq->next = zram->slot_free_rq; - zram->slot_free_rq = free_rq; - spin_unlock(&zram->slot_free_lock); -} - static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; - struct zram_slot_free *free_rq; + struct zram_meta *meta; zram = bdev->bd_disk->private_data; - atomic64_inc(&zram->stats.notify_free); - - free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); - if (!free_rq) - return; + meta = zram->meta; - free_rq->index = index; - add_slot_free(zram, free_rq); - schedule_work(&zram->free_work); + write_lock(&meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&meta->tb_lock); + atomic64_inc(&zram->stats.notify_free); } static const struct block_device_operations zram_devops = { @@ -839,13 +808,8 @@ static int create_device(struct zram *zram, int device_id) { int ret = -ENOMEM; - init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - INIT_WORK(&zram->free_work, zram_slot_free); - spin_lock_init(&zram->slot_free_lock); - zram->slot_free_rq = NULL; - zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/staging/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 97a3acf6ab7..ad8aa35bae0 100644 --- 
a/drivers/staging/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. @@ -9,7 +10,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #ifndef _ZRAM_DRV_H_ @@ -17,8 +17,7 @@ #include <linux/spinlock.h> #include <linux/mutex.h> - -#include "../zsmalloc/zsmalloc.h" +#include <linux/zsmalloc.h> /* * Some arbitrary value. This is just to catch @@ -69,10 +68,6 @@ struct table { u8 flags; } __aligned(4); -/* - * All 64bit fields should only be manipulated by 64bit atomic accessors. - * All modifications to 32bit counter should be protected by zram->lock. - */ struct zram_stats { atomic64_t compr_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ @@ -81,33 +76,23 @@ struct zram_stats { atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - u32 pages_zero; /* no. of zero filled pages */ - u32 pages_stored; /* no. of pages currently stored */ - u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 bad_compress; /* % of pages with compression ratio>=75% */ + atomic_t pages_zero; /* no. of zero filled pages */ + atomic_t pages_stored; /* no. 
of pages currently stored */ + atomic_t good_compress; /* % of pages with compression ratio<=50% */ + atomic_t bad_compress; /* % of pages with compression ratio>=75% */ }; struct zram_meta { + rwlock_t tb_lock; /* protect table */ void *compress_workmem; void *compress_buffer; struct table *table; struct zs_pool *mem_pool; -}; - -struct zram_slot_free { - unsigned long index; - struct zram_slot_free *next; + struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, table, - * 32bit stat counters against concurrent - * notifications, reads and writes */ - - struct work_struct free_work; /* handle pending free request */ - struct zram_slot_free *slot_free_rq; /* list head of free request */ - struct request_queue *queue; struct gendisk *disk; int init_done; @@ -118,7 +103,6 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - spinlock_t slot_free_lock; struct zram_stats stats; }; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 930694d3a13..71e49000fbf 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -150,6 +150,7 @@ int mdiobus_register(struct mii_bus *bus) err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); + put_device(&bus->dev); return -EINVAL; } diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 4bb6b11166b..040a51525b4 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -76,10 +76,6 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" -source "drivers/staging/zsmalloc/Kconfig" - -source "drivers/staging/zram/Kconfig" - source "drivers/staging/wlags49_h2/Kconfig" source "drivers/staging/wlags49_h25/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 9f07e5e1609..dea056bf7ff 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -32,8 +32,6 @@ 
obj-$(CONFIG_VT6656) += vt6656/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ -obj-$(CONFIG_ZRAM) += zram/ -obj-$(CONFIG_ZSMALLOC) += zsmalloc/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ obj-$(CONFIG_FB_SM7XX) += sm7xxfb/ diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt deleted file mode 100644 index 765d790ae83..00000000000 --- a/drivers/staging/zram/zram.txt +++ /dev/null @@ -1,77 +0,0 @@ -zram: Compressed RAM based block devices ----------------------------------------- - -Project home: http://compcache.googlecode.com/ - -* Introduction - -The zram module creates RAM based block devices named /dev/zram<id> -(<id> = 0, 1, ...). Pages written to these disks are compressed and stored -in memory itself. These disks allow very fast I/O and compression provides -good amounts of memory savings. Some of the usecases include /tmp storage, -use as swap disks, various caches under /var and maybe many more :) - -Statistics for individual zram devices are exported through sysfs nodes at -/sys/block/zram<id>/ - -* Usage - -Following shows a typical sequence of steps for using zram. - -1) Load Module: - modprobe zram num_devices=4 - This creates 4 devices: /dev/zram{0,1,2,3} - (num_devices parameter is optional. Default: 1) - -2) Set Disksize - Set disk size by writing the value to sysfs node 'disksize'. - The value can be either in bytes or you can use mem suffixes. 
- Examples: - # Initialize /dev/zram0 with 50MB disksize - echo $((50*1024*1024)) > /sys/block/zram0/disksize - - # Using mem suffixes - echo 256K > /sys/block/zram0/disksize - echo 512M > /sys/block/zram0/disksize - echo 1G > /sys/block/zram0/disksize - -3) Activate: - mkswap /dev/zram0 - swapon /dev/zram0 - - mkfs.ext4 /dev/zram1 - mount /dev/zram1 /tmp - -4) Stats: - Per-device statistics are exported as various nodes under - /sys/block/zram<id>/ - disksize - num_reads - num_writes - invalid_io - notify_free - discard - zero_pages - orig_data_size - compr_data_size - mem_used_total - -5) Deactivate: - swapoff /dev/zram0 - umount /dev/zram1 - -6) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset - - This frees all the memory allocated for the given device and - resets the disksize to zero. You must set the disksize again - before reusing the device. - -Please report any problems at: - - Mailing list: linux-mm-cc at laptop dot org - - Issue tracker: http://code.google.com/p/compcache/issues/list - -Nitin Gupta -ngupta@vflare.org diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig deleted file mode 100644 index 9d1f2a24ad6..00000000000 --- a/drivers/staging/zsmalloc/Kconfig +++ /dev/null @@ -1,24 +0,0 @@ -config ZSMALLOC - bool "Memory allocator for compressed pages" - depends on MMU - default n - help - zsmalloc is a slab-based memory allocator designed to store - compressed RAM pages. zsmalloc uses virtual memory mapping - in order to reduce fragmentation. However, this results in a - non-standard allocator interface where a handle, not a pointer, is - returned by an alloc(). This handle must be mapped in order to - access the allocated space. 
- -config PGTABLE_MAPPING - bool "Use page table mapping to access object in zsmalloc" - depends on ZSMALLOC - help - By default, zsmalloc uses a copy-based object mapping method to - access allocations that span two pages. However, if a particular - architecture (ex, ARM) performs VM mapping faster than copying, - then you should select this. This causes zsmalloc to use page table - mapping rather than copying for object mapping. - - You can check speed with zsmalloc benchmark[1]. - [1] https://github.com/spartacus06/zsmalloc diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile deleted file mode 100644 index b134848a590..00000000000 --- a/drivers/staging/zsmalloc/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -zsmalloc-y := zsmalloc-main.o - -obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c deleted file mode 100644 index 7660c87d8b2..00000000000 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* - * zsmalloc memory allocator - * - * Copyright (C) 2011 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the license that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -/* - * This allocator is designed for use with zram. Thus, the allocator is - * supposed to work well under low memory conditions. In particular, it - * never attempts higher order page allocation which is very likely to - * fail under memory pressure. On the other hand, if we just use single - * (0-order) pages, it would suffer from very high fragmentation -- - * any object of size PAGE_SIZE/2 or larger would occupy an entire page. - * This was one of the major issues with its predecessor (xvmalloc). 
- * - * To overcome these issues, zsmalloc allocates a bunch of 0-order pages - * and links them together using various 'struct page' fields. These linked - * pages act as a single higher-order page i.e. an object can span 0-order - * page boundaries. The code refers to these linked pages as a single entity - * called zspage. - * - * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE - * since this satisfies the requirements of all its current users (in the - * worst case, page is incompressible and is thus stored "as-is" i.e. in - * uncompressed form). For allocation requests larger than this size, failure - * is returned (see zs_malloc). - * - * Additionally, zs_malloc() does not return a dereferenceable pointer. - * Instead, it returns an opaque handle (unsigned long) which encodes actual - * location of the allocated object. The reason for this indirection is that - * zsmalloc does not keep zspages permanently mapped since that would cause - * issues on 32-bit systems where the VA region for kernel space mappings - * is very small. So, before using the allocating memory, the object has to - * be mapped using zs_map_object() to get a usable pointer and subsequently - * unmapped using zs_unmap_object(). - * - * Following is how we use various fields and flags of underlying - * struct page(s) to form a zspage. - * - * Usage of struct page fields: - * page->first_page: points to the first component (0-order) page - * page->index (union with page->freelist): offset of the first object - * starting in this page. For the first page, this is - * always 0, so we use this field (aka freelist) to point - * to the first free object in zspage. - * page->lru: links together all component pages (except the first page) - * of a zspage - * - * For _first_ page only: - * - * page->private (union with page->first_page): refers to the - * component page after the first page - * page->freelist: points to the first free object in zspage. 
- * Free objects are linked together using in-place - * metadata. - * page->objects: maximum number of objects we can store in this - * zspage (class->zspage_order * PAGE_SIZE / class->size) - * page->lru: links together first pages of various zspages. - * Basically forming list of zspages in a fullness group. - * page->mapping: class index and fullness group of the zspage - * - * Usage of struct page flags: - * PG_private: identifies the first component page - * PG_private2: identifies the last component page - * - */ - -#ifdef CONFIG_ZSMALLOC_DEBUG -#define DEBUG -#endif - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/errno.h> -#include <linux/highmem.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <asm/tlbflush.h> -#include <asm/pgtable.h> -#include <linux/cpumask.h> -#include <linux/cpu.h> -#include <linux/vmalloc.h> -#include <linux/hardirq.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include "zsmalloc.h" - -/* - * This must be power of 2 and greater than of equal to sizeof(link_free). - * These two conditions ensure that any 'struct link_free' itself doesn't - * span more than 1 page which avoids complex case of mapping 2 pages simply - * to restore link_free pointer values. - */ -#define ZS_ALIGN 8 - -/* - * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single) - * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N. - */ -#define ZS_MAX_ZSPAGE_ORDER 2 -#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) - -/* - * Object location (<PFN>, <obj_idx>) is encoded as - * as single (unsigned long) handle value. - * - * Note that object index <obj_idx> is relative to system - * page <PFN> it is stored in, so for each sub-page belonging - * to a zspage, obj_idx starts with 0. - * - * This is made more complicated by various memory models and PAE. 
- */ - -#ifndef MAX_PHYSMEM_BITS -#ifdef CONFIG_HIGHMEM64G -#define MAX_PHYSMEM_BITS 36 -#else /* !CONFIG_HIGHMEM64G */ -/* - * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just - * be PAGE_SHIFT - */ -#define MAX_PHYSMEM_BITS BITS_PER_LONG -#endif -#endif -#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) -#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) -#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) - -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) -/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ -#define ZS_MIN_ALLOC_SIZE \ - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) -#define ZS_MAX_ALLOC_SIZE PAGE_SIZE - -/* - * On systems with 4K page size, this gives 254 size classes! There is a - * trader-off here: - * - Large number of size classes is potentially wasteful as free page are - * spread across these classes - * - Small number of size classes causes large internal fragmentation - * - Probably its better to use specific size classes (empirically - * determined). NOTE: all those class sizes must be set as multiple of - * ZS_ALIGN to make sure link_free itself never has to span 2 pages. 
- * - * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN - * (reason above) - */ -#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8) -#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \ - ZS_SIZE_CLASS_DELTA + 1) - -/* - * We do not maintain any list for completely empty or full pages - */ -enum fullness_group { - ZS_ALMOST_FULL, - ZS_ALMOST_EMPTY, - _ZS_NR_FULLNESS_GROUPS, - - ZS_EMPTY, - ZS_FULL -}; - -/* - * We assign a page to ZS_ALMOST_EMPTY fullness group when: - * n <= N / f, where - * n = number of allocated objects - * N = total number of objects zspage can store - * f = 1/fullness_threshold_frac - * - * Similarly, we assign zspage to: - * ZS_ALMOST_FULL when n > N / f - * ZS_EMPTY when n == 0 - * ZS_FULL when n == N - * - * (see: fix_fullness_group()) - */ -static const int fullness_threshold_frac = 4; - -struct size_class { - /* - * Size of objects stored in this class. Must be multiple - * of ZS_ALIGN. - */ - int size; - unsigned int index; - - /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ - int pages_per_zspage; - - spinlock_t lock; - - /* stats */ - u64 pages_allocated; - - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; -}; - -/* - * Placed within free objects to form a singly linked list. - * For every zspage, first_page->freelist gives head of this list. 
- * - * This must be power of 2 and less than or equal to ZS_ALIGN - */ -struct link_free { - /* Handle of next free chunk (encodes <PFN, obj_idx>) */ - void *next; -}; - -struct zs_pool { - struct size_class size_class[ZS_SIZE_CLASSES]; - - gfp_t flags; /* allocation flags used when growing pool */ -}; - -/* - * A zspage's class index and fullness group - * are encoded in its (first)page->mapping - */ -#define CLASS_IDX_BITS 28 -#define FULLNESS_BITS 4 -#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1) -#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1) - -struct mapping_area { -#ifdef CONFIG_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ -#else - char *vm_buf; /* copy buffer for objects that span pages */ -#endif - char *vm_addr; /* address of kmap_atomic()'ed pages */ - enum zs_mapmode vm_mm; /* mapping mode */ -}; - - -/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area); - -static int is_first_page(struct page *page) -{ - return PagePrivate(page); -} - -static int is_last_page(struct page *page) -{ - return PagePrivate2(page); -} - -static void get_zspage_mapping(struct page *page, unsigned int *class_idx, - enum fullness_group *fullness) -{ - unsigned long m; - BUG_ON(!is_first_page(page)); - - m = (unsigned long)page->mapping; - *fullness = m & FULLNESS_MASK; - *class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK; -} - -static void set_zspage_mapping(struct page *page, unsigned int class_idx, - enum fullness_group fullness) -{ - unsigned long m; - BUG_ON(!is_first_page(page)); - - m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) | - (fullness & FULLNESS_MASK); - page->mapping = (struct address_space *)m; -} - -/* - * zsmalloc divides the pool into various size classes where each - * class maintains a list of zspages where each zspage is divided - * into equal sized chunks. 
Each allocation falls into one of these - * classes depending on its size. This function returns index of the - * size class which has chunk size big enough to hold the give size. - */ -static int get_size_class_index(int size) -{ - int idx = 0; - - if (likely(size > ZS_MIN_ALLOC_SIZE)) - idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, - ZS_SIZE_CLASS_DELTA); - - return idx; -} - -/* - * For each s |