diff options
Diffstat (limited to 'fs/ubifs/file.c')
| -rw-r--r-- | fs/ubifs/file.c | 564 |
1 files changed, 433 insertions, 131 deletions
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3d698e2022b..b5b593c4527 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -21,39 +21,39 @@ */ /* - * This file implements VFS file and inode operations of regular files, device + * This file implements VFS file and inode operations for regular files, device * nodes and symlinks as well as address space operations. * - * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the - * page is dirty and is used for budgeting purposes - dirty pages should not be - * budgeted. The PG_checked flag is set if full budgeting is required for the - * page e.g., when it corresponds to a file hole or it is just beyond the file - * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to - * fail in this function, and the budget is released in 'ubifs_write_end()'. So - * the PG_private and PG_checked flags carry the information about how the page - * was budgeted, to make it possible to release the budget properly. + * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if + * the page is dirty and is used for optimization purposes - dirty pages are + * not budgeted so the flag shows that 'ubifs_write_end()' should not release + * the budget for this page. The @PG_checked flag is set if full budgeting is + * required for the page e.g., when it corresponds to a file hole or it is + * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because + * it is OK to fail in this function, and the budget is released in + * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry + * information about how the page was budgeted, to make it possible to release + * the budget properly. * - * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations - * we implement. However, this is not true for '->writepage()', which might be - * called with 'i_mutex' unlocked. For example, when pdflush is performing - * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the - * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is - * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim - * path'. So, in '->writepage()' we are only guaranteed that the page is - * locked. + * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we + * implement. However, this is not true for 'ubifs_writepage()', which may be + * called with @i_mutex unlocked. For example, when flusher thread is doing + * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. + * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. + * in the "sys_write -> alloc_pages -> direct reclaim path". So, in + * 'ubifs_writepage()' we are only guaranteed that the page is locked. * - * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., - * readahead path does not have it locked ("sys_read -> generic_file_aio_read - * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is - * not set as well. However, UBIFS disables readahead. - * - * This, for example means that there might be 2 concurrent '->writepage()' - * calls for the same inode, but different inode dirty pages. + * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the + * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> + * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not + * set as well. However, UBIFS disables readahead. */ #include "ubifs.h" +#include <linux/aio.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, return err; } - ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); - + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > + ubifs_inode(inode)->creat_sqnum); len = le32_to_cpu(dn->size); if (len <= 0 || len > UBIFS_BLOCK_SIZE) goto dump; @@ -98,7 +98,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dump: ubifs_err("bad data node (block %u, inode %lu)", block, inode->i_ino); - dbg_dump_node(c, dn); + ubifs_dump_node(c, dn); return -EINVAL; } @@ -147,6 +147,12 @@ static int do_readpage(struct page *page) err = ret; if (err != -ENOENT) break; + } else if (block + 1 == beyond) { + int dlen = le32_to_cpu(dn->size); + int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); + + if (ilen && ilen < dlen) + memset(addr + ilen, 0, dlen - ilen); } } if (++i >= UBIFS_BLOCKS_PER_PAGE) @@ -207,13 +213,14 @@ static void release_new_page_budget(struct ubifs_info *c) */ static void release_existing_page_budget(struct ubifs_info *c) { - struct ubifs_budget_req req = { .dd_growth = c->page_budget}; + struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; ubifs_release_budget(c, &req); } static int write_begin_slow(struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep) + loff_t pos, unsigned len, struct page **pagep, + unsigned flags) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -241,14 +248,14 @@ static int write_begin_slow(struct address_space *mapping, if (unlikely(err)) return err; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (unlikely(!page)) { ubifs_release_budget(c, &req); return -ENOMEM; } if (!PageUptodate(page)) { - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) SetPageChecked(page); else { err = do_readpage(page); @@ -423,31 +430,35 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct ubifs_inode *ui = ubifs_inode(inode); pgoff_t index = pos >> PAGE_CACHE_SHIFT; int uninitialized_var(err), appending = !!(pos + len > inode->i_size); + int skipped_read = 0; struct page *page; - ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); + ubifs_assert(!c->ro_media && !c->ro_mount); - if (unlikely(c->ro_media)) + if (unlikely(c->ro_error)) return -EROFS; /* Try out the fast-path part first */ - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (unlikely(!page)) return -ENOMEM; if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { /* * We change whole page so no need to load it. But we - * have to set the @PG_checked flag to make the further - * code the page is new. This might be not true, but it - * is better to budget more that to read the page from - * the media. + * do not know whether this page exists on the media or + * not, so we assume the latter because it requires + * larger budget. The assumption is that it is better + * to budget a bit more than to read the page from the + * media. Thus, we are setting the @PG_checked flag + * here. */ SetPageChecked(page); - else { + skipped_read = 1; + } else { err = do_readpage(page); if (err) { unlock_page(page); @@ -464,6 +475,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) { ubifs_assert(err == -ENOSPC); /* + * If we skipped reading the page because we were going to + * write all of it, then it is not up to date. + */ + if (skipped_read) { + ClearPageChecked(page); + ClearPageUptodate(page); + } + /* * Budgeting failed which means it would have to force * write-back but didn't, because we set the @fast flag in the * request. Write-back cannot be done now, while we have the @@ -477,12 +496,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); - return write_begin_slow(mapping, pos, len, pagep); + return write_begin_slow(mapping, pos, len, pagep, flags); } /* - * Whee, we aquired budgeting quickly - without involving - * garbage-collection, committing or forceing write-back. We return + * Whee, we acquired budgeting quickly - without involving + * garbage-collection, committing or forcing write-back. We return * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ @@ -543,10 +562,11 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, dbg_gen("copied %d instead of %d, read page and repeat", copied, len); cancel_budget(c, page, ui, appending); + ClearPageChecked(page); /* * Return 0 to force VFS to repeat the whole operation, or the - * error code if 'do_readpage()' failes. + * error code if 'do_readpage()' fails. */ copied = do_readpage(page); goto out; @@ -577,8 +597,297 @@ out: return copied; } +/** + * populate_page - copy data nodes into a page for bulk-read. + * @c: UBIFS file-system description object + * @page: page + * @bu: bulk-read information + * @n: next zbranch slot + * + * This function returns %0 on success and a negative error code on failure. + */ +static int populate_page(struct ubifs_info *c, struct page *page, + struct bu_info *bu, int *n) +{ + int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + unsigned int page_block; + void *addr, *zaddr; + pgoff_t end_index; + + dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", + inode->i_ino, page->index, i_size, page->flags); + + addr = zaddr = kmap(page); + + end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (!i_size || page->index > end_index) { + hole = 1; + memset(addr, 0, PAGE_CACHE_SIZE); + goto out_hole; + } + + page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; + while (1) { + int err, len, out_len, dlen; + + if (nn >= bu->cnt) { + hole = 1; + memset(addr, 0, UBIFS_BLOCK_SIZE); + } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { + struct ubifs_data_node *dn; + + dn = bu->buf + (bu->zbranch[nn].offs - offs); + + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > + ubifs_inode(inode)->creat_sqnum); + + len = le32_to_cpu(dn->size); + if (len <= 0 || len > UBIFS_BLOCK_SIZE) + goto out_err; + + dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; + out_len = UBIFS_BLOCK_SIZE; + err = ubifs_decompress(&dn->data, dlen, addr, &out_len, + le16_to_cpu(dn->compr_type)); + if (err || len != out_len) + goto out_err; + + if (len < UBIFS_BLOCK_SIZE) + memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + + nn += 1; + read = (i << UBIFS_BLOCK_SHIFT) + len; + } else if (key_block(c, &bu->zbranch[nn].key) < page_block) { + nn += 1; + continue; + } else { + hole = 1; + memset(addr, 0, UBIFS_BLOCK_SIZE); + } + if (++i >= UBIFS_BLOCKS_PER_PAGE) + break; + addr += UBIFS_BLOCK_SIZE; + page_block += 1; + } + + if (end_index == page->index) { + int len = i_size & (PAGE_CACHE_SIZE - 1); + + if (len && len < read) + memset(zaddr + len, 0, read - len); + } + +out_hole: + if (hole) { + SetPageChecked(page); + dbg_gen("hole"); + } + + SetPageUptodate(page); + ClearPageError(page); + flush_dcache_page(page); + kunmap(page); + *n = nn; + return 0; + +out_err: + ClearPageUptodate(page); + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + ubifs_err("bad data node (block %u, inode %lu)", + page_block, inode->i_ino); + return -EINVAL; +} + +/** + * ubifs_do_bulk_read - do bulk-read. + * @c: UBIFS file-system description object + * @bu: bulk-read information + * @page1: first page to read + * + * This function returns %1 if the bulk-read is done, otherwise %0 is returned. + */ +static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, + struct page *page1) +{ + pgoff_t offset = page1->index, end_index; + struct address_space *mapping = page1->mapping; + struct inode *inode = mapping->host; + struct ubifs_inode *ui = ubifs_inode(inode); + int err, page_idx, page_cnt, ret = 0, n = 0; + int allocate = bu->buf ? 0 : 1; + loff_t isize; + + err = ubifs_tnc_get_bu_keys(c, bu); + if (err) + goto out_warn; + + if (bu->eof) { + /* Turn off bulk-read at the end of the file */ + ui->read_in_a_row = 1; + ui->bulk_read = 0; + } + + page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT; + if (!page_cnt) { + /* + * This happens when there are multiple blocks per page and the + * blocks for the first page we are looking for, are not + * together. If all the pages were like this, bulk-read would + * reduce performance, so we turn it off for a while. + */ + goto out_bu_off; + } + + if (bu->cnt) { + if (allocate) { + /* + * Allocate bulk-read buffer depending on how many data + * nodes we are going to read. + */ + bu->buf_len = bu->zbranch[bu->cnt - 1].offs + + bu->zbranch[bu->cnt - 1].len - + bu->zbranch[0].offs; + ubifs_assert(bu->buf_len > 0); + ubifs_assert(bu->buf_len <= c->leb_size); + bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN); + if (!bu->buf) + goto out_bu_off; + } + + err = ubifs_tnc_bulk_read(c, bu); + if (err) + goto out_warn; + } + + err = populate_page(c, page1, bu, &n); + if (err) + goto out_warn; + + unlock_page(page1); + ret = 1; + + isize = i_size_read(inode); + if (isize == 0) + goto out_free; + end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); + + for (page_idx = 1; page_idx < page_cnt; page_idx++) { + pgoff_t page_offset = offset + page_idx; + struct page *page; + + if (page_offset > end_index) + break; + page = find_or_create_page(mapping, page_offset, + GFP_NOFS | __GFP_COLD); + if (!page) + break; + if (!PageUptodate(page)) + err = populate_page(c, page, bu, &n); + unlock_page(page); + page_cache_release(page); + if (err) + break; + } + + ui->last_page_read = offset + page_idx - 1; + +out_free: + if (allocate) + kfree(bu->buf); + return ret; + +out_warn: + ubifs_warn("ignoring error %d and skipping bulk-read", err); + goto out_free; + +out_bu_off: + ui->read_in_a_row = ui->bulk_read = 0; + goto out_free; +} + +/** + * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. + * @page: page from which to start bulk-read. + * + * Some flash media are capable of reading sequentially at faster rates. UBIFS + * bulk-read facility is designed to take advantage of that, by reading in one + * go consecutive data nodes that are also located consecutively in the same + * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. + */ +static int ubifs_bulk_read(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); + pgoff_t index = page->index, last_page_read = ui->last_page_read; + struct bu_info *bu; + int err = 0, allocated = 0; + + ui->last_page_read = index; + if (!c->bulk_read) + return 0; + + /* + * Bulk-read is protected by @ui->ui_mutex, but it is an optimization, + * so don't bother if we cannot lock the mutex. + */ + if (!mutex_trylock(&ui->ui_mutex)) + return 0; + + if (index != last_page_read + 1) { + /* Turn off bulk-read if we stop reading sequentially */ + ui->read_in_a_row = 1; + if (ui->bulk_read) + ui->bulk_read = 0; + goto out_unlock; + } + + if (!ui->bulk_read) { + ui->read_in_a_row += 1; + if (ui->read_in_a_row < 3) + goto out_unlock; + /* Three reads in a row, so switch on bulk-read */ + ui->bulk_read = 1; + } + + /* + * If possible, try to use pre-allocated bulk-read information, which + * is protected by @c->bu_mutex. + */ + if (mutex_trylock(&c->bu_mutex)) + bu = &c->bu; + else { + bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN); + if (!bu) + goto out_unlock; + + bu->buf = NULL; + allocated = 1; + } + + bu->buf_len = c->max_bu_buf_len; + data_key_init(c, &bu->key, inode->i_ino, + page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); + err = ubifs_do_bulk_read(c, bu, page); + + if (!allocated) + mutex_unlock(&c->bu_mutex); + else + kfree(bu); + +out_unlock: + mutex_unlock(&ui->ui_mutex); + return err; +} + static int ubifs_readpage(struct file *file, struct page *page) { + if (ubifs_bulk_read(page)) + return 0; do_readpage(page); unlock_page(page); return 0; @@ -594,8 +903,9 @@ static int do_writepage(struct page *page, int len) struct ubifs_info *c = inode->i_sb->s_fs_info; #ifdef UBIFS_DEBUG + struct ubifs_inode *ui = ubifs_inode(inode); spin_lock(&ui->ui_lock); - ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); + ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT); spin_unlock(&ui->ui_lock); #endif @@ -654,7 +964,7 @@ static int do_writepage(struct page *page, int len) * whole index and correct all inode sizes, which is long an unacceptable. * * To prevent situations like this, UBIFS writes pages back only if they are - * within last synchronized inode size, i.e. the the size which has been + * within the last synchronized inode size, i.e. the size which has been * written to the flash media last time. Otherwise, UBIFS forces inode * write-back, thus making sure the on-flash inode contains current inode size, * and then keeps writing pages back. @@ -663,11 +973,15 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'vmtruncate()', which first changes @inode->i_size, then - * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with - * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This - * means that @inode->i_size is changed while @ui_mutex is unlocked. + * we have to call 'truncate_setsize()', which first changes @inode->i_size, + * then drops the truncated pages. And while dropping the pages, it takes the + * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' + * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. + * This means that @inode->i_size is changed while @ui_mutex is unlocked. + * + * XXX(truncate): with the new truncate sequence this is not true anymore, + * and the calls to truncate_setsize can be move around freely. They should + * be moved to the very end of the truncate sequence. * * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond * inode size. How do we do this if @inode->i_size may became smaller while we @@ -708,7 +1022,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -730,13 +1044,13 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -821,9 +1135,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, budgeted = 0; } - err = vmtruncate(inode, new_size); - if (err) - goto out_budg; + truncate_setsize(inode, new_size); if (offset) { pgoff_t index = new_size >> PAGE_CACHE_SHIFT; @@ -870,16 +1182,16 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* The other attributes may be changed at the same time as well */ + /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); - err = ubifs_jnl_truncate(c, inode, old_size, new_size); mutex_unlock(&ui->ui_mutex); + out_budg: if (budgeted) ubifs_release_budget(c, &req); else { - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return err; @@ -910,16 +1222,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (attr->ia_valid & ATTR_SIZE) { dbg_gen("size %lld -> %lld", inode->i_size, new_size); - err = vmtruncate(inode, new_size); - if (err) - goto out; + truncate_setsize(inode, new_size); } mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* 'vmtruncate()' changed @i_size, update @ui_size */ + /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -939,11 +1249,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); - return err; - -out: - ubifs_release_budget(c, &req); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; } @@ -959,7 +1265,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -972,13 +1278,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static void ubifs_invalidatepage(struct page *page, unsigned long offset) +static void ubifs_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { struct inode *inode = page->mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; ubifs_assert(PagePrivate(page)); - if (offset) + if (offset || length < PAGE_CACHE_SIZE) /* Partial page remains dirty */ return; @@ -1000,22 +1307,31 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; int err; dbg_gen("syncing inode %lu", inode->i_ino); - /* - * VFS has already synchronized dirty pages for this inode. Synchronize - * the inode unless this is a 'datasync()' call. - */ + if (c->ro_mount) + /* + * For some really strange reasons VFS does not filter out + * 'fsync()' for R/O mounted file-systems as per 2.6.39. + */ + return 0; + + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + /* Synchronize the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - return err; + goto out; } /* @@ -1023,10 +1339,9 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * them. */ err = ubifs_sync_wbufs_by_inode(c, inode); - if (err) - return err; - - return 0; +out: + mutex_unlock(&inode->i_mutex); + return err; } /** @@ -1049,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode, /** * update_ctime - update mtime and ctime of an inode. - * @c: UBIFS file-system description object * @inode: inode to update * * This function updates mtime and ctime of the inode if it is not equivalent to * current time. Returns zero in case of success and a negative error code in * case of failure. */ -static int update_mctime(struct ubifs_info *c, struct inode *inode) +static int update_mctime(struct inode *inode) { struct timespec now = ubifs_current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; if (mctime_update_needed(inode, &now)) { int err, release; @@ -1082,29 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) return 0; } -static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int err; - ssize_t ret; - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct ubifs_info *c = inode->i_sb->s_fs_info; - - err = update_mctime(c, inode); + int err = update_mctime(file_inode(iocb->ki_filp)); if (err) return err; - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (ret < 0) - return ret; - - if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { - err = ubifs_sync_wbufs_by_inode(c, inode); - if (err) - return err; - } - - return ret; + return generic_file_write_iter(iocb, from); } static int ubifs_set_page_dirty(struct page *page) @@ -1136,12 +1435,14 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) } /* - * mmap()d file has taken write protection fault and is being made - * writable. UBIFS must ensure page is budgeted for. + * mmap()d file has taken write protection fault and is being made writable. + * UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct page *page = vmf->page; + struct inode *inode = file_inode(vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; struct timespec now = ubifs_current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; @@ -1149,10 +1450,10 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, i_size_read(inode)); - ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); + ubifs_assert(!c->ro_media && !c->ro_mount); - if (unlikely(c->ro_media)) - return -EROFS; + if (unlikely(c->ro_error)) + return VM_FAULT_SIGBUS; /* -EROFS */ /* * We have not locked @page so far so we may budget for changing the @@ -1183,9 +1484,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) err = ubifs_budget_space(c, &req); if (unlikely(err)) { if (err == -ENOSPC) - ubifs_warn("out of space for mmapped file " - "(inode number %lu)", inode->i_ino); - return err; + ubifs_warn("out of space for mmapped file (inode number %lu)", + inode->i_ino); + return VM_FAULT_SIGBUS; } lock_page(page); @@ -1219,25 +1520,28 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ubifs_release_dirty_inode_budget(c, ui); } - unlock_page(page); - return 0; + wait_for_stable_page(page); + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); ubifs_release_budget(c, &req); + if (err) + err = VM_FAULT_SIGBUS; return err; } -static struct vm_operations_struct ubifs_file_vm_ops = { +static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = ubifs_vm_page_mkwrite, + .remap_pages = generic_file_remap_pages, }; static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) { int err; - /* 'generic_file_mmap()' takes care of NOMMU case */ err = generic_file_mmap(file, vma); if (err) return err; @@ -1245,7 +1549,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -struct address_space_operations ubifs_file_address_operations = { +const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, .write_begin = ubifs_write_begin, @@ -1255,35 +1559,33 @@ struct address_space_operations ubifs_file_address_operations = { .releasepage = ubifs_releasepage, }; -struct inode_operations ubifs_file_inode_operations = { +const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .setxattr = ubifs_setxattr, .getxattr = ubifs_getxattr, .listxattr = ubifs_listxattr, .removexattr = ubifs_removexattr, -#endif }; -struct inode_operations ubifs_symlink_inode_operations = { +const struct inode_operations ubifs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ubifs_follow_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, }; -struct file_operations ubifs_file_operations = { +const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = ubifs_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = ubifs_write_iter, .mmap = ubifs_file_mmap, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif |
