diff options
author | Dmitry Torokhov <dtor_core@ameritech.net> | 2005-09-09 20:14:47 -0500 |
---|---|---|
committer | Dmitry Torokhov <dtor_core@ameritech.net> | 2005-09-09 20:14:47 -0500 |
commit | d344c5e0856ad03278d8700b503762dbc8b86e12 (patch) | |
tree | a6d893a643470a3c2580a58f3228a55fa1fd1d82 /fs/ntfs | |
parent | 010988e888a0abbe7118635c1b33d049caae6b29 (diff) | |
parent | 87fc767b832ef5a681a0ff9d203c3289bc3be2bf (diff) |
Manual merge with Linus
Diffstat (limited to 'fs/ntfs')
-rw-r--r-- | fs/ntfs/ChangeLog | 70 | ||||
-rw-r--r-- | fs/ntfs/Makefile | 2 | ||||
-rw-r--r-- | fs/ntfs/aops.c | 293 | ||||
-rw-r--r-- | fs/ntfs/attrib.c | 125 | ||||
-rw-r--r-- | fs/ntfs/attrib.h | 2 | ||||
-rw-r--r-- | fs/ntfs/compress.c | 8 | ||||
-rw-r--r-- | fs/ntfs/dir.c | 3 | ||||
-rw-r--r-- | fs/ntfs/file.c | 9 | ||||
-rw-r--r-- | fs/ntfs/index.c | 1 | ||||
-rw-r--r-- | fs/ntfs/inode.c | 227 | ||||
-rw-r--r-- | fs/ntfs/lcnalloc.c | 39 | ||||
-rw-r--r-- | fs/ntfs/lcnalloc.h | 21 | ||||
-rw-r--r-- | fs/ntfs/logfile.c | 251 | ||||
-rw-r--r-- | fs/ntfs/logfile.h | 8 | ||||
-rw-r--r-- | fs/ntfs/malloc.h | 48 | ||||
-rw-r--r-- | fs/ntfs/mft.c | 4 | ||||
-rw-r--r-- | fs/ntfs/runlist.c | 374 | ||||
-rw-r--r-- | fs/ntfs/runlist.h | 3 | ||||
-rw-r--r-- | fs/ntfs/super.c | 16 | ||||
-rw-r--r-- | fs/ntfs/unistr.c | 3 |
20 files changed, 1062 insertions, 445 deletions
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 9eecc9939df..e4fd6134244 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -22,6 +22,76 @@ ToDo/Notes: - Enable the code for setting the NT4 compatibility flag when we start making NTFS 1.2 specific modifications. +2.1.24 - Lots of bug fixes and support more clean journal states. + + - Support journals ($LogFile) which have been modified by chkdsk. This + means users can boot into Windows after we marked the volume dirty. + The Windows boot will run chkdsk and then reboot. The user can then + immediately boot into Linux rather than having to do a full Windows + boot first before rebooting into Linux and we will recognize such a + journal and empty it as it is clean by definition. + - Support journals ($LogFile) with only one restart page as well as + journals with two different restart pages. We sanity check both and + either use the only sane one or the more recent one of the two in the + case that both are valid. + - Modify fs/ntfs/malloc.h::ntfs_malloc_nofs() to do the kmalloc() based + allocations with __GFP_HIGHMEM, analogous to how the vmalloc() based + allocations are done. + - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to + ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and + hence cannot fail. + - Use ntfs_malloc_nofs_nofail() in the two critical regions in + fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer + need to panic() if the allocation fails as it now cannot fail. + - Fix two nasty runlist merging bugs that had gone unnoticed so far. + Thanks to Stefano Picerno for the bug report. + - Remove two bogus BUG_ON()s from fs/ntfs/mft.c. + - Fix handling of valid but empty mapping pairs array in + fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress(). + - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING + messages and include the inode number. Thanks to Yura Pakhuchiy for + pointing this out. 
+ - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new + length is zero. + - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller + specified hole into a runlist. + - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned + index entry is in the index root, we forgot to set the @ir pointer in + the index context. Thanks to Yura Pakhuchiy for finding this bug. + - Remove bogus setting of PageError in ntfs_read_compressed_block(). + - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize(). + - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect + access to the allocated size in the ntfs inode with the size lock. + - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to + return LCN_ENOENT when there is no runlist and the allocated size is + zero. + - Fix load_attribute_list() to handle the case of a NULL runlist. + - Fix handling of sparse attributes in ntfs_attr_make_non_resident(). + - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set() + to ensure that these functions are never called for compressed or + encrypted attributes. + - Fix cluster (de)allocators to work when the runlist is NULL and more + importantly to take a locked runlist rather than them locking it + which leads to lock reversal. + - Truncate {a,c,m}time to the ntfs supported time granularity when + updating the times in the inode in ntfs_setattr(). + - Fixup handling of sparse, compressed, and encrypted attributes in + fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(), + fs/ntfs/aops.c::ntfs_{read,write}page(). + - Make ntfs_write_block() not instantiate sparse blocks if they contain + only zeroes. + - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page + lock protection over the buffer submission for i/o which allows the + removal of the get_bh()/put_bh() pairs for each buffer. 
+ - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case + where a concurrent truncate has truncated the runlist under our feet. + - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c. + - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock + in the first buffer head instead of a driver global spin lock to + improve scalability. + - Minor fix to error handling and error message display in + fs/ntfs/aops.c::ntfs_prepare_nonresident_write(). + 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index f083f27d8b6..894b2b876d3 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 78adad7a988..545236414d5 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -55,9 +55,8 @@ */ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; - struct buffer_head *tmp; + struct buffer_head *first, *tmp; struct page *page; ntfs_inode *ni; int page_uptodate = 1; @@ -89,11 +88,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } } else { clear_buffer_uptodate(bh); + SetPageError(page); ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", (unsigned long long)bh->b_blocknr); - SetPageError(page); } - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -108,7 +109,8 @@ static void 
ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -141,7 +143,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unlock_page(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -185,13 +188,15 @@ static int ntfs_read_block(struct page *page) blocksize_bits = VFS_I(ni)->i_blkbits; blocksize = 1 << blocksize_bits; - if (!page_has_buffers(page)) + if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, 0); - bh = head = page_buffers(page); - if (unlikely(!bh)) { - unlock_page(page); - return -ENOMEM; + if (unlikely(!page_has_buffers(page))) { + unlock_page(page); + return -ENOMEM; + } } + bh = head = page_buffers(page); + BUG_ON(!bh); iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); @@ -204,6 +209,7 @@ static int ntfs_read_block(struct page *page) nr = i = 0; do { u8 *kaddr; + int err; if (unlikely(buffer_uptodate(bh))) continue; @@ -211,6 +217,7 @@ static int ntfs_read_block(struct page *page) arr[nr++] = bh; continue; } + err = 0; bh->b_bdev = vol->sb->s_bdev; /* Is the block within the allowed limits? */ if (iblock < lblock) { @@ -252,7 +259,6 @@ lock_retry_remap: goto handle_hole; /* If first try and runlist unmapped, map and retry. 
*/ if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { - int err; is_retry = TRUE; /* * Attempt to map runlist, dropping lock for @@ -263,20 +269,30 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, treat it as a + * hole. This can happen due to concurrent truncate + * for example. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + err = 0; + goto handle_hole; + } /* Hard error, zero out region. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; SetPageError(page); ntfs_error(vol->sb, "Failed to read from inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, " "offset 0x%x because its location on " "disk could not be determined%s " - "(error code %lli).", ni->mft_no, + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even after " - "retrying" : "", (long long)lcn); + "retrying" : "", err); } /* * Either iblock was outside lblock limits or @@ -289,9 +305,10 @@ handle_hole: handle_zblock: kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); + flush_dcache_page(page); + if (likely(!err)) + set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); /* Release the lock if we took it. */ @@ -367,31 +384,38 @@ retry_readpage: return 0; } ni = NTFS_I(page->mapping->host); - + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If attribute is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + BUG_ON(ni->type != AT_DATA); + err = -EACCES; + goto err_out; + } + /* Compressed data streams are handled in compress.c. 
*/ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + return ntfs_read_compressed_block(page); + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed or - * encrypted. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - err = -EACCES; - goto err_out; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) - return ntfs_read_compressed_block(page); - } - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_read_block(page); } /* * Attribute is resident, implying it is not compressed or encrypted. * This also means the attribute is smaller than an mft record and * hence smaller than a page, so can simply zero out any pages with - * index above 0. + * index above 0. Note the attribute can actually be marked compressed + * but if it is resident the actual data is not compressed so we are + * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { kaddr = kmap_atomic(page, KM_USER0); @@ -511,19 +535,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) BUG_ON(!PageUptodate(page)); create_empty_buffers(page, blocksize, (1 << BH_Uptodate) | (1 << BH_Dirty)); + if (unlikely(!page_has_buffers(page))) { + ntfs_warning(vol->sb, "Error allocating page " + "buffers. Redirtying page so we try " + "again later."); + /* + * Put the page back on mapping->dirty_pages, but leave + * its buffers' dirty state as-is. + */ + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } } bh = head = page_buffers(page); - if (unlikely(!bh)) { - ntfs_warning(vol->sb, "Error allocating page buffers. " - "Redirtying page so we try again later."); - /* - * Put the page back on mapping->dirty_pages, but leave its - * buffer's dirty state as-is. 
- */ - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } + BUG_ON(!bh); /* NOTE: Different naming scheme to ntfs_read_block()! */ @@ -670,6 +696,27 @@ lock_retry_remap: } /* It is a hole, need to instantiate it. */ if (lcn == LCN_HOLE) { + u8 *kaddr; + unsigned long *bpos, *bend; + + /* Check if the buffer is zero. */ + kaddr = kmap_atomic(page, KM_USER0); + bpos = (unsigned long *)(kaddr + bh_offset(bh)); + bend = (unsigned long *)((u8*)bpos + blocksize); + do { + if (unlikely(*bpos)) + break; + } while (likely(++bpos < bend)); + kunmap_atomic(kaddr, KM_USER0); + if (bpos == bend) { + /* + * Buffer is zero and sparse, no need to write + * it. + */ + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + continue; + } // TODO: Instantiate the hole. // clear_buffer_new(bh); // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); @@ -690,20 +737,37 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); + /* + * If buffer is outside the runlist, truncate has cut it out + * of the runlist. Just clean and clear the buffer and set it + * uptodate so it can get discarded by the VM. + */ + if (err == -ENOENT || lcn == LCN_ENOENT) { + u8 *kaddr; + + bh->b_blocknr = -1; + clear_buffer_dirty(bh); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + bh_offset(bh), 0, blocksize); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + set_buffer_uptodate(bh); + err = 0; + continue; + } /* Failed to map the buffer, even after retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " "attribute type 0x%x, vcn 0x%llx, offset 0x%x " "because its location on disk could not be " - "determined%s (error code %lli).", ni->mft_no, + "determined%s (error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? 
" even after " - "retrying" : "", (long long)lcn); - if (!err) - err = -EIO; + "retrying" : "", err); break; } while (block++, (bh = bh->b_this_page) != head); @@ -714,7 +778,7 @@ lock_retry_remap: /* For the error case, need to reset bh to the beginning. */ bh = head; - /* Just an optimization, so ->readpage() isn't called later. */ + /* Just an optimization, so ->readpage() is not called later. */ if (unlikely(!PageUptodate(page))) { int uptodate = 1; do { @@ -730,7 +794,6 @@ lock_retry_remap: /* Setup all mapped, dirty buffers for async write i/o. */ do { - get_bh(bh); if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { @@ -768,14 +831,8 @@ lock_retry_remap: BUG_ON(PageWriteback(page)); set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ - unlock_page(page); - /* - * Submit the prepared buffers for i/o. Note the page is unlocked, - * and the async write i/o completion handler can end_page_writeback() - * at any time after the *first* submit_bh(). So the buffers can then - * disappear... - */ + /* Submit the prepared buffers for i/o. */ need_end_writeback = TRUE; do { struct buffer_head *next = bh->b_this_page; @@ -783,9 +840,9 @@ lock_retry_remap: submit_bh(WRITE, bh); need_end_writeback = FALSE; } - put_bh(bh); bh = next; } while (bh != head); + unlock_page(page); /* If no i/o was started, need to end_page_writeback(). */ if (unlikely(need_end_writeback)) @@ -860,7 +917,6 @@ static int ntfs_write_mst_block(struct page *page, sync = (wbc->sync_mode == WB_SYNC_ALL); /* Make sure we have mapped buffers. */ - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); @@ -1280,38 +1336,42 @@ retry_writepage: ntfs_debug("Write outside i_size - truncated?"); return 0; } + /* + * Only $DATA attributes can be encrypted and only unnamed $DATA + * attributes can be compressed. 
Index root can have the flags set but + * this means to create compressed/encrypted files, not that the + * attribute is compressed/encrypted. + */ + if (ni->type != AT_INDEX_ROOT) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + unlock_page(page); + BUG_ON(ni->type != AT_DATA); + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoNonResident(ni) && NInoCompressed(ni)) { + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + // TODO: Implement and replace this with + // return ntfs_write_compressed_block(page); + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to compressed files is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoNonResident(ni) && NInoSparse(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to sparse files is not " + "supported yet. Sorry."); + return -EOPNOTSUPP; + } + } /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed, encrypted, - * and/or sparse. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - unlock_page(page); - ntfs_debug("Denying write access to encrypted " - "file."); - return -EACCES; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) { - // TODO: Implement and replace this check with - // return ntfs_write_compressed_block(page); - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to compressed " - "files is not supported yet. " - "Sorry."); - return -EOPNOTSUPP; - } - // TODO: Implement and remove this check. - if (NInoSparse(ni)) { - unlock_page(page); - ntfs_error(vi->i_sb, "Writing to sparse files " - "is not supported yet. 
Sorry."); - return -EOPNOTSUPP; - } - } /* We have to zero every time due to mmap-at-end-of-file. */ if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ @@ -1324,14 +1384,16 @@ retry_writepage: /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) return ntfs_write_mst_block(page, wbc); - /* Normal data stream. */ + /* Normal, non-resident data stream. */ return ntfs_write_block(page, wbc); } /* - * Attribute is resident, implying it is not compressed, encrypted, - * sparse, or mst protected. This also means the attribute is smaller - * than an mft record and hence smaller than a page, so can simply - * return error on any pages with index above 0. + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. This also means the attribute is smaller than an mft + * record and hence smaller than a page, so can simply return error on + * any pages with index above 0. Note the attribute can actually be + * marked compressed but if it is resident the actual data is not + * compressed so we are ok to ignore the compressed flag here. */ BUG_ON(page_has_buffers(page)); BUG_ON(!PageUptodate(page)); @@ -1380,30 +1442,14 @@ retry_writepage: BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); - /* - * Here, we don't need to zero the out of bounds area everytime because - * the below memcpy() already takes care of the mmap-at-end-of-file - * requirements. If the file is converted to a non-resident one, then - * the code path use is switched to the non-resident one where the - * zeroing happens on each ntfs_writepage() invocation. - * - * The above also applies nicely when i_size is decreased. - * - * When i_size is increased, the memory between the old and new i_size - * _must_ be zeroed (or overwritten with new data). Otherwise we will - * expose data to userspace/disk which should never have been exposed. 
- * - * FIXME: Ensure that i_size increases do the zeroing/overwriting and - * if we cannot guarantee that, then enable the zeroing below. If the - * zeroing below is enabled, we MUST move the unlock_page() from above - * to after the kunmap_atomic(), i.e. just before the - * end_page_writeback(). - * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size - * increases for resident attributes so those are ok. - * TODO: ntfs_truncate(), others? + * Here, we do not need to zero the out of bounds area everytime + * because the below memcpy() already takes care of the + * mmap-at-end-of-file requirements. If the file is converted to a + * non-resident one, then the code path use is switched to the + * non-resident one where the zeroing happens on each ntfs_writepage() + * invocation. */ - attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { @@ -1681,27 +1727,25 @@ lock_retry_remap: if (likely(!err)) goto lock_retry_remap; rl = NULL; - lcn = err; } else if (!rl) up_read(&ni->runlist.lock); /* * Failed to map the buffer, even after * retrying. */ + if (!err) + err = -EIO; bh->b_blocknr = -1; ntfs_error(vol->sb, "Failed to write to inode " "0x%lx, attribute type 0x%x, " "vcn 0x%llx, offset 0x%x " "because its location on disk " "could not be determined%s " - "(error code %lli).", + "(error code %i).", ni->mft_no, ni->type, (unsigned long long)vcn, vcn_ofs, is_retry ? " even " - "after retrying" : "", - (long long)lcn); - if (!err) - err = -EIO; + "after retrying" : "", err); goto err_out; } /* We now have a successful remap, i.e. lcn >= 0. 
*/ @@ -2357,6 +2401,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { buffers_to_free = bh; } bh = head = page_buffers(page); + BUG_ON(!bh); do { bh_ofs = bh_offset(bh); if (bh_ofs + bh_size <= ofs) diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index cd0f9e740b1..3f9a4ff42ee 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -43,6 +43,9 @@ * which is not an error as such. This is -ENOENT. It means that @vcn is out * of bounds of the runlist. * + * Note the runlist can be NULL after this function returns if @vcn is zero and + * the attribute has zero allocated size, i.e. there simply is no runlist. + * * Locking: - The runlist must be locked for writing. * - This function modifies the runlist. */ @@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) ATTR_RECORD *a; ntfs_attr_search_ctx *ctx; runlist_element *rl; + unsigned long flags; int err = 0; ntfs_debug("Mapping runlist part containing vcn 0x%llx.", @@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) * ntfs_mapping_pairs_decompress() fails. 
*/ end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; - if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) + if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) { + read_lock_irqsave(&ni->size_lock, flags); end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; + read_unlock_irqrestore(&ni->size_lock, flags); + } if (unlikely(vcn >= end_vcn)) { err = -ENOENT; goto err_out; @@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, const BOOL write_locked) { LCN lcn; + unsigned long flags; BOOL is_retry = FALSE; ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", @@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); + if (!ni->runlist.rl) { + read_lock_irqsave(&ni->size_lock, flags); + if (!ni->allocated_size) { + read_unlock_irqrestore(&ni->size_lock, flags); + return LCN_ENOENT; + } + read_unlock_irqrestore(&ni->size_lock, flags); + } retry_remap: /* Convert vcn to lcn. If that fails map the runlist and retry once. 
*/ lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); @@ -255,6 +271,7 @@ retry_remap: runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, const BOOL write_locked) { + unsigned long flags; runlist_element *rl; int err = 0; BOOL is_retry = FALSE; @@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); + if (!ni->runlist.rl) { + read_lock_irqsave(&ni->size_lock, flags); + if (!ni->allocated_size) { + read_unlock_irqrestore(&ni->size_lock, flags); + return ERR_PTR(-ENOENT); + } + read_unlock_irqrestore(&ni->size_lock, flags); + } retry_remap: rl = ni->runlist.rl; if (likely(rl && vcn >= rl[0].vcn)) { @@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start, block_size_bits = sb->s_blocksize_bits; down_read(&runlist->lock); rl = runlist->rl; + if (!rl) { + ntfs_error(sb, "Cannot read attribute list since runlist is " + "missing."); + goto err_out; + } /* Read all clusters specified by the runlist one run at a time. */ while (rl->length) { lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn); @@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size) } /** + * ntfs_resident_attr_value_resize - resize the value of a resident attribute + * @m: mft record containing attribute record + * @a: attribute record whose value to resize + * @new_size: new size in bytes to which to resize the attribute value of @a + * + * Resize the value of the attribute @a in the mft record @m to @new_size bytes. + * If the value is made bigger, the newly allocated space is cleared. + * + * Return 0 on success and -errno on error. The following error codes are + * defined: + * -ENOSPC - Not enough space in the mft record @m to perform the resize. + * + * Note: On error, no modifications have been performed whatsoever. 
+ * + * Warning: If you make a record smaller without having copied all the data you + * are interested in the data may be overwritten. + */ +int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, + const u32 new_size) +{ + u32 old_size; + + /* Resize the resident part of the attribute record. */ + if (ntfs_attr_record_resize(m, a, + le16_to_cpu(a->data.resident.value_offset) + new_size)) + return -ENOSPC; + /* + * The resize succeeded! If we made the attribute value bigger, clear + * the area between the old size and @new_size. + */ + old_size = le32_to_cpu(a->data.resident.value_length); + if (new_size > old_size) + memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + + old_size, 0, new_size - old_size); + /* Finally update the length of the attribute value. */ + a->data.resident.value_length = cpu_to_le32(new_size); + return 0; +} + +/** * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute * @ni: ntfs inode describing the attribute to convert * @@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) return err; } /* + * FIXME: Compressed and encrypted attributes are not supported when + * writing and we should never have gotten here for them. + */ + BUG_ON(NInoCompressed(ni)); + BUG_ON(NInoEncrypted(ni)); + /* * The size needs to be aligned to a cluster boundary for allocation * purposes. */ @@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) BUG_ON(a->non_resident); /* * Calculate new offsets for the name and the mapping pairs array. - * We assume the attribute is not compressed or sparse. 
*/ - name_ofs = (offsetof(ATTR_REC, - data.non_resident.compressed_size) + 7) & ~7; + if (NInoSparse(ni) || NInoCompressed(ni)) + name_ofs = (offsetof(ATTR_REC, + data.non_resident.compressed_size) + + sizeof(a->data.non_resident.compressed_size) + + 7) & ~7; + else + name_ofs = (offsetof(ATTR_REC, + data.non_resident.compressed_size) + 7) & ~7; mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; /* * Determine the size of the resident part of the now non-resident @@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), a->name_length * sizeof(ntfschar)); a->name_offset = cpu_to_le16(name_ofs); - /* - * FIXME: For now just clear all of these as we do not support them - * when writing. - */ - a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE | - ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK)); /* Setup the fields specific to non-resident attributes. */ a->data.non_resident.lowest_vcn = 0; a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> vol->cluster_size_bits); a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); - a->data.non_resident.compression_unit = 0; memset(&a->data.non_resident.reserved, 0, sizeof(a->data.non_resident.reserved)); a->data.non_resident.allocated_size = cpu_to_sle64(new_size); a->data.non_resident.data_size = a->data.non_resident.initialized_size = cpu_to_sle64(attr_size); + if (NInoSparse(ni) || NInoCompressed(ni)) { + a->data.non_resident.compression_unit = 4; + a->data.non_resident.compressed_size = + a->data.non_resident.allocated_size; + } else + a->data.non_resident.compression_unit = 0; /* Generate the mapping pairs array into the attribute record. */ err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, arec_size - mp_ofs, rl, 0, -1, NULL); @@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) goto undo_err_out; } /* Setup the in-memory attribute structure to be non-resident. 
*/ - /* - * FIXME: For now just clear all of these as we do not support them - * when writing. - */ - NInoClearSparse(ni); - NInoClearEncrypted(ni); - NInoClearCompressed(ni); ni->runlist.rl = rl; write_lock_irqsave(&ni->size_lock, flags); ni->allocated_size = new_size; + if (NInoSparse(ni) || NInoCompressed(ni)) { + ni->itype.compressed.size = ni->allocated_size; + ni->itype.compressed.block_size = 1U << + (a->data.non_resident.compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed.block_size) - 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident.compression_unit; + } write_unlock_irqrestore(&ni->size_lock, flags); /* * This needs to be last since the address space operations ->readpage @@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) BUG_ON(cnt < 0); if (!cnt) goto done; + /* + * FIXME: Compressed and encrypted attributes are not supported when + * writing and we should never have gotten here for them. + */ + BUG_ON(NInoCompressed(ni)); + BUG_ON(NInoEncrypted(ni)); mapping = VFS_I(ni)->i_mapping; /* Work out the starting index and page offset. 
*/ idx = ofs >> PAGE_CACHE_SHIFT; diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index 0e4ac6d3c0e..0618ed6fd7b 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h @@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type); extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); +extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, + const u32 new_size); extern int ntfs_attr_make_non_resident(ntfs_inode *ni); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 6d265cfd49a..25d24106f89 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page) if (unlikely(!pages || !bhs)) { kfree(bhs); kfree(pages); - SetPageError(page); unlock_page(page); ntfs_error(vol->sb, "Failed to allocate internal buffers."); return -ENOMEM; @@ -871,9 +870,6 @@ lock_retry_remap: for (; prev_cur_page < cur_page; prev_cur_page++) { page = pages[prev_cur_page]; if (page) { - if (prev_cur_page == xpage && - !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); @@ -904,8 +900,6 @@ lock_retry_remap: "Terminating them with extreme " "prejudice. 
Inode 0x%lx, page index " "0x%lx.", ni->mft_no, page->index); - if (cur_page == xpage && !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); @@ -953,8 +947,6 @@ err_out: for (i = cur_page; i < max_page; i++) { page = pages[i]; if (page) { - if (i == xpage && !xpage_done) - SetPageError(page); flush_dcache_page(page); kunmap(page); unlock_page(page); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 46779471c54..795c3d1930f 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, ie->key.file_name.file_name_length, &name, NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); if (name_len <= 0) { - ntfs_debug("Skipping unrepresentable file."); + ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.", + (long long)MREF_LE(ie->data.dir.indexed_file)); return 0; } if (ie->key.file_name.file_attributes & diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index e0f530ce6b9..be9fd1dd423 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* - * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. + * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, if (!datasync || !NInoNonResident(NTFS_I(vi))) ret = ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); + /* + * NOTE: If we were to use mapping->private_list (see ext2 and + * fs/buffer.c) for dirty blocks then we could optimize the below to be + * sync_mapping_buffers(vi->i_mapping). 
+ */ err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) ret = err; diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 11fd5307d78..8f2d5727546 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len, &ie->key, key_len)) { ir_done: ictx->is_in_root = TRUE; + ictx->ir = ir; ictx->actx = actx; ictx->base_ni = base_ni; ictx->ia = NULL; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 886214a77f9..dc4bbe3acf5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1013,41 +1013,50 @@ skip_large_dir_stuff: } a = ctx->attr; /* Setup the state. */ - if (a->non_resident) { - NInoSetNonResident(ni); - if (a->flags & (ATTR_COMPRESSION_MASK | - ATTR_IS_SPARSE)) { - if (a->flags & ATTR_COMPRESSION_MASK) { - NInoSetCompressed(ni); - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found " + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " "compressed data but " "compression is " "disabled due to " "cluster size (%i) > " "4kiB.", vol->cluster_size); - goto unm_err_out; - } - if ((a->flags & ATTR_COMPRESSION_MASK) - != ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found " - "unknown compression " - "method or corrupt " - "file."); - goto unm_err_out; - } + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method " + "or corrupt file."); + goto unm_err_out; } - if (a->flags & ATTR_IS_SPARSE) - NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_ENCRYPTED) { + if (NInoCompressed(ni)) { + ntfs_error(vi->i_sb, "Found encrypted and " + "compressed data."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (a->non_resident) { + NInoSetNonResident(ni); + if (NInoCompressed(ni) || NInoSparse(ni)) { 
if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found " - "nonstandard compression unit " - "(%u instead of 4). Cannot " - "handle this.", - a->data.non_resident. - compression_unit); + "nonstandard " + "compression unit (%u " + "instead of 4). " + "Cannot handle this.", + a->data.non_resident. + compression_unit); err = -EOPNOTSUPP; goto unm_err_out; } @@ -1065,14 +1074,6 @@ skip_large_dir_stuff: a->data.non_resident. compressed_size); } - if (a->flags & ATTR_IS_ENCRYPTED) { - if (a->flags & ATTR_COMPRESSION_MASK) { - ntfs_error(vi->i_sb, "Found encrypted " - "and compressed data."); - goto unm_err_out; - } - NInoSetEncrypted(ni); - } if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of $DATA " "attribute has non zero " @@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) if (unlikely(err)) goto unm_err_out; a = ctx->attr; + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if ((ni->type != AT_DATA) || (ni->type == AT_DATA && + ni->name_len)) { + ntfs_error(vi->i_sb, "Found compressed " + "non-data or named data " + "attribute. Please report " + "you saw this message to " + "linux-ntfs-dev@lists." + "sourceforge.net"); + goto unm_err_out; + } + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found compressed " + "attribute but compression is " + "disabled due to cluster size " + "(%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) != + ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method."); + goto unm_err_out; + } + } + /* + * The compressed/sparse flag set in an index root just means to + * compress all files. + */ + if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "but the attribute is %s. 
Please " + "report you saw this message to " + "linux-ntfs-dev@lists.sourceforge.net", + NInoCompressed(ni) ? "compressed" : + "sparse"); + goto unm_err_out; + } + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + } + if (a->flags & ATTR_IS_ENCRYPTED) { + if (NInoCompressed(ni)) { + ntfs_error(vi->i_sb, "Found encrypted and compressed " + "data."); + goto unm_err_out; + } + /* + * The encryption flag set in an index root just means to + * encrypt all files. + */ + if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "but the attribute is encrypted. " + "Please report you saw this message " + "to linux-ntfs-dev@lists.sourceforge." + "net"); + goto unm_err_out; + } + if (ni->type != AT_DATA) { + ntfs_error(vi->i_sb, "Found encrypted non-data " + "attribute."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } if (!a->non_resident) { /* Ensure the attribute name is placed before the value. */ if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= @@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the attribute value."); goto unm_err_out; } - if (NInoMstProtected(ni) || a->flags) { + if (NInoMstProtected(ni)) { ntfs_error(vi->i_sb, "Found mst protected attribute " - "or attribute with non-zero flags but " - "the attribute is resident. Please " - "report you saw this message to " + "but the attribute is resident. 
" + "Please report you saw this message to " "linux-ntfs-dev@lists.sourceforge.net"); goto unm_err_out; } @@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the mapping pairs array."); goto unm_err_out; } - if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { - if (a->flags & ATTR_COMPRESSION_MASK) { - NInoSetCompressed(ni); - if ((ni->type != AT_DATA) || (ni->type == - AT_DATA && ni->name_len)) { - ntfs_error(vi->i_sb, "Found compressed " - "non-data or named " - "data attribute. " - "Please report you " - "saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found compressed " - "attribute but " - "compression is " - "disabled due to " - "cluster size (%i) > " - "4kiB.", - vol->cluster_size); - goto unm_err_out; - } - if ((a->flags & ATTR_COMPRESSION_MASK) != - ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found unknown " - "compression method."); - goto unm_err_out; - } - } - if (NInoMstProtected(ni)) { - ntfs_error(vi->i_sb, "Found mst protected " - "attribute but the attribute " - "is %s. Please report you " - "saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net", - NInoCompressed(ni) ? 
- "compressed" : "sparse"); - goto unm_err_out; - } - if (a->flags & ATTR_IS_SPARSE) - NInoSetSparse(ni); + if ((NInoCompressed(ni) || NInoSparse(ni)) && + ni->type != AT_INDEX_ROOT) { if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found nonstandard " "compression unit (%u instead " @@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident.compressed_size); } - if (a->flags & ATTR_IS_ENCRYPTED) { - if (a->flags & ATTR_COMPRESSION_MASK) { - ntfs_error(vi->i_sb, "Found encrypted and " - "compressed data."); - goto unm_err_out; - } - if (NInoMstProtected(ni)) { - ntfs_error(vi->i_sb, "Found mst protected " - "attribute but the attribute " - "is encrypted. Please report " - "you saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - NInoSetEncrypted(ni); - } if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of attribute has " "non-zero lowest_vcn."); @@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) vi->i_mapping->a_ops = &ntfs_mst_aops; else vi->i_mapping->a_ops = &ntfs_aops; - if (NInoCompressed(ni) || NInoSparse(ni)) + if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT) vi->i_blocks = ni->itype.compressed.size >> 9; else vi->i_blocks = ni->allocated_size >> 9; /* - * Make sure the base inode doesn't go away and attach it to the + * Make sure the base inode does not go away and attach it to the * attribute inode. */ igrab(base_vi); @@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) "after the attribute value."); goto unm_err_out; } - /* Compressed/encrypted/sparse index root is not allowed. */ + /* + * Compressed/encrypted/sparse index root is not allowed, except for + * directories of course but those are not dealt with here. 
+ */ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | ATTR_IS_SPARSE)) { ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " @@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) * We skipped the truncate but must still update * timestamps. */ - ia_valid |= ATTR_MTIME|ATTR_CTIME; + ia_valid |= ATTR_MTIME | ATTR_CTIME; } } - if (ia_valid & ATTR_ATIME) - vi->i_atime = attr->ia_atime; + vi->i_atime = timespec_trunc(attr->ia_atime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_MTIME) - vi->i_mtime = attr->ia_mtime; + vi->i_mtime = timespec_trunc(attr->ia_mtime, + vi->i_sb->s_time_gran); if (ia_valid & ATTR_CTIME) - vi->i_ctime = attr->ia_ctime; + vi->i_ctime = timespec_trunc(attr->ia_ctime, + vi->i_sb->s_time_gran); mark_inode_dirty(vi); out: return err; diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index a4bc07616e5..7b593429068 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, int ret = 0; ntfs_debug("Entering."); + if (!rl) + return 0; for (; rl->length; rl++) { int err; @@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, BUG_ON(zone < FIRST_ZONE); BUG_ON(zone > LAST_ZONE); - /* Return empty runlist if @count == 0 */ - // FIXME: Do we want to just return NULL instead? (AIA) - if (!count) { - rl = ntfs_malloc_nofs(PAGE_SIZE); - if (!rl) - return ERR_PTR(-ENOMEM); - rl[0].vcn = start_vcn; - rl[0].lcn = LCN_RL_NOT_MAPPED; - rl[0].length = 0; - return rl; - } + /* Return NULL if @count is zero. */ + if (!count) + return NULL; /* Take the lcnbmp lock for writing. 
*/ down_write(&vol->lcnbmp_lock); /* @@ -788,7 +782,8 @@ out: * @vi: vfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters - * @is_rollback: if TRUE this is a rollback operation + * @write_locked: true if the runlist is locked for writing + * @is_rollback: true if this is a rollback operation * * Free @count clusters starting at the cluster @start_vcn in the runlist * described by the vfs inode @vi. @@ -806,17 +801,17 @@ out: * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be unlocked on entry and is - * unlocked on return. - * - This function takes the runlist lock of @vi for reading and - * sometimes for writing and sometimes modifies the runlist. + * Locking: - The runlist described by @vi must be locked on entry and is + * locked on return. Note if the runlist is locked for reading the + * lock may be dropped and reacquired. Note the runlist may be + * modified when needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. 
*/ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, - const BOOL is_rollback) + const BOOL write_locked, const BOOL is_rollback) { s64 delta, to_free, total_freed, real_freed; ntfs_inode *ni; @@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, total_freed = real_freed = 0; - down_read(&ni->runlist.lock); - rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE); + rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* Attempt to map runlist. */ vcn = rl->vcn; - rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); + rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) @@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, /* Update the total done clusters. */ total_freed += to_free; } - up_read(&ni->runlist.lock); if (likely(!is_rollback)) up_write(&vol->lcnbmp_lock); @@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count, ntfs_debug("Done."); return real_freed; err_out: - up_read(&ni->runlist.lock); if (is_rollback) return err; /* If no real clusters were freed, no need to rollback. */ @@ -973,7 +965,8 @@ err_out: * If rollback fails, set the volume errors flag, emit an error * message, and return the error code. */ - delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE); + delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked, + TRUE); if (delta < 0) { ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " "inconsistent metadata! 
Unmount and run " diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index 4cac1c024af..e4d7fb98d68 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h @@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const NTFS_CLUSTER_ALLOCATION_ZONES zone); extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count, const BOOL is_rollback); + s64 count, const BOOL write_locked, const BOOL is_rollback); /** * ntfs_cluster_free - free clusters on an ntfs volume * @vi: vfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters + * @write_locked: true if the runlist is locked for writing * * Free @count clusters starting at the cluster @start_vcn in the runlist * described by the vfs inode @vi. @@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * - * Locking: - The runlist described by @vi must be unlocked on entry and is - * unlocked on return. - * - This function takes the runlist lock of @vi for reading and - * sometimes for writing and sometimes modifies the runlist. + * Locking: - The runlist described by @vi must be locked on entry and is + * locked on return. Note if the runlist is locked for reading the + * lock may be dropped and reacquired. Note the runlist may be + * modified when needed runlist fragments need to be mapped. * - The volume lcn bitmap must be unlocked on entry and is unlocked * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. 
*/ static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn, - s64 count) + s64 count, const BOOL write_locked) { - return __ntfs_cluster_free(vi, start_vcn, count, FALSE); + return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE); } extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, @@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, * * Return 0 on success and -errno on error. * - * Locking: This function takes the volume lcn bitmap lock for writing and - * modifies the bitmap contents. + * Locking: - This function takes the volume lcn bitmap lock for writing and + * modifies the bitmap contents. + * - The caller must have locked the runlist @rl for reading or + * writing. */ static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol, const runlist_element *rl) diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 8edb8e20fb0..0173e95500d 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, */ if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { ntfs_error(vi->i_sb, "$LogFile restart page is not modified " - "chkdsk but a chkdsk LSN is specified."); + "by chkdsk but a chkdsk LSN is specified."); return FALSE; } ntfs_debug("Done."); @@ -312,10 +312,12 @@ err_out: * @vi: $LogFile inode to which the restart page belongs * @rp: restart page to check * @pos: position in @vi at which the restart page resides - * @wrp: copy of the multi sector transfer deprotected restart page + * @wrp: [OUT] copy of the multi sector transfer deprotected restart page + * @lsn: [OUT] set to the current logfile lsn on success * - * Check the restart page @rp for consistency and return TRUE if it is - * consistent and FALSE otherwise. + * Check the restart page @rp for consistency and return 0 if it is consistent + * and -errno otherwise. 
The restart page may have been modified by chkdsk in + * which case its magic is CHKD instead of RSTR. * * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not * require the full restart page. @@ -323,25 +325,33 @@ err_out: * If @wrp is not NULL, on success, *@wrp will point to a buffer containing a * copy of the complete multi sector transfer deprotected page. On failure, * *@wrp is undefined. + * + * Similarly, if @lsn is not NULL, on success *@lsn will be set to the current + * logfile lsn according to this restart page. On failure, *@lsn is undefined. + * + * The following error codes are defined: + * -EINVAL - The restart page is inconsistent. + * -ENOMEM - Not enough memory to load the restart page. + * -EIO - Failed to read from $LogFile. */ -static BOOL ntfs_check_and_load_restart_page(struct inode *vi, - RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp) +static int ntfs_check_and_load_restart_page(struct inode *vi, + RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp, + LSN *lsn) { RESTART_AREA *ra; RESTART_PAGE_HEADER *trp; - int size; - BOOL ret; + int size, err; ntfs_debug("Entering."); /* Check the restart page header for consistency. */ if (!ntfs_check_restart_page_header(vi, rp, pos)) { /* Error output already done inside the function. */ - return FALSE; + return -EINVAL; } /* Check the restart area for consistency. */ if (!ntfs_check_restart_area(vi, rp)) { /* Error output already done inside the function. */ - return FALSE; + return -EINVAL; } ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); /* @@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, if (!trp) { ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile " "restart page buffer."); - return FALSE; + return -ENOMEM; } /* * Read the whole of the restart page into the buffer. 
If it fits @@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, if (IS_ERR(page)) { ntfs_error(vi->i_sb, "Error mapping $LogFile " "page (index %lu).", idx); + err = PTR_ERR(page); + if (err != -EIO && err != -ENOMEM) + err = -EIO; goto err_out; } size = min_t(int, to_read, PAGE_CACHE_SIZE); @@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi, /* Perform the multi sector transfer deprotection on the buffer. */ if (post_read_mst_fixup((NTFS_RECORD*)trp, le32_to_cpu(rp->system_page_size))) { - ntfs_error(vi->i_sb, "Multi sector transfer error detected in " - "$LogFile restart page."); - goto err_out; + /* + * A multi sector transfer error was detected. We only need to + * abort if the restart page contents exceed the multi sector + * transfer fixup of the first sector. + */ + if (le16_to_cpu(rp->restart_area_offset) + + le16_to_cpu(ra->restart_area_length) > + NTFS_BLOCK_SIZE - sizeof(u16)) { + ntfs_error(vi->i_sb, "Multi sector transfer error " + "detected in $LogFile restart page."); + err = -EINVAL; + goto err_out; + } + } + /* + * If the restart page is modified by chkdsk or there are no active + * logfile clients, the logfile is consistent. Otherwise, need to + * check the log client records for consistency, too. + */ + err = 0; + if (ntfs_is_rstr_record(rp->magic) && + ra->client_in_use_list != LOGFILE_NO_CLIENT) { + if (!ntfs_check_log_client_array(vi, trp)) { + err = -EINVAL; + goto err_out; + } + } + if (lsn) { + if (ntfs_is_rstr_record(rp->magic)) + *lsn = sle64_to_cpu(ra->current_lsn); + else /* if (ntfs_is_chkd_record(rp->magic)) */ + *lsn = sle64_to_cpu(rp->chkdsk_lsn); } - /* Check the log client records for consistency. 
*/ - ret = ntfs_check_log_client_array(vi, trp); - if (ret && wrp) - *wrp = trp; - else - ntfs_free(trp); ntfs_debug("Done."); - return ret; + if (wrp) + *wrp = trp; + else { err_out: - ntfs_free(trp); - return FALSE; + ntfs_free(trp); + } + return err; } /** * ntfs_check_logfile - check the journal for consistency * @log_vi: struct inode of loaded journal $LogFile to check + * @rp: [OUT] on success this is a copy of the current restart page * * Check the $LogFile journal for consistency and return TRUE if it is - * consistent and FALSE if not. + * consistent and FALSE if not. On success, the current restart page is + * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it. * * At present we only check the two restart pages and ignore the log record * pages. @@ -424,19 +465,18 @@ err_out: * if the $LogFile was created on a system with a different page size to ours * yet and mst deprotection would fail if our page size is smaller. */ -BOOL ntfs_check_logfile(struct inode *log_vi) +BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) { - s64 size, pos, rstr1_pos, rstr2_pos; + s64 size, pos; + LSN rstr1_lsn, rstr2_lsn; ntfs_volume *vol = NTFS_SB(log_vi->i_sb); struct address_space *mapping = log_vi->i_mapping; struct page *page = NULL; u8 *kaddr = NULL; RESTART_PAGE_HEADER *rstr1_ph = NULL; RESTART_PAGE_HEADER *rstr2_ph = NULL; - int log_page_size, log_page_mask, ofs; + int log_page_size, log_page_mask, err; BOOL logfile_is_empty = TRUE; - BOOL rstr1_found = FALSE; - BOOL rstr2_found = FALSE; u8 log_page_bits; ntfs_debug("Entering."); @@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi) if (IS_ERR(page)) { ntfs_error(vol->sb, "Error mapping $LogFile " "page (index %lu).", idx); - return FALSE; + goto err_out; } } kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK); @@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi) */ if (ntfs_is_rcrd_recordp((le32*)kaddr)) break; - /* - * A modified by chkdsk 
restart page means we cannot handle - * this log file. - */ - if (ntfs_is_chkd_recordp((le32*)kaddr)) { - ntfs_error(vol->sb, "$LogFile has been modified by " - "chkdsk. Mount this volume in " - "Windows."); - goto err_out; - } - /* If not a restart page, continue. */ - if (!ntfs_is_rstr_recordp((le32*)kaddr)) { - /* Skip to the minimum page size for the next one. */ + /* If not a (modified by chkdsk) restart page, continue. */ + if (!ntfs_is_rstr_recordp((le32*)kaddr) && + !ntfs_is_chkd_recordp((le32*)kaddr)) { if (!pos) pos = NTFS_BLOCK_SIZE >> 1; continue; } - /* We now know we have a restart page. */ - if (!pos) { - rstr1_found = TRUE; - rstr1_pos = pos; - } else { - if (rstr2_found) { - ntfs_error(vol->sb, "Found more than two " - "restart pages in $LogFile."); - goto err_out; - } - rstr2_found = TRUE; - rstr2_pos = pos; - } /* - * Check the restart page for consistency and get a copy of the - * complete multi sector transfer deprotected restart page. + * Check the (modified by chkdsk) restart page for consistency + * and get a copy of the complete multi sector transfer + * deprotected restart page. */ - if (!ntfs_check_and_load_restart_page(log_vi, + err = ntfs_check_and_load_restart_page(log_vi, (RESTART_PAGE_HEADER*)kaddr, pos, - !pos ? &rstr1_ph : &rstr2_ph)) { - /* Error output already done inside the function. */ - goto err_out; + !rstr1_ph ? &rstr1_ph : &rstr2_ph, + !rstr1_ph ? &rstr1_lsn : &rstr2_lsn); + if (!err) { + /* + * If we have now found the first (modified by chkdsk) + * restart page, continue looking for the second one. + */ + if (!pos) { + pos = NTFS_BLOCK_SIZE >> 1; + continue; + } + /* + * We have now found the second (modified by chkdsk) + * restart page, so we can stop looking. + */ + break; } /* - * We have a valid restart page. The next one must be after - * a whole system page size as specified by the valid restart - * page. + * Error output already done inside the function. 
Note, we do + * not abort if the restart page was invalid as we might still + * find a valid one further in the file. */ + if (err != -EINVAL) { + ntfs_unmap_page(page); + goto err_out; + } + /* Continue looking. */ if (!pos) - pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1; + pos = NTFS_BLOCK_SIZE >> 1; } - if (page) { + if (page) ntfs_unmap_page(page); - page = NULL; - } if (logfile_is_empty) { NVolSetLogFileEmpty(vol); is_empty: ntfs_debug("Done. ($LogFile is empty.)"); return TRUE; } - if (!rstr1_found || !rstr2_found) { - ntfs_error(vol->sb, "Did not find two restart pages in " - "$LogFile."); - goto err_out; + if (!rstr1_ph) { + BUG_ON(rstr2_ph); + ntfs_error(vol->sb, "Did not find any restart pages in " + "$LogFile and it was not empty."); + return FALSE; + } + /* If both restart pages were found, use the more recent one. */ + if (rstr2_ph) { + /* + * If the second restart area is more recent, switch to it. + * Otherwise just throw it away. + */ + if (rstr2_lsn > rstr1_lsn) { + ntfs_free(rstr1_ph); + rstr1_ph = rstr2_ph; + /* rstr1_lsn = rstr2_lsn; */ + } else + ntfs_free(rstr2_ph); + rstr2_ph = NULL; } - /* - * The two restart areas must be identical except for the update - * sequence number. - */ - ofs = le16_to_cpu(rstr1_ph->usa_ofs); - if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16), - memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs, - le32_to_cpu(rstr1_ph->system_page_size) - ofs))) { - ntfs_error(vol->sb, "The two restart pages in $LogFile do not " - "match."); - goto err_out; - } - ntfs_free(rstr1_ph); - ntfs_free(rstr2_ph); /* All consistency checks passed. 
*/ + if (rp) + *rp = rstr1_ph; + else + ntfs_free(rstr1_ph); ntfs_debug("Done."); return TRUE; err_out: - if (page) - ntfs_unmap_page(page); if (rstr1_ph) ntfs_free(rstr1_ph); - if (rstr2_ph) - ntfs_free(rstr2_ph); return FALSE; } /** * ntfs_is_logfile_clean - check in the journal if the volume is clean * @log_vi: struct inode of loaded journal $LogFile to check + * @rp: copy of the current restart page * * Analyze the $LogFile journal and return TRUE if it indicates the volume was * shutdown cleanly and FALSE if not. @@ -619,11 +655,9 @@ err_out: * is empty this function requires that NVolLogFileEmpty() is true otherwise an * empty volume will be reported as dirty. */ -BOOL ntfs_is_logfile_clean(struct inode *log_vi) +BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) { ntfs_volume *vol = NTFS_SB(log_vi->i_sb); - struct page *page; - RESTART_PAGE_HEADER *rp; RESTART_AREA *ra; ntfs_debug("Entering."); @@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) ntfs_debug("Done. ($LogFile is empty.)"); return TRUE; } - /* - * Read the first restart page. It will be possibly incomplete and - * will not be multi sector transfer deprotected but we only need the - * first NTFS_BLOCK_SIZE bytes so it does not matter. - */ - page = ntfs_map_page(log_vi->i_mapping, 0); - if (IS_ERR(page)) { - ntfs_error(vol->sb, "Error mapping $LogFile page (index 0)."); + BUG_ON(!rp); + if (!ntfs_is_rstr_record(rp->magic) && + !ntfs_is_chkd_record(rp->magic)) { + ntfs_error(vol->sb, "Restart page buffer is invalid. This is " + "probably a bug in that the $LogFile should " + "have been consistency checked before calling " + "this function."); return FALSE; } - rp = (RESTART_PAGE_HEADER*)page_address(page); - if (!ntfs_is_rstr_record(rp->magic)) { - ntfs_error(vol->sb, "No restart page found at offset zero in " - "$LogFile. 
This is probably a bug in that " - "the $LogFile should have been consistency " - "checked before calling this function."); - goto err_out; - } ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); /* * If the $LogFile has active clients, i.e. it is open, and we do not @@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi) if (ra->client_in_use_list != LOGFILE_NO_CLIENT && !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { ntfs_debug("Done. $LogFile indicates a dirty shutdown."); - goto err_out; + return FALSE; } - ntfs_unmap_page(page); /* $LogFile indicates a clean shutdown. */ ntfs_debug("Done. $LogFile indicates a clean shutdown."); return TRUE; -err_out: - ntfs_unmap_page(page); - return FALSE; } /** diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index 4ee4378de06..42388f95ea6 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -2,7 +2,7 @@ * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of * the Linux-NTFS project. * - * Copyright (c) 2000-2004 Anton Altaparmakov + * Copyright (c) 2000-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -296,9 +296,11 @@ typedef struct { /* sizeof() = 160 (0xa0) bytes */ } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; -extern BOOL ntfs_check_logfile(struct inode *log_vi); +extern BOOL ntfs_check_logfile(struct inode *log_vi, + RESTART_PAGE_HEADER **rp); -extern BOOL ntfs_is_logfile_clean(struct inode *log_vi); +extern BOOL ntfs_is_logfile_clean(struct inode *log_vi, + const RESTART_PAGE_HEADER *rp); extern BOOL ntfs_empty_logfile(struct inode *log_vi); diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index fac5944df6d..9994e019a3c 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -27,27 +27,63 @@ #include <linux/highmem.h> /** - * ntfs_malloc_nofs - allocate memory in multiples of pages - * @size number of bytes to allocate + * 
__ntfs_malloc - allocate memory in multiples of pages + * @size: number of bytes to allocate + * @gfp_mask: extra flags for the allocator + * + * Internal function. You probably want ntfs_malloc_nofs()... * * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and * returns a pointer to the allocated memory. * * If there was insufficient memory to complete the request, return NULL. + * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ -static inline void *ntfs_malloc_nofs(unsigned long size) +static inline void *__ntfs_malloc(unsigned long size, + unsigned int __nocast gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); /* kmalloc() has per-CPU caches so is faster for now. */ - return kmalloc(PAGE_SIZE, GFP_NOFS); - /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */ + return kmalloc(PAGE_SIZE, gfp_mask); + /* return (void *)__get_free_page(gfp_mask); */ } if (likely(size >> PAGE_SHIFT < num_physpages)) - return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } +/** + * ntfs_malloc_nofs - allocate memory in multiples of pages + * @size: number of bytes to allocate + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. + * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs(unsigned long size) +{ + return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM); +} + +/** + * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages + * @size: number of bytes to allocate + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. + * + * This function guarantees that the allocation will succeed. It will sleep + * for as long as it takes to complete the allocation. 
+ * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs_nofail(unsigned long size) +{ + return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL); +} + static inline void ntfs_free(void *addr) { if (likely(((unsigned long)addr < VMALLOC_START) || diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 317f7c679fd..2c32b84385a 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, } while (bh); tail->b_this_page = head; attach_page_buffers(page, head); - BUG_ON(!page_has_buffers(page)); } bh = head = page_buffers(page); BUG_ON(!bh); @@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) */ if (!NInoTestClearDirty(ni)) goto done; - BUG_ON(!page_has_buffers(page)); bh = head = page_buffers(page); BUG_ON(!bh); rl = NULL; @@ -1955,7 +1953,7 @@ restore_undo_alloc: a = ctx->attr; a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); undo_alloc: - if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) { + if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) { ntfs_error(vol->sb, "Failed to free clusters from mft data " "attribute.%s", es); NVolSetErrors(vol); diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 758855b0414..f5b2ac92908 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src, int size) { if (likely((dst != src) && (size > 0))) - memmove(base + dst, base + src, size * sizeof (*base)); + memmove(base + dst, base + src, size * sizeof(*base)); } /** @@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl, } /** + * ntfs_rl_realloc_nofail - Reallocate memory for runlists + * @rl: original runlist + * @old_size: number of runlist elements in the original runlist @rl + * @new_size: number of runlist elements we need space for + * + * 
As the runlists grow, more memory will be required. To prevent the + * kernel having to allocate and reallocate large numbers of small bits of + * memory, this function returns an entire page of memory. + * + * This function guarantees that the allocation will succeed. It will sleep + * for as long as it takes to complete the allocation. + * + * It is up to the caller to serialize access to the runlist @rl. + * + * N.B. If the new allocation doesn't require a different number of pages in + * memory, the function will return the original pointer. + * + * On success, return a pointer to the newly allocated, or recycled, memory. + * On error, return -errno. The following error codes are defined: + * -ENOMEM - Not enough memory to allocate runlist array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl, + int old_size, int new_size) +{ + runlist_element *new_rl; + + old_size = PAGE_ALIGN(old_size * sizeof(*rl)); + new_size = PAGE_ALIGN(new_size * sizeof(*rl)); + if (old_size == new_size) + return rl; + + new_rl = ntfs_malloc_nofs_nofail(new_size); + BUG_ON(!new_rl); + + if (likely(rl != NULL)) { + if (unlikely(old_size > new_size)) + old_size = new_size; + memcpy(new_rl, rl, old_size); + ntfs_free(rl); + } + return new_rl; +} + +/** * ntfs_are_rl_mergeable - test if two runlists can be joined together * @dst: original runlist * @src: new runlist to test for mergeability with @dst @@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, /* Scan to the end of the source runlist. 
*/ for (dend = 0; likely(drl[dend].length); dend++) ; + dend++; drl = ntfs_rl_realloc(drl, dend, dend + 1); if (IS_ERR(drl)) return drl; @@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ (srl[send - 1].vcn + srl[send - 1].length))); - /* Or we'll lose an end marker */ - if (start && finish && (drl[dins].length == 0)) + /* Or we will lose an end marker. */ + if (finish && !drl[dins].length) ss++; if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) finish = FALSE; @@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, if (drl[ds].lcn != LCN_RL_NOT_MAPPED) { /* Add an unmapped runlist element. */ if (!slots) { - /* FIXME/TODO: We need to have the - * extra memory already! (AIA) */ - drl = ntfs_rl_realloc(drl, ds, ds + 2); - if (!drl) - goto critical_error; + drl = ntfs_rl_realloc_nofail(drl, ds, + ds + 2); slots = 2; } ds++; @@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, drl[ds].length = marker_vcn - drl[ds].vcn; /* Finally add the ENOENT terminator. */ ds++; - if (!slots) { - /* FIXME/TODO: We need to have the extra - * memory already! (AIA) */ - drl = ntfs_rl_realloc(drl, ds, ds + 1); - if (!drl) - goto critical_error; - } + if (!slots) + drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1); drl[ds].vcn = marker_vcn; drl[ds].lcn = LCN_ENOENT; drl[ds].length = (s64)0; @@ -659,11 +697,6 @@ finished: ntfs_debug("Merged runlist:"); ntfs_debug_dump_runlist(drl); return drl; - -critical_error: - /* Critical error! We cannot afford to fail here. */ - ntfs_error(NULL, "Critical error! Not enough memory."); - panic("NTFS: Cannot continue."); } /** @@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol, ntfs_error(vol->sb, "Corrupt attribute."); return ERR_PTR(-EIO); } + /* If the mapping pairs array is valid but empty, nothing to do. 
*/ + if (!vcn && !*buf) + return old_rl; /* Current position in runlist array. */ rlpos = 0; /* Allocate first page and set current runlist size to one page. */ @@ -1419,6 +1455,7 @@ err_out: /** * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn + * @vol: ntfs volume (needed for error output) * @runlist: runlist to truncate * @new_length: the new length of the runlist in VCNs * @@ -1426,12 +1463,16 @@ err_out: * holding the runlist elements to a length of @new_length VCNs. * * If @new_length lies within the runlist, the runlist elements with VCNs of - * @new_length and above are discarded. + * @new_length and above are discarded. As a special case if @new_length is + * zero, the runlist is discarded and set to NULL. * * If @new_length lies beyond the runlist, a sparse runlist element is added to * the end of the runlist @runlist or if the last runlist element is a sparse * one already, this is extended. * + * Note, no checking is done for unmapped runlist elements. It is assumed that + * the caller has mapped any elements that need to be mapped already. + * * Return 0 on success and -errno on error. * * Locking: The caller must hold @runlist->lock for writing. 
@@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, BUG_ON(!runlist); BUG_ON(new_length < 0); rl = runlist->rl; + if (!new_length) { + ntfs_debug("Freeing runlist."); + runlist->rl = NULL; + if (rl) + ntfs_free(rl); + return 0; + } if (unlikely(!rl)) { /* * Create a runlist consisting of a sparse runlist element of @@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, return 0; } +/** + * ntfs_rl_punch_nolock - punch a hole into a runlist + * @vol: ntfs volume (needed for error output) + * @runlist: runlist to punch a hole into + * @start: starting VCN of the hole to be created + * @length: size of the hole to be created in units of clusters + * + * Punch a hole into the runlist @runlist starting at VCN @start and of size + * @length clusters. + * + * Return 0 on success and -errno on error, in which case @runlist has not been + * modified. + * + * If @start and/or @start + @length are outside the runlist return error code + * -ENOENT. + * + * If the runlist contains unmapped or error elements between @start and @start + * + @length return error code -EINVAL. + * + * Locking: The caller must hold @runlist->lock for writing. + */ +int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, + const VCN start, const s64 length) +{ + const VCN end = start + length; + s64 delta; + runlist_element *rl, *rl_end, *rl_real_end, *trl; + int old_size; + BOOL lcn_fixup = FALSE; + + ntfs_debug("Entering for start 0x%llx, length 0x%llx.", + (long long)start, (long long)length); + BUG_ON(!runlist); + BUG_ON(start < 0); + BUG_ON(length < 0); + BUG_ON(end < 0); + rl = runlist->rl; + if (unlikely(!rl)) { + if (likely(!start && !length)) + return 0; + return -ENOENT; + } + /* Find @start in the runlist. */ + while (likely(rl->length && start >= rl[1].vcn)) + rl++; + rl_end = rl; + /* Find @end in the runlist. 
*/ + while (likely(rl_end->length && end >= rl_end[1].vcn)) { + /* Verify there are no unmapped or error elements. */ + if (unlikely(rl_end->lcn < LCN_HOLE)) + return -EINVAL; + rl_end++; + } + /* Check the last element. */ + if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE)) + return -EINVAL; + /* This covers @start being out of bounds, too. */ + if (!rl_end->length && end > rl_end->vcn) + return -ENOENT; + if (!length) + return 0; + if (!rl->length) + return -ENOENT; + rl_real_end = rl_end; + /* Determine the runlist size. */ + while (likely(rl_real_end->length)) + rl_real_end++; + old_size = rl_real_end - runlist->rl + 1; + /* If @start is in a hole simply extend the hole. */ + if (rl->lcn == LCN_HOLE) { + /* + * If both @start and @end are in the same sparse run, we are + * done. + */ + if (end <= rl[1].vcn) { + ntfs_debug("Done (requested hole is already sparse)."); + return 0; + } +extend_hole: + /* Extend the hole. */ + rl->length = end - rl->vcn; + /* If @end is in a hole, merge it with the current one. */ + if (rl_end->lcn == LCN_HOLE) { + rl_end++; + rl->length = rl_end->vcn - rl->vcn; + } + /* We have done the hole. Now deal with the remaining tail. */ + rl++; + /* Cut out all runlist elements up to @end. */ + if (rl < rl_end) + memmove(rl, rl_end, (rl_real_end - rl_end + 1) * + sizeof(*rl)); + /* Adjust the beginning of the tail if necessary. */ + if (end > rl->vcn) { + s64 delta = end - rl->vcn; + rl->vcn = end; + rl->length -= delta; + /* Only adjust the lcn if it is real. */ + if (rl->lcn >= 0) + rl->lcn += delta; + } +shrink_allocation: + /* Reallocate memory if the allocation changed. */ + if (rl < rl_end) { + rl = ntfs_rl_realloc(runlist->rl, old_size, + old_size - (rl_end - rl)); + if (IS_ERR(rl)) + ntfs_warning(vol->sb, "Failed to shrink " + "runlist buffer. 
This just " + "wastes a bit of memory " + "temporarily so we ignore it " + "and return success."); + else + runlist->rl = rl; + } + ntfs_debug("Done (extend hole)."); + return 0; + } + /* + * If @start is at the beginning of a run things are easier as there is + * no need to split the first run. + */ + if (start == rl->vcn) { + /* + * @start is at the beginning of a run. + * + * If the previous run is sparse, extend its hole. + * + * If @end is not in the same run, switch the run to be sparse + * and extend the newly created hole. + * + * Thus both of these cases reduce the problem to the above + * case of "@start is in a hole". + */ + if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) { + rl--; + goto extend_hole; + } + if (end >= rl[1].vcn) { + rl->lcn = LCN_HOLE; + goto extend_hole; + } + /* + * The final case is when @end is in the same run as @start. + * For this need to split the run into two. One run for the + * sparse region between the beginning of the old run, i.e. + * @start, and @end and one for the remaining non-sparse + * region, i.e. between @end and the end of the old run. + */ + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); + if (IS_ERR(trl)) + goto enomem_out; + old_size++; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } +split_end: + /* Shift all the runs up by one. */ + memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl)); + /* Finally, setup the two split runs. */ + rl->lcn = LCN_HOLE; + rl->length = length; + rl++; + rl->vcn += length; + /* Only adjust the lcn if it is real. */ + if (rl->lcn >= 0 || lcn_fixup) + rl->lcn += length; + rl->length -= length; + ntfs_debug("Done (split one)."); + return 0; + } + /* + * @start is neither in a hole nor at the beginning of a run. 
+ * + * If @end is in a hole, things are easier as simply truncating the run + * @start is in to end at @start - 1, deleting all runs after that up + * to @end, and finally extending the beginning of the run @end is in + * to be @start is all that is needed. + */ + if (rl_end->lcn == LCN_HOLE) { + /* Truncate the run containing @start. */ + rl->length = start - rl->vcn; + rl++; + /* Cut out all runlist elements up to @end. */ + if (rl < rl_end) + memmove(rl, rl_end, (rl_real_end - rl_end + 1) * + sizeof(*rl)); + /* Extend the beginning of the run @end is in to be @start. */ + rl->vcn = start; + rl->length = rl[1].vcn - start; + goto shrink_allocation; + } + /* + * If @end is not in a hole there are still two cases to distinguish. + * Either @end is or is not in the same run as @start. + * + * The second case is easier as it can be reduced to an already solved + * problem by truncating the run @start is in to end at @start - 1. + * Then, if @end is in the next run need to split the run into a sparse + * run followed by a non-sparse run (already covered above) and if @end + * is not in the next run switching it to be sparse, again reduces the + * problem to the already covered case of "@start is in a hole". + */ + if (end >= rl[1].vcn) { + /* + * If @end is not in the next run, reduce the problem to the + * case of "@start is in a hole". + */ + if (rl[1].length && end >= rl[2].vcn) { + /* Truncate the run containing @start. */ + rl->length = start - rl->vcn; + rl++; + rl->vcn = start; + rl->lcn = LCN_HOLE; + goto extend_hole; + } + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1); + if (IS_ERR(trl)) + goto enomem_out; + old_size++; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } + /* Truncate the run containing @start. 
*/ + rl->length = start - rl->vcn; + rl++; + /* + * @end is in the next run, reduce the problem to the case + * where "@start is at the beginning of a run and @end is in + * the same run as @start". + */ + delta = rl->vcn - start; + rl->vcn = start; + if (rl->lcn >= 0) { + rl->lcn -= delta; + /* Need this in case the lcn just became negative. */ + lcn_fixup = TRUE; + } + rl->length += delta; + goto split_end; + } + /* + * The first case from above, i.e. @end is in the same run as @start. + * We need to split the run into three. One run for the non-sparse + * region between the beginning of the old run and @start, one for the + * sparse region between @start and @end, and one for the remaining + * non-sparse region, i.e. between @end and the end of the old run. + */ + trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2); + if (IS_ERR(trl)) + goto enomem_out; + old_size += 2; + if (runlist->rl != trl) { + rl = trl + (rl - runlist->rl); + rl_end = trl + (rl_end - runlist->rl); + rl_real_end = trl + (rl_real_end - runlist->rl); + runlist->rl = trl; + } + /* Shift all the runs up by two. */ + memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl)); + /* Finally, setup the three split runs. 
*/ + rl->length = start - rl->vcn; + rl++; + rl->vcn = start; + rl->lcn = LCN_HOLE; + rl->length = length; + rl++; + delta = end - rl->vcn; + rl->vcn = end; + rl->lcn += delta; + rl->length -= delta; + ntfs_debug("Done (split both)."); + return 0; +enomem_out: + ntfs_error(vol->sb, "Not enough memory to extend runlist buffer."); + return -ENOMEM; +} + #endif /* NTFS_RW */ diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h index aa0ee6540e7..47728fbb610 100644 --- a/fs/ntfs/runlist.h +++ b/fs/ntfs/runlist.h @@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, const s64 new_length); +int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, + const VCN start, const s64 length); + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_RUNLIST_H */ diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 41aa8eb6755..b2b39296126 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1133,7 +1133,8 @@ mft_unmap_out: * * Return TRUE on success or FALSE on error. */ -static BOOL load_and_check_logfile(ntfs_volume *vol) +static BOOL load_and_check_logfile(ntfs_volume *vol, + RESTART_PAGE_HEADER **rp) { struct inode *tmp_ino; @@ -1145,7 +1146,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol) /* Caller will display error message. */ return FALSE; } - if (!ntfs_check_logfile(tmp_ino)) { + if (!ntfs_check_logfile(tmp_ino, rp)) { iput(tmp_ino); /* ntfs_check_logfile() will have displayed error output. */ return FALSE; @@ -1689,6 +1690,7 @@ static BOOL load_system_files(ntfs_volume *vol) VOLUME_INFORMATION *vi; ntfs_attr_search_ctx *ctx; #ifdef NTFS_RW + RESTART_PAGE_HEADER *rp; int err; #endif /* NTFS_RW */ @@ -1841,8 +1843,9 @@ get_ctx_vol_failed: * Get the inode for the logfile, check it and determine if the volume * was shutdown cleanly. 
*/ - if (!load_and_check_logfile(vol) || - !ntfs_is_logfile_clean(vol->logfile_ino)) { + rp = NULL; + if (!load_and_check_logfile(vol, &rp) || + !ntfs_is_logfile_clean(vol->logfile_ino, rp)) { static const char *es1a = "Failed to load $LogFile"; static const char *es1b = "$LogFile is not clean"; static const char *es2 = ". Mount in Windows."; @@ -1857,6 +1860,10 @@ get_ctx_vol_failed: "continue nor on_errors=" "remount-ro was specified%s", es1, es2); + if (vol->logfile_ino) { + BUG_ON(!rp); + ntfs_free(rp); + } goto iput_logfile_err_out; } sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; @@ -1867,6 +1874,7 @@ get_ctx_vol_failed: /* This will prevent a read-write remount. */ NVolSetErrors(vol); } + ntfs_free(rp); #endif /* NTFS_RW */ /* Get the root directory inode so we can do path lookups. */ vol->root_ino = ntfs_iget(sb, FILE_root); diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 19c42e231b4..a389a5a16c8 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, return -EINVAL; conversion_err: ntfs_error(vol->sb, "Unicode name contains characters that cannot be " - "converted to character set %s.", nls->charset); + "converted to character set %s. You might want to " + "try to use the mount option nls=utf8.", nls->charset); if (ns != *outs) kfree(ns); if (wc != -ENAMETOOLONG) |