diff options
Diffstat (limited to 'fs/ntfs')
41 files changed, 5447 insertions, 4262 deletions
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog deleted file mode 100644 index de58579a1d0..00000000000 --- a/fs/ntfs/ChangeLog +++ /dev/null @@ -1,1580 +0,0 @@ -ToDo/Notes: - - Find and fix bugs. - - In between ntfs_prepare/commit_write, need exclusion between - simultaneous file extensions. This is given to us by holding i_sem - on the inode. The only places in the kernel when a file is resized - are prepare/commit write and truncate for both of which i_sem is - held. Just have to be careful in readpage/writepage and all other - helpers not running under i_sem that we play nice... - Also need to be careful with initialized_size extention in - ntfs_prepare_write. Basically, just be _very_ careful in this code... - UPDATE: The only things that need to be checked are read/writepage - which do not hold i_sem. Note writepage cannot change i_size but it - needs to cope with a concurrent i_size change, just like readpage. - Also both need to cope with concurrent changes to the other sizes, - i.e. initialized/allocated/compressed size, as well. - - Implement mft.c::sync_mft_mirror_umount(). We currently will just - leave the volume dirty on umount if the final iput(vol->mft_ino) - causes a write of any mirrored mft records due to the mft mirror - inode having been discarded already. Whether this can actually ever - happen is unclear however so it is worth waiting until someone hits - the problem. - - Enable the code for setting the NT4 compatibility flag when we start - making NTFS 1.2 specific modifications. - -2.1.24 - Lots of bug fixes and support more clean journal states. - - - Support journals ($LogFile) which have been modified by chkdsk. This - means users can boot into Windows after we marked the volume dirty. - The Windows boot will run chkdsk and then reboot. The user can then - immediately boot into Linux rather than having to do a full Windows - boot first before rebooting into Linux and we will recognize such a - journal and empty it as it is clean by definition. Note, this only - works if chkdsk left the journal in an obviously clean state. - - Support journals ($LogFile) with only one restart page as well as - journals with two different restart pages. We sanity check both and - either use the only sane one or the more recent one of the two in the - case that both are valid. - - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to - ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and - hence cannot fail. - - Use ntfs_malloc_nofs_nofail() in the two critical regions in - fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer - need to panic() if the allocation fails as it now cannot fail. - - Fix two nasty runlist merging bugs that had gone unnoticed so far. - Thanks to Stefano Picerno for the bug report. - - Remove two bogus BUG_ON()s from fs/ntfs/mft.c. - - Fix handling of valid but empty mapping pairs array in - fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress(). - - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING - messages and include the inode number. Thanks to Yura Pakhuchiy for - pointing this out. - - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new - length is zero. - - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller - specified hole into a runlist. - - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned - index entry is in the index root, we forgot to set the @ir pointer in - the index context. Thanks to Yura Pakhuchiy for finding this bug. - - Remove bogus setting of PageError in ntfs_read_compressed_block(). - - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize(). - - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect - access to the allocated size in the ntfs inode with the size lock. - - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to - return LCN_ENOENT when there is no runlist and the allocated size is - zero. - - Fix load_attribute_list() to handle the case of a NULL runlist. - - Fix handling of sparse attributes in ntfs_attr_make_non_resident(). - - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set() - to ensure that these functions are never called for compressed or - encrypted attributes. - - Fix cluster (de)allocators to work when the runlist is NULL and more - importantly to take a locked runlist rather than them locking it - which leads to lock reversal. - - Truncate {a,c,m}time to the ntfs supported time granularity when - updating the times in the inode in ntfs_setattr(). - - Fixup handling of sparse, compressed, and encrypted attributes in - fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(), - fs/ntfs/aops.c::ntfs_{read,write}page(). - - Make ntfs_write_block() not instantiate sparse blocks if they contain - only zeroes. - - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page - lock protection over the buffer submission for i/o which allows the - removal of the get_bh()/put_bh() pairs for each buffer. - - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case - where a concurrent truncate has truncated the runlist under our feet. - - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c. - - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock - in the first buffer head instead of a driver global spin lock to - improve scalability. - - Minor fix to error handling and error message display in - fs/ntfs/aops.c::ntfs_prepare_nonresident_write(). - - Change the mount options {u,f,d}mask to always parse the number as - an octal number to conform to how chmod(1) works, too. Thanks to - Giuseppe Bilotta and Horst von Brand for pointing out the errors of - my ways. - - Fix various bugs in the runlist merging code. (Based on libntfs - changes by Richard Russon.) - - Fix sparse warnings that have crept in over time. - - Change ntfs_cluster_free() to require a write locked runlist on entry - since we otherwise get into a lock reversal deadlock if a read locked - runlist is passed in. In the process also change it to take an ntfs - inode instead of a vfs inode as parameter. - - Fix the definition of the CHKD ntfs record magic. It had an off by - two error causing it to be CHKB instead of CHKD. - - Fix a stupid bug in __ntfs_bitmap_set_bits_in_run() which caused the - count to become negative and hence we had a wild memset() scribbling - all over the system's ram. - -2.1.23 - Implement extension of resident files and make writing safe as well as - many bug fixes, cleanups, and enhancements... - - - Add printk rate limiting for ntfs_warning() and ntfs_error() when - compiled without debug. This avoids a possible denial of service - attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this - out. - - Fix compilation warnings on ia64. (Randy Dunlap) - - Use i_size_{read,write}() instead of reading i_size by hand and cache - the value where apropriate. - - Add size_lock to the ntfs_inode structure. This is an rw spinlock - and it locks against access to the inode sizes. Note, ->size_lock - is also accessed from irq context so you must use the _irqsave and - _irqrestore lock and unlock functions, respectively. Protect all - accesses to allocated_size, initialized_size, and compressed_size. - - Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers. - - Implement extension of resident files in the regular file write code - paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()). At present - this only works until the data attribute becomes too big for the mft - record after which we abort the write returning -EOPNOTSUPP from - ntfs_prepare_write(). - - Add disable_sparse mount option together with a per volume sparse - enable bit which is set appropriately and a per inode sparse disable - bit which is preset on some system file inodes as appropriate. - - Enforce that sparse support is disabled on NTFS volumes pre 3.0. - - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in - the creation of the unmapped runlist element for the base attribute - extent. - - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking - helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist(). - This allows us to map runlist fragments with the runlist lock already - held without having to drop and reacquire it around the call. Adapt - all callers. - - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked - runlist. This allows us to find runlist elements with the runlist - lock already held without having to drop and reacquire it around the - call. Adapt all callers. - - Change time to u64 in time.h::ntfs2utc() as it otherwise generates a - warning in the do_div() call on sparc32. Thanks to Meelis Roos for - the report and analysis of the warning. - - Fix a nasty runlist merge bug when merging two holes. - - Set the ntfs_inode->allocated_size to the real allocated size in the - mft record for resident attributes (fs/ntfs/inode.c). - - Small readability cleanup to use "a" instead of "ctx->attr" - everywhere (fs/ntfs/inode.c). - - Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the - definition of ntfs_export_ops from fs/ntfs/super.c to namei.c. Also, - declare ntfs_export_ops in fs/ntfs/ntfs.h. - - Correct sparse file handling. The compressed values need to be - checked and set in the ntfs inode as done for compressed files and - the compressed size needs to be used for vfs inode->i_blocks instead - of the allocated size, again, as done for compressed files. - - Add AT_EA in addition to AT_DATA to whitelist for being allowed to be - non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident(). - - Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new - write code. - - Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after - dropping the read lock and taking the write lock we were not checking - whether someone else did not already do the work we wanted to do. - - Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to - ntfs_attr_find_vcn_nolock() and update all callers. - - Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident(). - - Fix sign of various error return values to be negative in - fs/ntfs/lcnalloc.c. - - Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and - handle the case where an attribute is converted from resident to - non-resident by a concurrent file write. - - Remove checks for NULL before calling kfree() since kfree() does the - checking itself. (Jesper Juhl) - - Some utilities modify the boot sector but do not update the checksum. - Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to - only emit a warning when the checksum is incorrect rather than - refusing the mount. Thanks to Bernd Casimir for pointing this - problem out. - - Update attribute definition handling. - - Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants. - - Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000. - - Use MAX_BUF_PER_PAGE instead of variable sized array allocation for - better code generation and one less sparse warning in fs/ntfs/aops.c. - - Remove spurious void pointer casts from fs/ntfs/. (Pekka Enberg) - - Use C99 style structure initialization after memory allocation where - possible (fs/ntfs/{attrib.c,index.c,super.c}). Thanks to Al Viro and - Pekka Enberg. - - Stamp the transaction log ($UsnJrnl), aka user space journal, if it - is active on the volume and we are mounting read-write or remounting - from read-only to read-write. - - Fix a bug in address space operations error recovery code paths where - if the runlist was not mapped at all and a mapping error occured we - would leave the runlist locked on exit to the function so that the - next access to the same file would try to take the lock and deadlock. - - Detect the case when Windows has been suspended to disk on the volume - to be mounted and if this is the case do not allow (re)mounting - read-write. This is done by parsing hiberfil.sys if present. - - Fix several occurences of a bug where we would perform 'var & ~const' - with a 64-bit variable and a int, i.e. 32-bit, constant. This causes - the higher order 32-bits of the 64-bit variable to be zeroed. To fix - this cast the 'const' to the same 64-bit type as 'var'. - - Change the runlist terminator of the newly allocated cluster(s) to - LCN_ENOENT in ntfs_attr_make_non_resident(). Otherwise the runlist - code gets confused. - - Add an extra parameter @last_vcn to ntfs_get_size_for_mapping_pairs() - and ntfs_mapping_pairs_build() to allow the runlist encoding to be - partial which is desirable when filling holes in sparse attributes. - Update all callers. - - Change ntfs_map_runlist_nolock() to only decompress the mapping pairs - if the requested vcn is inside it. Otherwise we get into problems - when we try to map an out of bounds vcn because we then try to map - the already mapped runlist fragment which causes - ntfs_mapping_pairs_decompress() to fail and return error. Update - ntfs_attr_find_vcn_nolock() accordingly. - - Fix a nasty deadlock that appeared in recent kernels. - The situation: VFS inode X on a mounted ntfs volume is dirty. For - same inode X, the ntfs_inode is dirty and thus corresponding on-disk - inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging - to the table of inodes, i.e. $MFT, inode 0. - What happens: - Process 1: sys_sync()/umount()/whatever... calls - __sync_single_inode() for $MFT -> do_writepages() -> write_page for - the dirty page containing the on-disk inode X, the page is now locked - -> ntfs_write_mst_block() which clears PageUptodate() on the page to - prevent anyone else getting hold of it whilst it does the write out. - This is necessary as the on-disk inode needs "fixups" applied before - the write to disk which are removed again after the write and - PageUptodate is then set again. It then analyses the page looking - for dirty on-disk inodes and when it finds one it calls - ntfs_may_write_mft_record() to see if it is safe to write this - on-disk inode. This then calls ilookup5() to check if the - corresponding VFS inode is in icache(). This in turn calls ifind() - which waits on the inode lock via wait_on_inode whilst holding the - global inode_lock. - Process 2: pdflush results in a call to __sync_single_inode for the - same VFS inode X on the ntfs volume. This locks the inode (I_LOCK) - then calls write-inode -> ntfs_write_inode -> map_mft_record() -> - read_cache_page() for the page (in page cache of table of inodes - $MFT, inode 0) containing the on-disk inode. This page has - PageUptodate() clear because of Process 1 (see above) so - read_cache_page() blocks when it tries to take the page lock for the - page so it can call ntfs_read_page(). - Thus Process 1 is holding the page lock on the page containing the - on-disk inode X and it is waiting on the inode X to be unlocked in - ifind() so it can write the page out and then unlock the page. - And Process 2 is holding the inode lock on inode X and is waiting for - the page to be unlocked so it can call ntfs_readpage() or discover - that Process 1 set PageUptodate() again and use the page. - Thus we have a deadlock due to ifind() waiting on the inode lock. - The solution: The fix is to use the newly introduced - ilookup5_nowait() which does not wait on the inode's lock and hence - avoids the deadlock. This is safe as we do not care about the VFS - inode and only use the fact that it is in the VFS inode cache and the - fact that the vfs and ntfs inodes are one struct in memory to find - the ntfs inode in memory if present. Also, the ntfs inode has its - own locking so it does not matter if the vfs inode is locked. - - Fix bug in mft record writing where we forgot to set the device in - the buffers when mapping them after the VM had discarded them. - Thanks to Martin MOKREJÅ for the bug report. - -2.1.22 - Many bug and race fixes and error handling improvements. - - - Improve error handling in fs/ntfs/inode.c::ntfs_truncate(). - - Change fs/ntfs/inode.c::ntfs_truncate() to return an error code - instead of void and provide a helper ntfs_truncate_vfs() for the - vfs ->truncate method. - - Add a new ntfs inode flag NInoTruncateFailed() and modify - fs/ntfs/inode.c::ntfs_truncate() to set and clear it appropriately. - - Fix min_size and max_size definitions in ATTR_DEF structure in - fs/ntfs/layout.h to be signed. - - Add attribute definition handling helpers to fs/ntfs/attrib.[hc]: - ntfs_attr_size_bounds_check(), ntfs_attr_can_be_non_resident(), and - ntfs_attr_can_be_resident(), which in turn use the new private helper - ntfs_attr_find_in_attrdef(). - - In fs/ntfs/aops.c::mark_ntfs_record_dirty(), take the - mapping->private_lock around the dirtying of the buffer heads - analagous to the way it is done in __set_page_dirty_buffers(). - - Ensure the mft record size does not exceed the PAGE_CACHE_SIZE at - mount time as this cannot work with the current implementation. - - Check for location of attribute name and improve error handling in - general in fs/ntfs/inode.c::ntfs_read_locked_inode() and friends. - - In fs/ntfs/aops.c::ntfs_writepage(), if the page is fully outside - i_size, i.e. race with truncate, invalidate the buffers on the page - so that they become freeable and hence the page does not leak. - - Remove unused function fs/ntfs/runlist.c::ntfs_rl_merge(). (Adrian - Bunk) - - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_find() that resulted in - a NULL pointer dereference in the error code path when a corrupt - attribute was found. (Thanks to Domen Puncer for the bug report.) - - Add MODULE_VERSION() to fs/ntfs/super.c. - - Make several functions and variables static. (Adrian Bunk) - - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() so it allocates - buffers for the page if they are not present and then marks the - buffers belonging to the ntfs record dirty. This causes the buffers - to become busy and hence they are safe from removal until the page - has been written out. - - Fix stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find() in the - error handling code path that resulted in a BUG() due to trying to - unmap an extent mft record when the mapping of it had failed and it - thus was not mapped. (Thanks to Ken MacFerrin for the bug report.) - - Drop the runlist lock after the vcn has been read in - fs/ntfs/lcnalloc.c::__ntfs_cluster_free(). - - Rewrite handling of multi sector transfer errors. We now do not set - PageError() when such errors are detected in the async i/o handler - fs/ntfs/aops.c::ntfs_end_buffer_async_read(). All users of mst - protected attributes now check the magic of each ntfs record as they - use it and act appropriately. This has the effect of making errors - granular per ntfs record rather than per page which solves the case - where we cannot access any of the ntfs records in a page when a - single one of them had an mst error. (Thanks to Ken MacFerrin for - the bug report.) - - Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date() - where we failed to release i_sem on the $Quota/$Q attribute inode. - - Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup(). - - Add mapping of unmapped buffers to all remaining code paths, i.e. - fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(), - and write_mft_record_nolock(). From now on we require that the - complete runlist for the mft mirror is always mapped into memory. - - Add creation of buffers to fs/ntfs/mft.c::ntfs_sync_mft_mirror(). - - Improve error handling in fs/ntfs/aops.c::ntfs_{read,write}_block(). - - Cleanup fs/ntfs/aops.c::ntfs_{read,write}page() since we know that a - resident attribute will be smaller than a page which makes the code - simpler. Also make the code more tolerant to concurrent ->truncate. - -2.1.21 - Fix some races and bugs, rewrite mft write code, add mft allocator. - - - Implement extent mft record deallocation - fs/ntfs/mft.c::ntfs_extent_mft_record_free(). - - Splitt runlist related functions off from attrib.[hc] to runlist.[hc]. - - Add vol->mft_data_pos and initialize it at mount time. - - Rename init_runlist() to ntfs_init_runlist(), ntfs_vcn_to_lcn() to - ntfs_rl_vcn_to_lcn(), decompress_mapping_pairs() to - ntfs_mapping_pairs_decompress(), ntfs_merge_runlists() to - ntfs_runlists_merge() and adapt all callers. - - Add fs/ntfs/runlist.[hc]::ntfs_get_nr_significant_bytes(), - ntfs_get_size_for_mapping_pairs(), ntfs_write_significant_bytes(), - and ntfs_mapping_pairs_build(), adapted from libntfs. - - Make fs/ntfs/lcnalloc.c::ntfs_cluster_free_from_rl_nolock() not - static and add a declaration for it to lcnalloc.h. - - Add fs/ntfs/lcnalloc.h::ntfs_cluster_free_from_rl() which is a static - inline wrapper for ntfs_cluster_free_from_rl_nolock() which takes the - cluster bitmap lock for the duration of the call. - - Add fs/ntfs/attrib.[hc]::ntfs_attr_record_resize(). - - Implement the equivalent of memset() for an ntfs attribute in - fs/ntfs/attrib.[hc]::ntfs_attr_set() and switch - fs/ntfs/logfile.c::ntfs_empty_logfile() to using it. - - Remove unnecessary casts from LCN_* constants. - - Implement fs/ntfs/runlist.c::ntfs_rl_truncate_nolock(). - - Add MFT_RECORD_OLD as a copy of MFT_RECORD in fs/ntfs/layout.h and - change MFT_RECORD to contain the NTFS 3.1+ specific fields. - - Add a helper function fs/ntfs/aops.c::mark_ntfs_record_dirty() which - marks all buffers belonging to an ntfs record dirty, followed by - marking the page the ntfs record is in dirty and also marking the vfs - inode containing the ntfs record dirty (I_DIRTY_PAGES). - - Switch fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to using the - new helper fs/ntfs/aops.c::mark_ntfs_record_dirty() and remove the no - longer needed fs/ntfs/index.[hc]::__ntfs_index_entry_mark_dirty(). - - Move ntfs_{un,}map_page() from ntfs.h to aops.h and fix resulting - include errors. - - Move the typedefs for runlist_element and runlist from types.h to - runlist.h and fix resulting include errors. - - Remove unused {__,}format_mft_record() from fs/ntfs/mft.c. - - Modify fs/ntfs/mft.c::__mark_mft_record_dirty() to use the helper - mark_ntfs_record_dirty() which also changes the behaviour in that we - now set the buffers belonging to the mft record dirty as well as the - page itself. - - Update fs/ntfs/mft.c::write_mft_record_nolock() and sync_mft_mirror() - to cope with the fact that there now are dirty buffers in mft pages. - - Update fs/ntfs/inode.c::ntfs_write_inode() to also use the helper - mark_ntfs_record_dirty() and thus to set the buffers belonging to the - mft record dirty as well as the page itself. - - Fix compiler warnings on x86-64 in fs/ntfs/dir.c. (Randy Dunlap, - slightly modified by me) - - Add fs/ntfs/mft.c::try_map_mft_record() which fails with -EALREADY if - the mft record is already locked and otherwise behaves the same way - as fs/ntfs/mft.c::map_mft_record(). - - Modify fs/ntfs/mft.c::write_mft_record_nolock() so that it only - writes the mft record if the buffers belonging to it are dirty. - Otherwise we assume that it was written out by other means already. - - Attempting to write outside initialized size is _not_ a bug so remove - the bug check from fs/ntfs/aops.c::ntfs_write_mst_block(). It is in - fact required to write outside initialized size when preparing to - extend the initialized size. - - Map the page instead of using page_address() before writing to it in - fs/ntfs/aops.c::ntfs_mft_writepage(). - - Provide exclusion between opening an inode / mapping an mft record - and accessing the mft record in fs/ntfs/mft.c::ntfs_mft_writepage() - by setting the page not uptodate throughout ntfs_mft_writepage(). - - Clear the page uptodate flag in fs/ntfs/aops.c::ntfs_write_mst_block() - to ensure noone can see the page whilst the mst fixups are applied. - - Add the helper fs/ntfs/mft.c::ntfs_may_write_mft_record() which - checks if an mft record may be written out safely obtaining any - necessary locks in the process. This is used by - fs/ntfs/aops.c::ntfs_write_mst_block(). - - Modify fs/ntfs/aops.c::ntfs_write_mst_block() to also work for - writing mft records and improve its error handling in the process. - Now if any of the records in the page fail to be written out, all - other records will be written out instead of aborting completely. - - Remove ntfs_mft_aops and update all users to use ntfs_mst_aops. - - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to set the - ntfs_mst_aops for all inodes which are NInoMstProtected() and - ntfs_aops for all other inodes. - - Rename fs/ntfs/mft.c::sync_mft_mirror{,_umount}() to - ntfs_sync_mft_mirror{,_umount}() and change their parameters so they - no longer require an ntfs inode to be present. Update all callers. - - Cleanup the error handling in fs/ntfs/mft.c::ntfs_sync_mft_mirror(). - - Clear the page uptodate flag in fs/ntfs/mft.c::ntfs_sync_mft_mirror() - to ensure noone can see the page whilst the mst fixups are applied. - - Remove the no longer needed fs/ntfs/mft.c::ntfs_mft_writepage() and - fs/ntfs/mft.c::try_map_mft_record(). - - Fix callers of fs/ntfs/aops.c::mark_ntfs_record_dirty() to call it - with the ntfs inode which contains the page rather than the ntfs - inode the mft record of which is in the page. - - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by moving the - index inode bitmap inode release code from there to - fs/ntfs/inode.c::ntfs_clear_big_inode(). (Thanks to Christoph - Hellwig for spotting this.) - - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by taking the - inode semaphore around the code that sets ni->itype.index.bmp_ino to - NULL and reorganize the code to optimize it a bit. (Thanks to - Christoph Hellwig for spotting this.) - - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() to no longer take the - ntfs inode as a parameter as this is confusing and misleading and the - needed ntfs inode is available via NTFS_I(page->mapping->host). - Adapt all callers to this change. - - Modify fs/ntfs/mft.c::write_mft_record_nolock() and - fs/ntfs/aops.c::ntfs_write_mst_block() to only check the dirty state - of the first buffer in a record and to take this as the ntfs record - dirty state. We cannot look at the dirty state for subsequent - buffers because we might be racing with - fs/ntfs/aops.c::mark_ntfs_record_dirty(). - - Move the static inline ntfs_init_big_inode() from fs/ntfs/inode.c to - inode.h and make fs/ntfs/inode.c::__ntfs_init_inode() non-static and - add a declaration for it to inode.h. Fix some compilation issues - that resulted due to #includes and header file interdependencies. - - Simplify setup of i_mode in fs/ntfs/inode.c::ntfs_read_locked_inode(). - - Add helpers fs/ntfs/layout.h::MK_MREF() and MK_LE_MREF(). - - Modify fs/ntfs/mft.c::map_extent_mft_record() to only verify the mft - record sequence number if it is specified (i.e. not zero). - - Add fs/ntfs/mft.[hc]::ntfs_mft_record_alloc() and various helper - functions used by it. - - Update Documentation/filesystems/ntfs.txt with instructions on how to - use the Device-Mapper driver with NTFS ftdisk/LDM raid. This removes - the linear raid problem with the Software RAID / MD driver when one - or more of the devices has an odd number of sectors. - -2.1.20 - Fix two stupid bugs introduced in 2.1.18 release. - - - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_reinit_search_ctx() - where we did not clear ctx->al_entry but it was still set due to - changes in ntfs_attr_lookup() and ntfs_external_attr_find() in - particular. - - Fix another stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find() - where we forgot to unmap the extent mft record when we had finished - enumerating an attribute which caused a bug check to trigger when the - VFS calls ->clear_inode. - -2.1.19 - Many cleanups, improvements, and a minor bug fix. - - - Update ->setattr (fs/ntfs/inode.c::ntfs_setattr()) to refuse to - change the uid, gid, and mode of an inode as we do not support NTFS - ACLs yet. - - Remove BKL use from ntfs_setattr() syncing up with the rest of the - kernel. - - Get rid of the ugly transparent union in fs/ntfs/dir.c::ntfs_readdir() - and ntfs_filldir() as per suggestion from Al Viro. - - Change '\0' and L'\0' to simply 0 as per advice from Linus Torvalds. - - Update ->truncate (fs/ntfs/inode.c::ntfs_truncate()) to check if the - inode size has changed and to only output an error if so. - - Rename fs/ntfs/attrib.h::attribute_value_length() to ntfs_attr_size(). - - Add le{16,32,64} as well as sle{16,32,64} data types to - fs/ntfs/types.h. - - Change ntfschar to be le16 instead of u16 in fs/ntfs/types.h. - - Add le versions of VCN, LCN, and LSN called leVCN, leLCN, and leLSN, - respectively, to fs/ntfs/types.h. - - Update endianness conversion macros in fs/ntfs/endian.h to use the - new types as appropriate. - - Do proper type casting when using sle64_to_cpup() in fs/ntfs/dir.c - and index.c. - - Add leMFT_REF data type to fs/ntfs/layout.h. - - Update all NTFS header files with the new little endian data types. - Affected files are fs/ntfs/layout.h, logfile.h, and time.h. - - Do proper type casting when using ntfs_is_*_recordp() in - fs/ntfs/logfile.c, mft.c, and super.c. - - Fix all the sparse bitwise warnings. Had to change all the typedef - enums storing little endian values to simple enums plus a typedef for - the datatype to make sparse happy. - - Fix a bug found by the new sparse bitwise warnings where the default - upcase table was defined as a pointer to wchar_t rather than ntfschar - in fs/ntfs/ntfs.h and super.c. - - Change {const_,}cpu_to_le{16,32}(0) to just 0 as suggested by Al Viro. - -2.1.18 - Fix scheduling latencies at mount time as well as an endianness bug. - - - Remove vol->nr_mft_records as it was pretty meaningless and optimize - the calculation of total/free inodes as used by statfs(). - - Fix scheduling latencies in ntfs_fill_super() by dropping the BKL - because the code itself is using the ntfs_lock semaphore which - provides safe locking. (Ingo Molnar) - - Fix a potential bug in fs/ntfs/mft.c::map_extent_mft_record() that - could occur in the future for when we start closing/freeing extent - inodes if we don't set base_ni->ext.extent_ntfs_inos to NULL after - we free it. - - Rename {find,lookup}_attr() to ntfs_attr_{find,lookup}() as well as - find_external_attr() to ntfs_external_attr_find() to cleanup the - namespace a bit and to be more consistent with libntfs. - - Rename {{re,}init,get,put}_attr_search_ctx() to - ntfs_attr_{{re,}init,get,put}_search_ctx() as well as the type - attr_search_context to ntfs_attr_search_ctx. - - Force use of ntfs_attr_find() in ntfs_attr_lookup() when searching - for the attribute list attribute itself. - - Fix endianness bug in ntfs_external_attr_find(). - - Change ntfs_{external_,}attr_find() to return 0 on success, -ENOENT - if the attribute is not found, and -EIO on real error. In the case - of -ENOENT, the search context is updated to describe the attribute - before which the attribute being searched for would need to be - inserted if such an action were to be desired and in the case of - ntfs_external_attr_find() the search context is also updated to - indicate the attribute list entry before which the attribute list - entry of the attribute being searched for would need to be inserted - if such an action were to be desired. Also make ntfs_find_attr() - static and remove its prototype from attrib.h as it is not used - anywhere other than attrib.c. Update ntfs_attr_lookup() and all - callers of ntfs_{external,}attr_{find,lookup}() for the new return - values. - - Minor cleanup of fs/ntfs/inode.c::ntfs_init_locked_inode(). - -2.1.17 - Fix bugs in mount time error code paths and other updates. - - - Implement bitmap modification code (fs/ntfs/bitmap.[hc]). This - includes functions to set/clear a single bit or a run of bits. - - Add fs/ntfs/attrib.[hc]::ntfs_find_vcn() which returns the locked - runlist element containing a particular vcn. It also takes care of - mapping any needed runlist fragments. - - Implement cluster (de-)allocation code (fs/ntfs/lcnalloc.[hc]). - - Load attribute definition table from $AttrDef at mount time. - - Fix bugs in mount time error code paths involving (de)allocation of - the default and volume upcase tables. - - Remove ntfs_nr_mounts as it is no longer used. - -2.1.16 - Implement access time updates, file sync, async io, and read/writev. - - - Add support for readv/writev and aio_read/aio_write (fs/ntfs/file.c). - This is done by setting the appropriate file operations pointers to - the generic helper functions provided by mm/filemap.c. - - Implement fsync, fdatasync, and msync both for files (fs/ntfs/file.c) - and directories (fs/ntfs/dir.c). - - Add support for {a,m,c}time updates to inode.c::ntfs_write_inode(). - Note, except for the root directory and any other system files opened - by the user, the system files will not have their access times - updated as they are only accessed at the inode level an hence the - file level functions which cause the times to be updated are never - invoked. - -2.1.15 - Invalidate quotas when (re)mounting read-write. - - - Add new element itype.index.collation_rule to the ntfs inode - structure and set it appropriately in ntfs_read_locked_inode(). - - Implement a new inode type "index" to allow efficient access to the - indices found in various system files and adapt inode handling - accordingly (fs/ntfs/inode.[hc]). An index inode is essentially an - attribute inode (NInoAttr() is true) with an attribute type of - AT_INDEX_ALLOCATION. As such, it is no longer allowed to call - ntfs_attr_iget() with an attribute type of AT_INDEX_ALLOCATION as - there would be no way to distinguish between normal attribute inodes - and index inodes. The function to obtain an index inode is - ntfs_index_iget() and it uses the helper function - ntfs_read_locked_index_inode(). Note, we do not overload - ntfs_attr_iget() as indices consist of multiple attributes so using - ntfs_attr_iget() to obtain an index inode would be confusing. - - Ensure that there is no overflow when doing page->index << - PAGE_CACHE_SHIFT by casting page->index to s64 in fs/ntfs/aops.c. - - Use atomic kmap instead of kmap() in fs/ntfs/aops.c::ntfs_read_page() - and ntfs_read_block(). - - Use case sensitive attribute lookups instead of case insensitive ones. - - Lock all page cache pages belonging to mst protected attributes while - accessing them to ensure we never see corrupt data while the page is - under writeout. - - Add framework for generic ntfs collation (fs/ntfs/collation.[hc]). - We have ntfs_is_collation_rule_supported() to check if the collation - rule you want to use is supported and ntfs_collation() which actually - collates two data items. We currently only support COLLATION_BINARY - and COLLATION_NTOFS_ULONG but support for other collation rules will - be added as the need arises. - - Add a new type, ntfs_index_context, to allow retrieval of an index - entry using the corresponding index key. To get an index context, - use ntfs_index_ctx_get() and to release it, use ntfs_index_ctx_put(). - This also adds a new slab cache for the index contexts. To lookup a - key in an index inode, use ntfs_index_lookup(). After modifying an - index entry, call ntfs_index_entry_flush_dcache_page() followed by - ntfs_index_entry_mark_dirty() to ensure the changes are written out - to disk. For details see fs/ntfs/index.[hc]. Note, at present, if - an index entry is in the index allocation attribute rather than the - index root attribute it will not be written out (you will get a - warning message about discarded changes instead). - - Load the quota file ($Quota) and check if quota tracking is enabled - and if so, mark the quotas out of date. This causes windows to - rescan the volume on boot and update all quota entries. - - Add a set_page_dirty address space operation for ntfs_m[fs]t_aops. - It is simply set to __set_page_dirty_nobuffers() to make sure that - running set_page_dirty() on a page containing mft/ntfs records will - not affect the dirty state of the page buffers. - - Add fs/ntfs/index.c::__ntfs_index_entry_mark_dirty() which sets all - buffers that are inside the ntfs record in the page dirty after which - it sets the page dirty. This allows ->writepage to only write the - dirty index records rather than having to write all the records in - the page. Modify fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to - use this rather than __set_page_dirty_nobuffers(). - - Implement fs/ntfs/aops.c::ntfs_write_mst_block() which enables the - writing of page cache pages belonging to mst protected attributes - like the index allocation attribute in directory indices and other - indices like $Quota/$Q, etc. This means that the quota is now marked - out of date on all volumes rather than only on ones where the quota - defaults entry is in the index root attribute of the $Quota/$Q index. - -2.1.14 - Fix an NFSd caused deadlock reported by several users. - - - Modify fs/ntfs/ntfs_readdir() to copy the index root attribute value - to a buffer so that we can put the search context and unmap the mft - record before calling the filldir() callback. We need to do this - because of NFSd which calls ->lookup() from its filldir callback() - and this causes NTFS to deadlock as ntfs_lookup() maps the mft record - of the directory and since ntfs_readdir() has got it mapped already - ntfs_lookup() deadlocks. - -2.1.13 - Enable overwriting of resident files and housekeeping of system files. - - - Implement writing of mft records (fs/ntfs/mft.[hc]), which includes - keeping the mft mirror in sync with the mft when mirrored mft records - are written. The functions are write_mft_record{,_nolock}(). The - implementation is quite rudimentary for now with lots of things not - implemented yet but I am not sure any of them can actually occur so - I will wait for people to hit each one and only then implement it. - - Commit open system inodes at umount time. This should make it - virtually impossible for sync_mft_mirror_umount() to ever be needed. - - Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the - ntfs super operations. This gives us inode writing via the VFS inode - dirty code paths. Note: Access time updates are not implemented yet. - - Implement fs/ntfs/mft.[hc]::{,__}mark_mft_record_dirty() and make - fs/ntfs/aops.c::ntfs_writepage() and ntfs_commit_write() use it, thus - finally enabling resident file overwrite! (-8 This also includes a - placeholder for ->writepage (ntfs_mft_writepage()), which for now - just redirties the page and returns. Also, at umount time, we for - now throw away all mft data page cache pages after the last call to - ntfs_commit_inode() in the hope that all inodes will have been - written out by then and hence no dirty (meta)data will be lost. We - also check for this case and emit an error message telling the user - to run chkdsk. - - Use set_page_writeback() and end_page_writeback() in the resident - attribute code path of fs/ntfs/aops.c::ntfs_writepage() otherwise - the radix-tree tag PAGECACHE_TAG_DIRTY remains set even though the - page is clean. - - Implement ntfs_mft_writepage() so it now checks if any of the mft - records in the page are dirty and if so redirties the page and - returns. Otherwise it just returns (after doing set_page_writeback(), - unlock_page(), end_page_writeback() or the radix-tree tag - PAGECACHE_TAG_DIRTY remains set even though the page is clean), thus - alowing the VM to do with the page as it pleases. Also, at umount - time, now only throw away dirty mft (meta)data pages if dirty inodes - are present and ask the user to email us if they see this happening. - - Add functions ntfs_{clear,set}_volume_flags(), to modify the volume - information flags (fs/ntfs/super.c). - - Mark the volume dirty when (re)mounting read-write and mark it clean - when unmounting or remounting read-only. If any volume errors are - found, the volume is left marked dirty to force chkdsk to run. - - Add code to set the NT4 compatibility flag when (re)mounting - read-write for newer NTFS versions but leave it commented out for now - since we do not make any modifications that are NTFS 1.2 specific yet - and since setting this flag breaks Captive-NTFS which is not nice. - This code must be enabled once we start writing NTFS 1.2 specific - changes otherwise Windows NTFS driver might crash / cause corruption. - -2.1.12 - Fix the second fix to the decompression engine and some cleanups. - - - Add a new address space operations struct, ntfs_mst_aops, for mst - protected attributes. This is because the default ntfs_aops do not - make sense with mst protected data and were they to write anything to - such an attribute they would cause data corruption so we provide - ntfs_mst_aops which does not have any write related operations set. - - Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also - includes an adapted ntfs_commit_inode() and an implementation of - ntfs_write_inode() which for now just cleans dirty inodes without - writing them (it does emit a warning that this is happening). - - Undo the second decompression engine fix (see 2.1.9 release ChangeLog - entry) as it was only fixing a theoretical bug but at the same time - it badly broke the handling of sparse and uncompressed compression - blocks. - -2.1.11 - Driver internal cleanups. - - - Only build logfile.o if building the driver with read-write support. - - Really final white space cleanups. - - Use generic_ffs() instead of ffs() in logfile.c which allows the - log_page_size variable to be optimized by gcc into a constant. - - Rename uchar_t to ntfschar everywhere as uchar_t is unsigned 1-byte - char as defined by POSIX and as found on some systems. - -2.1.10 - Force read-only (re)mounting of volumes with unsupported volume flags. - - - Finish off the white space cleanups (remove trailing spaces, etc). - - Clean up ntfs_fill_super() and ntfs_read_inode_mount() by removing - the kludges around the first iget(). Instead of (re)setting ->s_op - we have the $MFT inode set up by explicit new_inode() / set ->i_ino / - insert_inode_hash() / call ntfs_read_inode_mount() directly. This - kills the need for second super_operations and allows to return error - from ntfs_read_inode_mount() without resorting to ugly "poisoning" - tricks. (Al Viro) - - Force read-only (re)mounting if any of the following bits are set in - the volume information flags: - VOLUME_IS_DIRTY, VOLUME_RESIZE_LOG_FILE, - VOLUME_UPGRADE_ON_MOUNT, VOLUME_DELETE_USN_UNDERWAY, - VOLUME_REPAIR_OBJECT_ID, VOLUME_MODIFIED_BY_CHKDSK - To make this easier we define VOLUME_MUST_MOUNT_RO_MASK with all the - above bits set so the test is made easy. - -2.1.9 - Fix two bugs in decompression engine. - - - Fix a bug where we would not always detect that we have reached the - end of a compression block because we were ending at minus one byte - which is effectively the same as being at the end. The fix is to - check whether the uncompressed buffer has been fully filled and if so - we assume we have reached the end of the compression block. A big - thank you to Marcin GibuÅ‚a for the bug report, the assistance in - tracking down the bug and testing the fix. - - Fix a possible bug where when a compressed read is truncated to the - end of the file, the offset inside the last page was not truncated. - -2.1.8 - Handle $MFT mirror and $LogFile, improve time handling, and cleanups. - - - Use get_bh() instead of manual atomic_inc() in fs/ntfs/compress.c. - - Modify fs/ntfs/time.c::ntfs2utc(), get_current_ntfs_time(), and - utc2ntfs() to work with struct timespec instead of time_t on the - Linux UTC time side thus preserving the full precision of the NTFS - time and only loosing up to 99 nano-seconds in the Linux UTC time. - - Move fs/ntfs/time.c to fs/ntfs/time.h and make the time functions - static inline. - - Remove unused ntfs_dirty_inode(). - - Cleanup super operations declaration in fs/ntfs/super.c. - - Wrap flush_dcache_mft_record_page() in #ifdef NTFS_RW. - - Add NInoTestSetFoo() and NInoTestClearFoo() macro magic to - fs/ntfs/inode.h and use it to declare NInoTest{Set,Clear}Dirty. - - Move typedefs for ntfs_attr and test_t from fs/ntfs/inode.c to - fs/ntfs/inode.h so they can be used elsewhere. - - Determine the mft mirror size as the number of mirrored mft records - and store it in ntfs_volume->mftmirr_size (fs/ntfs/super.c). - - Load the mft mirror at mount time and compare the mft records stored - in it to the ones in the mft. Force a read-only mount if the two do - not match (fs/ntfs/super.c). - - Fix type casting related warnings on 64-bit architectures. Thanks - to Meelis Roos for reporting them. - - Move %L to %ll as %L is floating point and %ll is integer which is - what we want. - - Read the journal ($LogFile) and determine if the volume has been - shutdown cleanly and force a read-only mount if not (fs/ntfs/super.c - and fs/ntfs/logfile.c). This is a little bit of a crude check in - that we only look at the restart areas and not at the actual log - records so that there will be a very small number of cases where we - think that a volume is dirty when in fact it is clean. This should - only affect volumes that have not been shutdown cleanly and did not - have any pending, non-check-pointed i/o. - - If the $LogFile indicates a clean shutdown and a read-write (re)mount - is requested, empty $LogFile by overwriting it with 0xff bytes to - ensure that Windows cannot cause data corruption by replaying a stale - journal after Linux has written to the volume. - -2.1.7 - Enable NFS exporting of mounted NTFS volumes. - - - Set i_generation in the VFS inode from the seq_no of the NTFS inode. - - Make ntfs_lookup() NFS export safe, i.e. use d_splice_alias(), etc. - - Implement ->get_dentry() in fs/ntfs/namei.c::ntfs_get_dentry() as the - default doesn't allow inode number 0 which is a valid inode on NTFS - and even if it did allow that it uses iget() instead of ntfs_iget() - which makes it useless for us. - - Implement ->get_parent() in fs/ntfs/namei.c::ntfs_get_parent() as the - default just returns -EACCES which is not very useful. - - Define export operations (->s_export_op) for NTFS (ntfs_export_ops) - and set them up in the super block at mount time (super.c) this - allows mounted NTFS volumes to be exported via NFS. - - Add missing return -EOPNOTSUPP; in - fs/ntfs/aops.c::ntfs_commit_nonresident_write(). - - Enforce no atime and no dir atime updates at mount/remount time as - they are not implemented yet anyway. - - Move a few assignments in fs/ntfs/attrib.c::load_attribute_list() to - after a NULL check. Thanks to Dave Jones for pointing this out. - -2.1.6 - Fix minor bug in handling of compressed directories. - - - Fix bug in handling of compressed directories. A compressed - directory is not really compressed so when we set the ->i_blocks - field of a compressed directory inode we were setting it from the - non-existing field ni->itype.compressed.size which gave random - results... For directories we now always use ni->allocated_size. - -2.1.5 - Fix minor bug in attribute list attribute handling. - - - Fix bug in attribute list handling. Actually it is not as much a bug - as too much protection in that we were not allowing attribute lists - which waste space on disk while Windows XP clearly allows it and in - fact creates such attribute lists so our driver was failing. - - Update NTFS documentation ready for 2.6 kernel release. - -2.1.4 - Reduce compiler requirements. - - - Remove all uses of unnamed structs and unions in the driver to make - old and newer gcc versions happy. Makes it a bit uglier IMO but at - least people will stop hassling me about it. - -2.1.3 - Important bug fixes in corner cases. - - - super.c::parse_ntfs_boot_sector(): Correct the check for 64-bit - clusters. (Philipp Thomas) - - attrib.c::load_attribute_list(): Fix bug when initialized_size is a - multiple of the block_size but not the cluster size. (Szabolcs - Szakacsits <szaka@sienet.hu>) - -2.1.2 - Important bug fixes aleviating the hangs in statfs. - - - Fix buggy free cluster and free inode determination logic. - -2.1.1 - Minor updates. - - - Add handling for initialized_size != data_size in compressed files. - - Reduce function local stack usage from 0x3d4 bytes to just noise in - fs/ntfs/upcase.c. (Randy Dunlap <rddunlap@osdl.ord>) - - Remove compiler warnings for newer gcc. - - Pages are no longer kmapped by mm/filemap.c::generic_file_write() - around calls to ->{prepare,commit}_write. Adapt NTFS appropriately - in fs/ntfs/aops.c::ntfs_prepare_nonresident_write() by using - kmap_atomic(KM_USER0). - -2.1.0 - First steps towards write support: implement file overwrite. - - - Add configuration option for developmental write support with an - appropriately scary configuration help text. - - Initial implementation of fs/ntfs/aops.c::ntfs_writepage() and its - helper fs/ntfs/aops.c::ntfs_write_block(). This enables mmap(2) based - overwriting of existing files on ntfs. Note: Resident files are - only written into memory, and not written out to disk at present, so - avoid writing to files smaller than about 1kiB. - - Initial implementation of fs/ntfs/aops.c::ntfs_prepare_write(), its - helper fs/ntfs/aops.c::ntfs_prepare_nonresident_write() and their - counterparts, fs/ntfs/aops.c::ntfs_commit_write(), and - fs/ntfs/aops.c::ntfs_commit_nonresident_write(), respectively. Also, - add generic_file_write() to the ntfs file operations (fs/ntfs/file.c). - This enables write(2) based overwriting of existing files on ntfs. - Note: As with mmap(2) based overwriting, resident files are only - written into memory, and not written out to disk at present, so avoid - writing to files smaller than about 1kiB. - - Implement ->truncate (fs/ntfs/inode.c::ntfs_truncate()) and - ->setattr() (fs/ntfs/inode.c::ntfs_setattr()) inode operations for - files with the purpose of intercepting and aborting all i_size - changes which we do not support yet. ntfs_truncate() actually only - emits a warning message but AFAICS our interception of i_size changes - elsewhere means ntfs_truncate() never gets called for i_size changes. - It is only called from generic_file_write() when we fail in - ntfs_prepare_{,nonresident_}write() in order to discard any - instantiated buffers beyond i_size. Thus i_size is not actually - changed so our warning message is enough. Unfortunately it is not - possible to easily determine if i_size is being changed or not hence - we just emit an appropriately worded error message. - -2.0.25 - Small bug fixes and cleanups. - - - Unlock the page in an out of memory error code path in - fs/ntfs/aops.c::ntfs_read_block(). - - If fs/ntfs/aops.c::ntfs_read_page() is called on an uptodate page, - just unlock the page and return. (This can happen due to ->writepage - clearing PageUptodate() during write out of MstProtected() - attributes. - - Remove leaked write code again. - -2.0.24 - Cleanups. - - - Treat BUG_ON() as ASSERT() not VERIFY(), i.e. do not use side effects - inside BUG_ON(). (Adam J. Richter) - - Split logical OR expressions inside BUG_ON() into individual BUG_ON() - calls for improved debugging. (Adam J. Richter) - - Add errors flag to the ntfs volume state, accessed via - NVol{,Set,Clear}Errors(vol). - - Do not allow read-write remounts of read-only volumes with errors. - - Clarify comment for ntfs file operation sendfile which was added by - Christoph Hellwig a while ago (just using generic_file_sendfile()) - to say that ntfs ->sendfile is only used for the case where the - source data is on the ntfs partition and the destination is - somewhere else, i.e. nothing we need to concern ourselves with. - - Add generic_file_write() as our ntfs file write operation. - -2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures). - - - Massive internal locking changes to mft record locking. Fixes lock - recursion and replaces the mrec_lock read/write semaphore with a - mutex. Also removes the now superfluous mft_count. This fixes several - race conditions and deadlocks, especially in the future write code. - - Fix ntfs over loopback for compressed files by adding an - optimization barrier. (gcc was screwing up otherwise ?) - - Miscellaneous cleanups all over the code and a fix or two in error - handling code paths. - Thanks go to Christoph Hellwig for pointing out the following two: - - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). - - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too. - -2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers. - - - Change fs/ntfs/dir.c::ntfs_reddir() to only read/write ->f_pos once - at entry/exit respectively. - - Use C99 initializers for structures. - - Remove unused variable blocks from fs/ntfs/aops.c::ntfs_read_block(). - -2.0.21 - Check for, and refuse to work with too large files/directories/volumes. - - - Limit volume size at mount time to 2TiB on architectures where - unsigned long is 32-bits (fs/ntfs/super.c::parse_ntfs_boot_sector()). - This is the most we can do without overflowing the 32-bit limit of - the block device size imposed on us by sb_bread() and sb_getblk() - for the time being. - - Limit file/directory size at open() time to 16TiB on architectures - where unsigned long is 32-bits (fs/ntfs/file.c::ntfs_file_open() and - fs/ntfs/dir.c::ntfs_dir_open()). This is the most we can do without - overflowing the page cache page index. - -2.0.20 - Support non-resident directory index bitmaps, fix page leak in readdir. - - - Move the directory index bitmap to use an attribute inode instead of - having special fields for it inside the ntfs inode structure. This - means that the index bitmaps now use the page cache for i/o, too, - and also as a side effect we get support for non-resident index - bitmaps for free. - - Simplify/cleanup error handling in fs/ntfs/dir.c::ntfs_readdir() and - fix a page leak that manifested itself in some cases. - - Add fs/ntfs/inode.c::ntfs_put_inode(), which we need to release the - index bitmap inode on the final iput(). - -2.0.19 - Fix race condition, improvements, and optimizations in i/o interface. - - - Apply block optimization added to fs/ntfs/aops.c::ntfs_read_block() - to fs/ntfs/compress.c::ntfs_file_read_compressed_block() as well. - - Drop the "file" from ntfs_file_read_compressed_block(). - - Rename fs/ntfs/aops.c::ntfs_enb_buffer_read_async() to - ntfs_end_buffer_async_read() (more like the fs/buffer.c counterpart). - - Update ntfs_end_buffer_async_read() with the improved logic from - its updated counterpart fs/buffer.c::end_buffer_async_read(). Apply - further logic improvements to better determine when we set PageError. - - Update submission of buffers in fs/ntfs/aops.c::ntfs_read_block() to - check for the buffers being uptodate first in line with the updated - fs/buffer.c::block_read_full_page(). This plugs a small race - condition. - -2.0.18 - Fix race condition in reading of compressed files. - - - There was a narrow window between checking a buffer head for being - uptodate and locking it in ntfs_file_read_compressed_block(). We now - lock the buffer and then check whether it is uptodate or not. - -2.0.17 - Cleanups and optimizations - shrinking the ToDo list. - - - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to return an error - code and update callers, i.e. ntfs_iget(), to pass that error code - up instead of just using -EIO. - - Modifications to super.c to ensure that both mount and remount - cannot set any write related options when the driver is compiled - read-only. - - Optimize block resolution in fs/ntfs/aops.c::ntfs_read_block() to - cache the current runlist element. This should improve performance - when reading very large and/or very fragmented data. - -2.0.16 - Convert access to $MFT/$BITMAP to attribute inode API. - - - Fix a stupid bug introduced in 2.0.15 where we were unmapping the - wrong inode in fs/ntfs/inode.c::ntfs_attr_iget(). - - Fix debugging check in fs/ntfs/aops.c::ntfs_read_block(). - - Convert $MFT/$BITMAP access to attribute inode API and remove all - remnants of the ugly mftbmp address space and operations hack. This - means we finally have only one readpage function as well as only one - async io completion handler. Yey! The mft bitmap is now just an - attribute inode and is accessed from vol->mftbmp_ino just as if it - were a normal file. Fake inodes rule. (-: - -2.0.15 - Fake inodes based attribute i/o via the pagecache, fixes and cleanups. - - - Fix silly bug in fs/ntfs/super.c::parse_options() which was causing - remounts to fail when the partition had an entry in /etc/fstab and - the entry specified the nls= option. - - Apply same macro magic used in fs/ntfs/inode.h to fs/ntfs/volume.h to - expand all the helper functions NVolFoo(), NVolSetFoo(), and - NVolClearFoo(). - - Move copyright statement from driver initialisation message to - module description (fs/super.c). This makes the initialisation - message fit on one line and fits in better with rest of kernel. - - Update fs/ntfs/attrib.c::map_run_list() to work on both real and - attribute inodes, and both for files and directories. - - Implement fake attribute inodes allowing all attribute i/o to go via - the page cache and to use all the normal vfs/mm functionality: - - Add ntfs_attr_iget() and its helper ntfs_read_locked_attr_inode() - to fs/ntfs/inode.c. - - Add needed cleanup code to ntfs_clear_big_inode(). - - Merge address space operations for files and directories (aops.c), - now just have ntfs_aops: - - Rename: - end_buffer_read_attr_async() -> ntfs_end_buffer_read_async(), - ntfs_attr_read_block() -> ntfs_read_block(), - ntfs_file_read_page() -> ntfs_readpage(). - - Rewrite fs/ntfs/aops.c::ntfs_readpage() to work on both real and - attribute inodes, and both for files and directories. - - Remove obsolete fs/ntfs/aops.c::ntfs_mst_readpage(). - -2.0.14 - Run list merging code cleanup, minor locking changes, typo fixes. - - - Change fs/ntfs/super.c::ntfs_statfs() to not rely on BKL by moving - the locking out of super.c::get_nr_free_mft_records() and taking and - dropping the mftbmp_lock rw_semaphore in ntfs_statfs() itself. - - Bring attribute runlist merging code (fs/ntfs/attrib.c) in sync with - current userspace ntfs library code. This means that if a merge - fails the original runlists are always left unmodified instead of - being silently corrupted. - - Misc typo fixes. - -2.0.13 - Use iget5_locked() in preparation for fake inodes and small cleanups. - - - Remove nr_mft_bits and the now superfluous union with nr_mft_records - from ntfs_volume structure. - - Remove nr_lcn_bits and the now superfluous union with nr_clusters - from ntfs_volume structure. - - Use iget5_locked() and friends instead of conventional iget(). Wrap - the call in fs/ntfs/inode.c::ntfs_iget() and update callers of iget() - to use ntfs_iget(). Leave only one iget() call at mount time so we - don't need an ntfs_iget_mount(). - - Change fs/ntfs/inode.c::ntfs_new_extent_inode() to take mft_no as an - additional argument. - -2.0.12 - Initial cleanup of address space operations following 2.0.11 changes. - - - Merge fs/ntfs/aops.c::end_buffer_read_mst_async() and - fs/ntfs/aops.c::end_buffer_read_file_async() into one function - fs/ntfs/aops.c::end_buffer_read_attr_async() using NInoMstProtected() - to determine whether to apply mst fixups or not. - - Above change allows merging fs/ntfs/aops.c::ntfs_file_read_block() - and fs/ntfs/aops.c::ntfs_mst_readpage() into one function - fs/ntfs/aops.c::ntfs_attr_read_block(). Also, create a tiny wrapper - fs/ntfs/aops.c::ntfs_mst_readpage() to transform the parameters from - the VFS readpage function prototype to the ntfs_attr_read_block() - function prototype. - -2.0.11 - Initial preparations for fake inode based attribute i/o. - - - Move definition of ntfs_inode_state_bits to fs/ntfs/inode.h and - do some macro magic (adapted from include/linux/buffer_head.h) to - expand all the helper functions NInoFoo(), NInoSetFoo(), and - NInoClearFoo(). - - Add new flag to ntfs_inode_state_bits: NI_Sparse. - - Add new fields to ntfs_inode structure to allow use of fake inodes - for attribute i/o: type, name, name_len. Also add new state bits: - NI_Attr, which, if set, indicates the inode is a fake inode, and - NI_MstProtected, which, if set, indicates the attribute uses multi - sector transfer protection, i.e. fixups need to be applied after - reads and before/after writes. - - Rename fs/ntfs/inode.c::ntfs_{new,clear,destroy}_inode() to - ntfs_{new,clear,destroy}_extent_inode() and update callers. - - Use ntfs_clear_extent_inode() in fs/ntfs/inode.c::__ntfs_clear_inode() - instead of ntfs_destroy_extent_inode(). - - Cleanup memory deallocations in {__,}ntfs_clear_{,big_}inode(). - - Make all operations on ntfs inode state bits use the NIno* functions. - - Set up the new ntfs inode fields and state bits in - fs/ntfs/inode.c::ntfs_read_inode() and add appropriate cleanup of - allocated memory to __ntfs_clear_inode(). - - Cleanup ntfs_inode structure a bit for better ordering of elements - w.r.t. their size to allow better packing of the structure in memory. - -2.0.10 - There can only be 2^32 - 1 inodes on an NTFS volume. - - - Add check at mount time to verify that the number of inodes on the - volume does not exceed 2^32 - 1, which is the maximum allowed for - NTFS according to Microsoft. - - Change mft_no member of ntfs_inode structure to be unsigned long. - Update all users. This makes ntfs_inode->mft_no just a copy of struct - inode->i_ino. But we can't just always use struct inode->i_ino and - remove mft_no because extent inodes do not have an attached struct - inode. - -2.0.9 - Decompression engine now uses a single buffer and other cleanups. - - - Change decompression engine to use a single buffer protected by a - spin lock instead of per-CPU buffers. (Rusty Russell) - - Do not update cb_pos when handling a partial final page during - decompression of a sparse compression block, as the value is later - reset without being read/used. (Rusty Russell) - - Switch to using the new KM_BIO_SRC_IRQ for atomic kmap()s. (Andrew - Morton) - - Change buffer size in ntfs_readdir()/ntfs_filldir() to use - NLS_MAX_CHARSET_SIZE which makes the buffers almost 1kiB each but - it also makes everything safer so it is a good thing. - - Miscellaneous minor cleanups to comments. - -2.0.8 - Major updates for handling of case sensitivity and dcache aliasing. - - Big thanks go to Al Viro and other inhabitants of #kernel for investing - their time to discuss the case sensitivity and dcache aliasing issues. - - - Remove unused source file fs/ntfs/attraops.c. - - Remove show_inodes mount option(s), thus dropping support for - displaying of short file names. - - Remove deprecated mount option posix. - - Restore show_sys_files mount option. - - Add new mount option case_sensitive, to determine if the driver - treats file names as case sensitive or not. If case sensitive, create - file names in the POSIX namespace. Otherwise create file names in the - LONG/WIN32 namespace. Note, files remain accessible via their short - file name, if it exists. - - Remove really dumb logic bug in boot sector recovery code. - - Fix dcache aliasing issues wrt short/long file names via changes - to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and - fs/ntfs/namei.c::ntfs_lookup(): - - Add additional argument to ntfs_lookup_inode_by_name() in which we - return information about the matching file name if the case is not - matching or the match is a short file name. See comments above the - function definition for details. - - Change ntfs_lookup() to only create dcache entries for the correctly - cased file name and only for the WIN32 namespace counterpart of DOS - namespace file names. This ensures we have only one dentry per - directory and also removes all dcache aliasing issues between short - and long file names once we add write support. See comments above - function for details. - - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls(). - -2.0.7 - Minor cleanups and updates for changes in core kernel code. - - - Remove much of the NULL struct element initializers. - - Various updates to make compatible with recent kernels. - - Remove defines of MAX_BUF_PER_PAGE and include linux/buffer_head.h - in fs/ntfs/ntfs.h instead. - - Remove no longer needed KERNEL_VERSION checks. We are now in the - kernel proper so they are no longer needed. - -2.0.6 - Major bugfix to make compatible with other kernel changes. - - - Initialize the mftbmp address space properly now that there are more - fields in the struct address_space. This was leading to hangs and - oopses on umount since 2.5.12 because of changes to other parts of - the kernel. We probably want a kernel generic init_address_space() - function... - - Drop BKL from ntfs_readdir() after consultation with Al Viro. The - only caller of ->readdir() is vfs_readdir() which holds i_sem during - the call, and i_sem is sufficient protection against changes in the - directory inode (including ->i_size). - - Use generic_file_llseek() for directories (as opposed to - default_llseek()) as this downs i_sem instead of the BKL which is - what we now need for exclusion against ->f_pos changes considering we - no longer take the BKL in ntfs_readdir(). - -2.0.5 - Major bugfix. Buffer overflow in extent inode handling. - - - No need to set old blocksize in super.c::ntfs_fill_super() as the - VFS does so via invocation of deactivate_super() calling - fs->fill_super() calling block_kill_super() which does it. - - BKL moved from VFS into dir.c::ntfs_readdir(). (Linus Torvalds) - -> Do we really need it? I don't think so as we have exclusion on - the directory ntfs_inode rw_semaphore mrec_lock. We mmight have to - move the ->f_pos accesses under the mrec_lock though. Check this... - - Fix really, really, really stupid buffer overflow in extent inode - handling in mft.c::map_extent_mft_record(). - -2.0.4 - Cleanups and updates for kernel 2.5.11. - - - Add documentation on how to use the MD driver to be able to use NTFS - stripe and volume sets in Linux and generally cleanup documentation - a bit. - Remove all uses of kdev_t in favour of struct block_device *: - - Change compress.c::ntfs_file_read_compressed_block() to use - sb_getblk() instead of getblk(). - - Change super.c::ntfs_fill_super() to use bdev_hardsect_size() instead - of get_hardsect_size(). - - No need to get old blocksize in super.c::ntfs_fill_super() as - fs/super.c::get_sb_bdev() already does this. - - Set bh->b_bdev instead of bh->b_dev throughout aops.c. - -2.0.3 - Small bug fixes, cleanups, and performance improvements. - - - Remove some dead code from mft.c. - - Optimize readpage and read_block functions throughout aops.c so that - only initialized blocks are read. Non-initialized ones have their - buffer head mapped, zeroed, and set up to date, without scheduling - any i/o. Thanks to Al Viro for advice on how to avoid the device i/o. - Thanks go to Andrew Morton for spotting the below: - - Fix buglet in allocate_compression_buffers() error code path. - - Call flush_dcache_page() after modifying page cache page contents in - ntfs_file_readpage(). - - Check for existence of page buffers throughout aops.c before calling - create_empty_buffers(). This happens when an I/O error occurs and the - read is retried. (It also happens once writing is implemented so that - needed doing anyway but I had left it for later...) - - Don't BUG_ON() uptodate and/or mapped buffers throughout aops.c in - readpage and read_block functions. Reasoning same as above (i.e. I/O - error retries and future write code paths.) - -2.0.2 - Minor updates and cleanups. - - - Cleanup: rename mst.c::__post_read_mst_fixup to post_write_mst_fixup - and cleanup the code a bit, removing the unused size parameter. - - Change default fmask to 0177 and update documentation. - - Change attrib.c::get_attr_search_ctx() to return the search context - directly instead of taking the address of a pointer. A return value - of NULL means the allocation failed. Updated all callers - appropriately. - - Update to 2.5.9 kernel (preserving backwards compatibility) by - replacing all occurences of page->buffers with page_buffers(page). - - Fix minor bugs in runlist merging, also minor cleanup. - - Updates to bootsector layout and mft mirror contents descriptions. - - Small bug fix in error detection in unistr.c and some cleanups. - - Grow name buffer allocations in unistr.c in aligned mutlipled of 64 - bytes. - -2.0.1 - Minor updates. - - - Make default umask correspond to documentation. - - Improve documentation. - - Set default mode to include execute bit. The {u,f,d}mask can be used - to take it away if desired. This allows binaries to be executed from - a mounted ntfs partition. - -2.0.0 - New version number. Remove TNG from the name. Now in the kernel. - - - Add kill_super, just keeping up with the vfs changes in the kernel. - - Repeat some changes from tng-0.0.8 that somehow got lost on the way - from the CVS import into BitKeeper. - - Begin to implement proper handling of allocated_size vs - initialized_size vs data_size (i.e. i_size). Done are - mft.c::ntfs_mft_readpage(), aops.c::end_buffer_read_index_async(), - and attrib.c::load_attribute_list(). - - Lock the runlist in attrib.c::load_attribute_list() while using it. - - Fix memory leak in ntfs_file_read_compressed_block() and generally - clean up compress.c a little, removing some uncommented/unused debug - code. - - Tidy up dir.c a little bit. - - Don't bother getting the runlist in inode.c::ntfs_read_inode(). - - Merge mft.c::ntfs_mft_readpage() and aops.c::ntfs_index_readpage() - creating aops.c::ntfs_mst_readpage(), improving the handling of - holes and overflow in the process and implementing the correct - equivalent of ntfs_file_get_block() in ntfs_mst_readpage() itself. - I am aiming for correctness at the moment. Modularisation can come - later. - - Rename aops.c::end_buffer_read_index_async() to - end_buffer_read_mst_async() and optimize the overflow checking and - handling. - - Use the host of the mftbmp address space mapping to hold the ntfs - volume. This is needed so the async i/o completion handler can - retrieve a pointer to the volume. Hopefully this will not cause - problems elsewhere in the kernel... Otherwise will need to use a - fake inode. - - Complete implementation of proper handling of allocated_size vs - initialized_size vs data_size (i.e. i_size) in whole driver. - Basically aops.c is now completely rewritten. - - Change NTFS driver name to just NTFS and set version number to 2.0.0 - to make a clear distinction from the old driver which is still on - version 1.1.22. - -tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/ - - - Replace bdevname(sb->s_dev) with sb->s_id. - - Remove now superfluous new-line characters in all callers of - ntfs_debug(). - - Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for - directories. Without this the "find" utility gets very upset which is - fair enough as Linux/Unix do not support directory hard links. - - Further runlist merging work. (Richard Russon) - - Backwards compatibility for gcc-2.95. (Richard Russon) - - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. - - Convert to new filesystem declaration using ->ntfs_get_sb() and - replacing ntfs_read_super() with ntfs_fill_super(). - - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index - overflow on 32-bit architectures. - - Cleanup upcase loading code to use ntfs_(un)map_page(). - - Disable/reenable preemtion in critical sections of compession engine. - - Replace device size determination in ntfs_fill_super() with - sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous - function super.c::get_nr_blocks(). - - Implement a mount time option (show_inodes) allowing choice of which - types of inode names readdir() returns and modify ntfs_filldir() - accordingly. There are several parameters to show_inodes: - system: system files - win32: long file names (including POSIX file names) [DEFAULT] - long: same as win32 - dos: short file names only (excluding POSIX file names) - short: same as dos - posix: same as both win32 and dos - all: all file names - Note that the options are additive, i.e. specifying: - -o show_inodes=system,show_inodes=win32,show_inodes=dos - is the same as specifying: - -o show_inodes=all - Note that the "posix" and "all" options will show all directory - names, BUT the link count on each directory inode entry is set to 1, - due to Linux not supporting directory hard links. This may well - confuse some userspace applications, since the directory names will - have the same inode numbers. Thus it is NOT advisable to use the - "posix" or "all" options. We provide them only for completeness sake. - - Add copies of allocated_size, initialized_size, and compressed_size to - the ntfs inode structure and set them up in - inode.c::ntfs_read_inode(). These reflect the unnamed data attribute - for files and the index allocation attribute for directories. - - Add copies of allocated_size and initialized_size to ntfs inode for - $BITMAP attribute of large directories and set them up in - inode.c::ntfs_read_inode(). - - Add copies of allocated_size and initialized_size to ntfs volume for - $BITMAP attribute of $MFT and set them up in - super.c::load_system_files(). - - Parse deprecated ntfs driver options (iocharset, show_sys_files, - posix, and utf8) and tell user what the new options to use are. Note - we still do support them but they will be removed with kernel 2.7.x. - - Change all occurences of integer long long printf formatting to hex - as printk() will not support long long integer format if/when the - div64 patch goes into the kernel. - - Make slab caches have stable names and change the names to what they - were intended to be. These changes are required/made possible by the - new slab cache name handling which removes the length limitation by - requiring the caller of kmem_cache_create() to supply a stable name - which is then referenced but not copied. - - Rename run_list structure to run_list_element and create a new - run_list structure containing a pointer to a run_list_element - structure and a read/write semaphore. Adapt all users of runlists - to new scheme and take and release the lock as needed. This fixes a - nasty race as the run_list changes even when inodes are locked for - reading and even when the inode isn't locked at all, so we really - needed the serialization. We use a semaphore rather than a spinlock - as memory allocations can sleep and doing everything GFP_ATOMIC - would be silly. - - Cleanup read_inode() removing all code checking for lowest_vcn != 0. - This can never happen due to the nature of lookup_attr() and how we - support attribute lists. If it did happen it would imply the inode - being corrupt. - - Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as - bad if found. - - Update to 2.5.6-pre2 changes in struct address_space. - - Use parent_ino() when accessing d_parent inode number in dir.c. - - Import Sourceforge CVS repository into BitKeeper repository: - http://linux-ntfs.bkbits.net/ntfs-tng-2.5 - - Update fs/Makefile, fs/Config.help, fs/Config.in, and - Documentation/filesystems/ntfs.txt for NTFS TNG. - - Create kernel configuration option controlling whether debugging - is enabled or not. - - Add the required export of end_buffer_io_sync() from the patches - directory to the kernel code. - - Update inode.c::ntfs_show_options() with show_inodes mount option. - - Update errors mount option. - -tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only! - - - Cleanup mft.c and it's debug/error output in particular. Fix a minor - bug in mapping of extent inodes. Update all the comments to fit all - the recent code changes. - - Modify vcn_to_lcn() to cope with entirely unmapped runlists. - - Cleanups in compress.c, mostly comments and folding help. - - Implement attrib.c::map_run_list() as a generic helper. - - Make compress.c::ntfs_file_read_compressed_block() use map_run_list() - thus making code shorter and enabling attribute list support. - - Cleanup incorrect use of [su]64 with %L printf format specifier in - all source files. Type casts to [unsigned] long long added to correct - the mismatches (important for architectures which have long long not - being 64 bits). - - Merge async io completion handlers for directory indexes and $MFT - data into one by setting the index_block_size{_bits} of the ntfs - inode for $MFT to the mft_record_size{_bits} of the ntfs_volume. - - Cleanup aops.c, update comments. - - Make ntfs_file_get_block() use map_run_list() so all files now - support attribute lists. - - Make ntfs_dir_readpage() almost verbatim copy of - block_read_full_page() by using ntfs_file_get_block() with only real - difference being the use of our own async io completion handler - rather than the default one, thus reducing the amount of code and - automatically enabling attribute list support for directory indices. - - Fix bug in load_attribute_list() - forgot to call brelse in error - code path. - - Change parameters to find_attr() and lookup_attr(). We no longer - pass in the upcase table and its length. These can be gotten from - ctx->ntfs_ino->vol->upcase{_len}. Update all callers. - - Cleanups in attrib.c. - - Implement merging of runlists, attrib.c::merge_run_lists() and its - helpers. (Richard Russon) - - Attribute lists part 2, attribute extents and multi part runlists: - enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of - further runlist parts via attrib.c::map_run_list(). - - Tiny endianness bug fix in decompress_mapping_pairs(). - -tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements. - - - Enable encrypted directories. (Their index root is marked encrypted - to indicate that new files in that directory should be created - encrypted.) - - Fix bug in NInoBmpNonResident() macro. (Cut and paste error.) - - Enable $Extend system directory. Most (if not all) extended system - files do not have unnamed data attributes so ntfs_read_inode() had to - special case them but that is ok, as the special casing recovery - happens inside an error code path so there is zero slow down in the - normal fast path. The special casing is done by introducing a new - function inode.c::ntfs_is_extended_system_file() which checks if any - of the hard links in the inode point to $Extend as being their parent - directory and if they do we assume this is an extended system file. - - Create a sysctl/proc interface to allow {dis,en}abling of debug output - when compiled with -DDEBUG. Default is debug messages to be disabled. - To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug - (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl - interface is enabled). Inspired by old ntfs driver. - - Add debug_msgs insmod/kernel boot parameter to set whether debug - messages are {dis,en}abled. This is useful to enable debug messages - during ntfs initialization and is the only way to activate debugging - when the sysctl interface is not enabled. - - Cleanup debug output in various places. - - Remove all dollar signs ($) from the source (except comments) to - enable compilation on architectures whose gcc compiler does not - support dollar signs in the names of variables/constants. Attribute - types now start with AT_ instead of $ and $I30 is now just I30. - - Cleanup ntfs_lookup() and add consistency check of sequence numbers. - - Load complete runlist for $MFT/$BITMAP during mount and cleanup - access functions. This means we now cope with $MFT/$BITMAP being - spread accross several mft records. - - Disable modification of mft_zone_multiplier on remount. We can always - reenable this later on if we really want to, but we will need to make - sure we readjust the mft_zone size / layout accordingly. - -tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments. - - - Use sb_set_blocksize() instead of set_blocksize() and verify the - return value. - - Use sb_bread() instead of bread() throughout. - - Add index_vcn_size{_bits} to ntfs_inode structure to store the size - of a directory index block vcn. Apply resulting simplifications in - dir.c everywhere. - - Fix a small bug somewhere (but forgot what it was). - - Change ntfs_{debug,error,warning} to enable gcc to do type checking - on the printf-format parameter list and fix bugs reported by gcc - as a result. (Richard Russon) - - Move inode allocation strategy to Al's new stuff but maintain the - divorce of ntfs_inode from struct inode. To achieve this we have two - separate slab caches, one for big ntfs inodes containing a struct - inode and pure ntfs inodes and at the same time fix some faulty - error code paths in ntfs_read_inode(). - - Show mount options in proc (inode.c::ntfs_show_options()). - -tng-0.0.4 - Big changes, getting in line with Al Viro's comments. - - - Modified (un)map_mft_record functions to be common for read and write - case. To specify which is which, added extra parameter at front of - parameter list. Pass either READ or WRITE to this, each has the - obvious meaning. - - General cleanups to allow for easier folding in vi. - - attrib.c::decompress_mapping_pairs() now accepts the old runlist - argument, and invokes attrib.c::merge_run_lists() to merge the old - and the new runlists. - - Removed attrib.c::find_first_attr(). - - Implemented loading of attribute list and complete runlist for $MFT. - This means we now cope with $MFT being spread across several mft - records. - - Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax. - - Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels. - - Make ntfs_volume be allocated via kmalloc() instead of using a slab - cache. There are too little ntfs_volume structures at any one time - to justify a private slab cache. - - Fix bogus kmap() use in async io completion. Now use kmap_atomic(). - Use KM_BIO_IRQ on advice from IRC/kernel... - - Use ntfs_map_page() in map_mft_record() and create ->readpage method - for reading $MFT (ntfs_mft_readpage). In the process create dedicated - address space operations (ntfs_mft_aops) for $MFT inode mapping. Also - removed the now superfluous exports from the kernel core patch. - - Fix a bug where kfree() was used insted of ntfs_free(). - - Change map_mft_record() to take ntfs_inode as argument instead of - vfs inode. Dito for unmap_mft_record(). Adapt all callers. - - Add pointer to ntfs_volume to ntfs_inode. - - Add mft record number and sequence number to ntfs_inode. Stop using - i_ino and i_generation for in-driver purposes. - - Implement attrib.c::merge_run_lists(). (Richard Russon) - - Remove use of proper inodes by extent inodes. Move i_ino and - i_generation to ntfs_inode to do this. Apply simplifications that - result and remove iget_no_wait(), etc. - - Pass ntfs_inode everywhere in the driver (used to be struct inode). - - Add reference counting in ntfs_inode for the ntfs inode itself and - for the mapped mft record. - - Extend mft record mapping so we can (un)map extent mft records (new - functions (un)map_extent_mft_record), and so mappings are reference - counted and don't have to happen twice if already mapped - just ref - count increases. - - Add -o iocharset as alias to -o nls for backwards compatibility. - - The latest core patch is now tiny. In fact just a single additional - export is necessary over the base kernel. - -tng-0.0.3 - Cleanups, enhancements, bug fixes. - - - Work on attrib.c::decompress_mapping_pairs() to detect base extents - and setup the runlist appropriately using knowledge provided by the - sizes in the base attribute record. - - Balance the get_/put_attr_search_ctx() calls so we don't leak memory - any more. - - Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single - page or use vmalloc depending on the amount of memory requested. - - Cleanup error output. The __FUNCTION__ "(): " is now added - automatically. Introduced a new header file debug.h to support this - and also moved ntfs_debug() function into it. - - Make reading of compressed files more intelligent and especially get - rid of the vmalloc_nofs() from readpage(). This now uses per CPU - buffers (allocated at first mount with cluster size <= 4kiB and - deallocated on last umount with cluster size <= 4kiB), and - asynchronous io for the compressed data using a list of buffer heads. - Er, we use synchronous io as async io only works on whole pages - covered by buffers and not on individual buffer heads... - - Bug fix for reading compressed files with sparse compression blocks. - -tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs. - - - Fixed handling of directories when cluster size exceeds index block - size. - - Hide DOS only name space directory entries from readdir() but allow - them in lookup(). This should fix the problem that Linux doesn't - support directory hard links, while still allowing access to entries - via their short file name. This also has the benefit of mimicking - what Windows users are used to, so it is the ideal solution. - - Implemented sync_page everywhere so no more hangs in D state when - waiting for a page. - - Stop using bforget() in favour of brelse(). - - Stop locking buffers unnecessarily. - - Implemented compressed files (inode->mapping contains uncompressed - data, raw compressed data is currently bread() into a vmalloc()ed - memory buffer). - - Enable compressed directories. (Their index root is marked compressed - to indicate that new files in that directory should be created - compressed.) - - Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning - functions. (Thanks to Will Dyson for pointing this out.) - - Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and - ntfs_sb_info) out of the common inode and super_block structures and - started using the generic_ip and generic_sbp pointers instead. This - makes ntfs entirely private with respect to the kernel tree. - - Detect compiler version and abort with error message if gcc less than - 2.96 is used. - - Fix bug in name comparison function in unistr.c. - - Implement attribute lists part 1, the infrastructure: search contexts - and operations, find_external_attr(), lookup_attr()) and make the - code use the infrastructure. - - Fix stupid buffer overflow bug that became apparent on larger run - list containing attributes. - - Fix bugs in readdir() that became apparent on larger directories. - - The driver is now really useful and survives the test - find . -type f -exec md5sum "{}" \; - without any error messages on a over 1GiB sized partition with >16k - files on it, including compressed files and directories and many files - and directories with attribute lists. - -tng-0.0.1 - The first useful version. - - - Added ntfs_lookup(). - - Added default upcase generation and handling. - - Added compile options to be shown on module init. - - Many bug fixes that were "hidden" before. - - Update to latest kernel. - - Added ntfs_readdir(). - - Added file operations for mmap(), read(), open() and llseek(). We just - use the generic ones. The whole point of going through implementing - readpage() methods and where possible get_block() call backs is that - this allows us to make use of the generic high level methods provided - by the kernel. - - The driver is now actually useful! Yey. (-: It undoubtedly has got bugs - though and it doesn't implement accesssing compressed files yet. Also, - accessing files with attribute list attributes is not implemented yet - either. But for small or simple filesystems it should work and allow - you to list directories, use stat on directory entries and the file - system, open, read, mmap and llseek around in files. A big mile stone - has been reached! - -tng-0.0.0 - Initial version tag. - - Initial driver implementation. The driver can mount and umount simple - NTFS filesystems (i.e. ones without attribute lists in the system - files). If the mount fails there might be problems in the error handling - code paths, so be warned. Otherwise it seems to be loading the system - files nicely and the mft record read mapping/unmapping seems to be - working nicely, too. Proof of inode metadata in the page cache and non- - resident file unnamed stream data in the page cache concepts is thus - complete. diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig new file mode 100644 index 00000000000..f5a868cc915 --- /dev/null +++ b/fs/ntfs/Kconfig @@ -0,0 +1,78 @@ +config NTFS_FS + tristate "NTFS file system support" + select NLS + help + NTFS is the file system of Microsoft Windows NT, 2000, XP and 2003. + + Saying Y or M here enables read support. There is partial, but + safe, write support available. For write support you must also + say Y to "NTFS write support" below. + + There are also a number of user-space tools available, called + ntfsprogs. These include ntfsundelete and ntfsresize, that work + without NTFS support enabled in the kernel. + + This is a rewrite from scratch of Linux NTFS support and replaced + the old NTFS code starting with Linux 2.5.11. A backport to + the Linux 2.4 kernel series is separately available as a patch + from the project web site. + + For more information see <file:Documentation/filesystems/ntfs.txt> + and <http://www.linux-ntfs.org/>. + + To compile this file system support as a module, choose M here: the + module will be called ntfs. + + If you are not using Windows NT, 2000, XP or 2003 in addition to + Linux on your computer it is safe to say N. + +config NTFS_DEBUG + bool "NTFS debugging support" + depends on NTFS_FS + help + If you are experiencing any problems with the NTFS file system, say + Y here. This will result in additional consistency checks to be + performed by the driver as well as additional debugging messages to + be written to the system log. Note that debugging messages are + disabled by default. To enable them, supply the option debug_msgs=1 + at the kernel command line when booting the kernel or as an option + to insmod when loading the ntfs module. Once the driver is active, + you can enable debugging messages by doing (as root): + echo 1 > /proc/sys/fs/ntfs-debug + Replacing the "1" with "0" would disable debug messages. + + If you leave debugging messages disabled, this results in little + overhead, but enabling debug messages results in very significant + slowdown of the system. + + When reporting bugs, please try to have available a full dump of + debugging messages while the misbehaviour was occurring. + +config NTFS_RW + bool "NTFS write support" + depends on NTFS_FS + help + This enables the partial, but safe, write support in the NTFS driver. + + The only supported operation is overwriting existing files, without + changing the file length. No file or directory creation, deletion or + renaming is possible. Note only non-resident files can be written to + so you may find that some very small files (<500 bytes or so) cannot + be written to. + + While we cannot guarantee that it will not damage any data, we have + so far not received a single report where the driver would have + damaged someones data so we assume it is perfectly safe to use. + + Note: While write support is safe in this version (a rewrite from + scratch of the NTFS support), it should be noted that the old NTFS + write support, included in Linux 2.5.10 and before (since 1997), + is not safe. + + This is currently useful with TopologiLinux. TopologiLinux is run + on top of any DOS/Microsoft Windows system without partitioning your + hard disk. Unlike other Linux distributions TopologiLinux does not + need its own partition. For more information see + <http://topologi-linux.sourceforge.net/> + + It is perfectly safe to say N here. diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 894b2b876d3..30206b23843 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -2,18 +2,13 @@ obj-$(CONFIG_NTFS_FS) += ntfs.o -ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ - index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ - unistr.o upcase.o +ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ + index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ + unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\" +ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -ifeq ($(CONFIG_NTFS_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-y := -DNTFS_VERSION=\"2.1.30\" +ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG +ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW -ifeq ($(CONFIG_NTFS_RW),y) -EXTRA_CFLAGS += -DNTFS_RW - -ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o -endif diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 5e80c07c6a4..d267ea6aa1a 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -2,7 +2,7 @@ * aops.c - NTFS kernel address space operations and page cache handling. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -22,6 +22,8 @@ */ #include <linux/errno.h> +#include <linux/fs.h> +#include <linux/gfp.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/swap.h> @@ -85,17 +87,19 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } /* Check for the current buffer head overflowing. */ if (unlikely(file_ofs + bh->b_size > init_size)) { - u8 *kaddr; int ofs; + void *kaddr; ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; - kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + local_irq_save(flags); + kaddr = kmap_atomic(page); memset(kaddr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); - kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); flush_dcache_page(page); + kunmap_atomic(kaddr); + local_irq_restore(flags); } } else { clear_buffer_uptodate(bh); @@ -142,11 +146,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + local_irq_save(flags); + kaddr = kmap_atomic(page); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); - kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); + kunmap_atomic(kaddr); + local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); @@ -200,8 +206,8 @@ static int ntfs_read_block(struct page *page) /* $MFT/$DATA must have its complete runlist in memory at all times. */ BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni)); - blocksize_bits = VFS_I(ni)->i_blkbits; - blocksize = 1 << blocksize_bits; + blocksize = vol->sb->s_blocksize; + blocksize_bits = vol->sb->s_blocksize_bits; if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, 0); @@ -240,8 +246,7 @@ static int ntfs_read_block(struct page *page) rl = NULL; nr = i = 0; do { - u8 *kaddr; - int err; + int err = 0; if (unlikely(buffer_uptodate(bh))) continue; @@ -249,11 +254,10 @@ static int ntfs_read_block(struct page *page) arr[nr++] = bh; continue; } - err = 0; bh->b_bdev = vol->sb->s_bdev; /* Is the block within the allowed limits? */ if (iblock < lblock) { - BOOL is_retry = FALSE; + bool is_retry = false; /* Convert iblock into corresponding vcn and offset. */ vcn = (VCN)iblock << blocksize_bits >> @@ -291,7 +295,7 @@ lock_retry_remap: goto handle_hole; /* If first try and runlist unmapped, map and retry. */ if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { - is_retry = TRUE; + is_retry = true; /* * Attempt to map runlist, dropping lock for * the duration. @@ -335,10 +339,7 @@ handle_hole: bh->b_blocknr = -1UL; clear_buffer_mapped(bh); handle_zblock: - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user(page, i * blocksize, blocksize); if (likely(!err)) set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); @@ -400,7 +401,7 @@ static int ntfs_readpage(struct file *file, struct page *page) loff_t i_size; struct inode *vi; ntfs_inode *ni, *base_ni; - u8 *kaddr; + u8 *addr; ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; unsigned long flags; @@ -409,6 +410,15 @@ static int ntfs_readpage(struct file *file, struct page *page) retry_readpage: BUG_ON(!PageLocked(page)); + vi = page->mapping->host; + i_size = i_size_read(vi); + /* Is the page fully outside i_size? (truncate in progress) */ + if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { + zero_user(page, 0, PAGE_CACHE_SIZE); + ntfs_debug("Read outside i_size - truncated?"); + goto done; + } /* * This can potentially happen because we clear PageUptodate() during * ntfs_writepage() of MstProtected() attributes. @@ -417,7 +427,6 @@ retry_readpage: unlock_page(page); return 0; } - vi = page->mapping->host; ni = NTFS_I(vi); /* * Only $DATA attributes can be encrypted and only unnamed $DATA @@ -455,10 +464,7 @@ retry_readpage: * ok to ignore the compressed flag here. */ if (unlikely(page->index > 0)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); goto done; } if (!NInoAttr(ni)) @@ -498,15 +504,15 @@ retry_readpage: /* Race with shrinking truncate. */ attr_len = i_size; } - kaddr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); /* Copy the data to the page. */ - memcpy(kaddr, (u8*)ctx->attr + + memcpy(addr, (u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), attr_len); /* Zero the remainder of the page. */ - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(addr); put_unm_err_out: ntfs_attr_put_search_ctx(ctx); unm_err_out: @@ -557,7 +563,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) unsigned long flags; unsigned int blocksize, vcn_ofs; int err; - BOOL need_end_writeback; + bool need_end_writeback; unsigned char blocksize_bits; vi = page->mapping->host; @@ -569,10 +575,8 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) BUG_ON(!NInoNonResident(ni)); BUG_ON(NInoMstProtected(ni)); - - blocksize_bits = vi->i_blkbits; - blocksize = 1 << blocksize_bits; - + blocksize = vol->sb->s_blocksize; + blocksize_bits = vol->sb->s_blocksize_bits; if (!page_has_buffers(page)) { BUG_ON(!PageUptodate(page)); create_empty_buffers(page, blocksize, @@ -627,7 +631,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc) rl = NULL; err = 0; do { - BOOL is_retry = FALSE; + bool is_retry = false; if (unlikely(block >= dblock)) { /* @@ -742,14 +746,14 @@ lock_retry_remap: unsigned long *bpos, *bend; /* Check if the buffer is zero. */ - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); bpos = (unsigned long *)(kaddr + bh_offset(bh)); bend = (unsigned long *)((u8*)bpos + blocksize); do { if (unlikely(*bpos)) break; } while (likely(++bpos < bend)); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (bpos == bend) { /* * Buffer is zero and sparse, no need to write @@ -769,7 +773,7 @@ lock_retry_remap: } /* If first try and runlist unmapped, map and retry. */ if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { - is_retry = TRUE; + is_retry = true; /* * Attempt to map runlist, dropping lock for * the duration. @@ -787,14 +791,9 @@ lock_retry_remap: * uptodate so it can get discarded by the VM. */ if (err == -ENOENT || lcn == LCN_ENOENT) { - u8 *kaddr; - bh->b_blocknr = -1; clear_buffer_dirty(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + bh_offset(bh), 0, blocksize); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user(page, bh_offset(bh), blocksize); set_buffer_uptodate(bh); err = 0; continue; @@ -875,12 +874,12 @@ lock_retry_remap: set_page_writeback(page); /* Keeps try_to_free_buffers() away. */ /* Submit the prepared buffers for i/o. */ - need_end_writeback = TRUE; + need_end_writeback = true; do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { submit_bh(WRITE, bh); - need_end_writeback = FALSE; + need_end_writeback = false; } bh = next; } while (bh != head); @@ -933,7 +932,7 @@ static int ntfs_write_mst_block(struct page *page, runlist_element *rl; int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2; unsigned bh_size, rec_size_bits; - BOOL sync, is_mft, page_is_dirty, rec_is_dirty; + bool sync, is_mft, page_is_dirty, rec_is_dirty; unsigned char bh_size_bits; ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " @@ -949,8 +948,8 @@ static int ntfs_write_mst_block(struct page *page, */ BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); - bh_size_bits = vi->i_blkbits; - bh_size = 1 << bh_size_bits; + bh_size = vol->sb->s_blocksize; + bh_size_bits = vol->sb->s_blocksize_bits; max_bhs = PAGE_CACHE_SIZE / bh_size; BUG_ON(!max_bhs); BUG_ON(max_bhs > MAX_BUF_PER_PAGE); @@ -976,10 +975,10 @@ static int ntfs_write_mst_block(struct page *page, rl = NULL; err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; - page_is_dirty = rec_is_dirty = FALSE; + page_is_dirty = rec_is_dirty = false; rec_start_bh = NULL; do { - BOOL is_retry = FALSE; + bool is_retry = false; if (likely(block < rec_block)) { if (unlikely(block >= dblock)) { @@ -1010,10 +1009,10 @@ static int ntfs_write_mst_block(struct page *page, } if (!buffer_dirty(bh)) { /* Clean records are not written out. */ - rec_is_dirty = FALSE; + rec_is_dirty = false; continue; } - rec_is_dirty = TRUE; + rec_is_dirty = true; rec_start_bh = bh; } /* Need to map the buffer if it is not mapped already. */ @@ -1054,7 +1053,7 @@ lock_retry_remap: */ if (!is_mft && !is_retry && lcn == LCN_RL_NOT_MAPPED) { - is_retry = TRUE; + is_retry = true; /* * Attempt to map runlist, dropping * lock for the duration. @@ -1064,7 +1063,7 @@ lock_retry_remap: if (likely(!err2)) goto lock_retry_remap; if (err2 == -ENOMEM) - page_is_dirty = TRUE; + page_is_dirty = true; lcn = err2; } else { err2 = -EIO; @@ -1146,7 +1145,7 @@ lock_retry_remap: * means we need to redirty the page before * returning. */ - page_is_dirty = TRUE; + page_is_dirty = true; /* * Remove the buffers in this mft record from * the list of buffers to write. @@ -1196,7 +1195,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); @@ -1279,18 +1278,18 @@ unm_done: tni = locked_nis[nr_locked_nis]; /* Get the base inode. */ - down(&tni->extent_lock); + mutex_lock(&tni->extent_lock); if (tni->nr_extents >= 0) base_tni = tni; else { base_tni = tni->ext.base_ntfs_ino; BUG_ON(!base_tni); } - up(&tni->extent_lock); + mutex_unlock(&tni->extent_lock); ntfs_debug("Unlocking %s inode 0x%lx.", tni == base_tni ? "base" : "extent", tni->mft_no); - up(&tni->mrec_lock); + mutex_unlock(&tni->mrec_lock); atomic_dec(&tni->count); iput(VFS_I(base_tni)); } @@ -1357,7 +1356,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc) loff_t i_size; struct inode *vi = page->mapping->host; ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); - char *kaddr; + char *addr; ntfs_attr_search_ctx *ctx = NULL; MFT_RECORD *m = NULL; u32 attr_len; @@ -1373,7 +1372,7 @@ retry_writepage: * The page may have dirty, unmapped buffers. Make them * freeable here, so the page does not leak. */ - block_invalidatepage(page, 0); + block_invalidatepage(page, 0, PAGE_CACHE_SIZE); unlock_page(page); ntfs_debug("Write outside i_size - truncated?"); return 0; @@ -1391,8 +1390,7 @@ retry_writepage: if (NInoEncrypted(ni)) { unlock_page(page); BUG_ON(ni->type != AT_DATA); - ntfs_debug("Denying write access to encrypted " - "file."); + ntfs_debug("Denying write access to encrypted file."); return -EACCES; } /* Compressed data streams are handled in compress.c. */ @@ -1420,10 +1418,7 @@ retry_writepage: if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ unsigned int ofs = i_size & ~PAGE_CACHE_MASK; - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); + zero_user_segment(page, ofs, PAGE_CACHE_SIZE); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) @@ -1500,16 +1495,16 @@ retry_writepage: /* Shrinking cannot fail. */ BUG_ON(err); } - kaddr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), - kaddr, attr_len); + addr, attr_len); /* Zero out of bounds area in the page cache page. */ - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_mft_record_page(ctx->ntfs_ino); + memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + kunmap_atomic(addr); flush_dcache_page(page); + flush_dcache_mft_record_page(ctx->ntfs_ino); /* We are done with the page. */ end_page_writeback(page); /* Finally, mark the mft record dirty, so it gets written back. */ @@ -1532,7 +1527,6 @@ err_out: "error %i.", err); SetPageError(page); NVolSetErrors(ni->vol); - make_bad_inode(vi); } unlock_page(page); if (ctx) @@ -1542,861 +1536,38 @@ err_out: return err; } -/** - * ntfs_prepare_nonresident_write - - * - */ -static int ntfs_prepare_nonresident_write(struct page *page, - unsigned from, unsigned to) -{ - VCN vcn; - LCN lcn; - s64 initialized_size; - loff_t i_size; - sector_t block, ablock, iblock; - struct inode *vi; - ntfs_inode *ni; - ntfs_volume *vol; - runlist_element *rl; - struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; - unsigned long flags; - unsigned int vcn_ofs, block_start, block_end, blocksize; - int err; - BOOL is_retry; - unsigned char blocksize_bits; - - vi = page->mapping->host; - ni = NTFS_I(vi); - vol = ni->vol; - - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " - "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type, - page->index, from, to); - - BUG_ON(!NInoNonResident(ni)); - - blocksize_bits = vi->i_blkbits; - blocksize = 1 << blocksize_bits; - - /* - * create_empty_buffers() will create uptodate/dirty buffers if the - * page is uptodate/dirty. - */ - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - bh = head = page_buffers(page); - if (unlikely(!bh)) - return -ENOMEM; - - /* The first block in the page. */ - block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); - - read_lock_irqsave(&ni->size_lock, flags); - /* - * The first out of bounds block for the allocated size. No need to - * round up as allocated_size is in multiples of cluster size and the - * minimum cluster size is 512 bytes, which is equal to the smallest - * blocksize. - */ - ablock = ni->allocated_size >> blocksize_bits; - i_size = i_size_read(vi); - initialized_size = ni->initialized_size; - read_unlock_irqrestore(&ni->size_lock, flags); - - /* The last (fully or partially) initialized block. */ - iblock = initialized_size >> blocksize_bits; - - /* Loop through all the buffers in the page. */ - block_start = 0; - rl = NULL; - err = 0; - do { - block_end = block_start + blocksize; - /* - * If buffer @bh is outside the write, just mark it uptodate - * if the page is uptodate and continue with the next buffer. - */ - if (block_end <= from || block_start >= to) { - if (PageUptodate(page)) { - if (!buffer_uptodate(bh)) - set_buffer_uptodate(bh); - } - continue; - } - /* - * @bh is at least partially being written to. - * Make sure it is not marked as new. - */ - //if (buffer_new(bh)) - // clear_buffer_new(bh); - - if (block >= ablock) { - // TODO: block is above allocated_size, need to - // allocate it. Best done in one go to accommodate not - // only block but all above blocks up to and including: - // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize - // - 1) >> blobksize_bits. Obviously will need to round - // up to next cluster boundary, too. This should be - // done with a helper function, so it can be reused. - ntfs_error(vol->sb, "Writing beyond allocated size " - "is not supported yet. Sorry."); - err = -EOPNOTSUPP; - goto err_out; - // Need to update ablock. - // Need to set_buffer_new() on all block bhs that are - // newly allocated. - } - /* - * Now we have enough allocated size to fulfill the whole - * request, i.e. block < ablock is true. - */ - if (unlikely((block >= iblock) && - (initialized_size < i_size))) { - /* - * If this page is fully outside initialized size, zero - * out all pages between the current initialized size - * and the current page. Just use ntfs_readpage() to do - * the zeroing transparently. - */ - if (block > iblock) { - // TODO: - // For each page do: - // - read_cache_page() - // Again for each page do: - // - wait_on_page_locked() - // - Check (PageUptodate(page) && - // !PageError(page)) - // Update initialized size in the attribute and - // in the inode. - // Again, for each page do: - // __set_page_dirty_buffers(); - // page_cache_release() - // We don't need to wait on the writes. - // Update iblock. - } - /* - * The current page straddles initialized size. Zero - * all non-uptodate buffers and set them uptodate (and - * dirty?). Note, there aren't any non-uptodate buffers - * if the page is uptodate. - * FIXME: For an uptodate page, the buffers may need to - * be written out because they were not initialized on - * disk before. - */ - if (!PageUptodate(page)) { - // TODO: - // Zero any non-uptodate buffers up to i_size. - // Set them uptodate and dirty. - } - // TODO: - // Update initialized size in the attribute and in the - // inode (up to i_size). - // Update iblock. - // FIXME: This is inefficient. Try to batch the two - // size changes to happen in one go. - ntfs_error(vol->sb, "Writing beyond initialized size " - "is not supported yet. Sorry."); - err = -EOPNOTSUPP; - goto err_out; - // Do NOT set_buffer_new() BUT DO clear buffer range - // outside write request range. - // set_buffer_uptodate() on complete buffers as well as - // set_buffer_dirty(). - } - - /* Need to map unmapped buffers. */ - if (!buffer_mapped(bh)) { - /* Unmapped buffer. Need to map it. */ - bh->b_bdev = vol->sb->s_bdev; - - /* Convert block into corresponding vcn and offset. */ - vcn = (VCN)block << blocksize_bits >> - vol->cluster_size_bits; - vcn_ofs = ((VCN)block << blocksize_bits) & - vol->cluster_size_mask; - - is_retry = FALSE; - if (!rl) { -lock_retry_remap: - down_read(&ni->runlist.lock); - rl = ni->runlist.rl; - } - if (likely(rl != NULL)) { - /* Seek to element containing target vcn. */ - while (rl->length && rl[1].vcn <= vcn) - rl++; - lcn = ntfs_rl_vcn_to_lcn(rl, vcn); - } else - lcn = LCN_RL_NOT_MAPPED; - if (unlikely(lcn < 0)) { - /* - * We extended the attribute allocation above. - * If we hit an ENOENT here it means that the - * allocation was insufficient which is a bug. - */ - BUG_ON(lcn == LCN_ENOENT); - - /* It is a hole, need to instantiate it. */ - if (lcn == LCN_HOLE) { - // TODO: Instantiate the hole. - // clear_buffer_new(bh); - // unmap_underlying_metadata(bh->b_bdev, - // bh->b_blocknr); - // For non-uptodate buffers, need to - // zero out the region outside the - // request in this bh or all bhs, - // depending on what we implemented - // above. - // Need to flush_dcache_page(). - // Or could use set_buffer_new() - // instead? - ntfs_error(vol->sb, "Writing into " - "sparse regions is " - "not supported yet. " - "Sorry."); - err = -EOPNOTSUPP; - if (!rl) - up_read(&ni->runlist.lock); - goto err_out; - } else if (!is_retry && - lcn == LCN_RL_NOT_MAPPED) { - is_retry = TRUE; - /* - * Attempt to map runlist, dropping - * lock for the duration. - */ - up_read(&ni->runlist.lock); - err = ntfs_map_runlist(ni, vcn); - if (likely(!err)) - goto lock_retry_remap; - rl = NULL; - } else if (!rl) - up_read(&ni->runlist.lock); - /* - * Failed to map the buffer, even after - * retrying. - */ - if (!err) - err = -EIO; - bh->b_blocknr = -1; - ntfs_error(vol->sb, "Failed to write to inode " - "0x%lx, attribute type 0x%x, " - "vcn 0x%llx, offset 0x%x " - "because its location on disk " - "could not be determined%s " - "(error code %i).", - ni->mft_no, ni->type, - (unsigned long long)vcn, - vcn_ofs, is_retry ? " even " - "after retrying" : "", err); - goto err_out; - } - /* We now have a successful remap, i.e. lcn >= 0. */ - - /* Setup buffer head to correct block. */ - bh->b_blocknr = ((lcn << vol->cluster_size_bits) - + vcn_ofs) >> blocksize_bits; - set_buffer_mapped(bh); - - // FIXME: Something analogous to this is needed for - // each newly allocated block, i.e. BH_New. - // FIXME: Might need to take this out of the - // if (!buffer_mapped(bh)) {}, depending on how we - // implement things during the allocated_size and - // initialized_size extension code above. - if (buffer_new(bh)) { - clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - if (PageUptodate(page)) { - set_buffer_uptodate(bh); - continue; - } - /* - * Page is _not_ uptodate, zero surrounding - * region. NOTE: This is how we decide if to - * zero or not! - */ - if (block_end > to || block_start < from) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_end > to) - memset(kaddr + to, 0, - block_end - to); - if (block_start < from) - memset(kaddr + block_start, 0, - from - - block_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - continue; - } - } - /* @bh is mapped, set it uptodate if the page is uptodate. */ - if (PageUptodate(page)) { - if (!buffer_uptodate(bh)) - set_buffer_uptodate(bh); - continue; - } - /* - * The page is not uptodate. The buffer is mapped. If it is not - * uptodate, and it is only partially being written to, we need - * to read the buffer in before the write, i.e. right now. - */ - if (!buffer_uptodate(bh) && - (block_start < from || block_end > to)) { - ll_rw_block(READ, 1, &bh); - *wait_bh++ = bh; - } - } while (block++, block_start = block_end, - (bh = bh->b_this_page) != head); - - /* Release the lock if we took it. */ - if (rl) { - up_read(&ni->runlist.lock); - rl = NULL; - } - - /* If we issued read requests, let them complete. */ - while (wait_bh > wait) { - wait_on_buffer(*--wait_bh); - if (!buffer_uptodate(*wait_bh)) - return -EIO; - } - - ntfs_debug("Done."); - return 0; -err_out: - /* - * Zero out any newly allocated blocks to avoid exposing stale data. - * If BH_New is set, we know that the block was newly allocated in the - * above loop. - * FIXME: What about initialized_size increments? Have we done all the - * required zeroing above? If not this error handling is broken, and - * in particular the if (block_end <= from) check is completely bogus. - */ - bh = head; - block_start = 0; - is_retry = FALSE; - do { - block_end = block_start + blocksize; - if (block_end <= from) - continue; - if (block_start >= to) - break; - if (buffer_new(bh)) { - void *kaddr; - - clear_buffer_new(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + block_start, 0, bh->b_size); - kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - is_retry = TRUE; - } - } while (block_start = block_end, (bh = bh->b_this_page) != head); - if (is_retry) - flush_dcache_page(page); - if (rl) - up_read(&ni->runlist.lock); - return err; -} - -/** - * ntfs_prepare_write - prepare a page for receiving data - * - * This is called from generic_file_write() with i_sem held on the inode - * (@page->mapping->host). The @page is locked but not kmap()ped. The source - * data has not yet been copied into the @page. - * - * Need to extend the attribute/fill in holes if necessary, create blocks and - * make partially overwritten blocks uptodate, - * - * i_size is not to be modified yet. - * - * Return 0 on success or -errno on error. - * - * Should be using block_prepare_write() [support for sparse files] or - * cont_prepare_write() [no support for sparse files]. Cannot do that due to - * ntfs specifics but can look at them for implementation guidance. - * - * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is - * the first byte in the page that will be written to and @to is the first byte - * after the last byte that will be written to. - */ -static int ntfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - s64 new_size; - loff_t i_size; - struct inode *vi = page->mapping->host; - ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); - ntfs_volume *vol = ni->vol; - ntfs_attr_search_ctx *ctx = NULL; - MFT_RECORD *m = NULL; - ATTR_RECORD *a; - u8 *kaddr; - u32 attr_len; - int err; - - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " - "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, - page->index, from, to); - BUG_ON(!PageLocked(page)); - BUG_ON(from > PAGE_CACHE_SIZE); - BUG_ON(to > PAGE_CACHE_SIZE); - BUG_ON(from > to); - BUG_ON(NInoMstProtected(ni)); - /* - * If a previous ntfs_truncate() failed, repeat it and abort if it - * fails again. - */ - if (unlikely(NInoTruncateFailed(ni))) { - down_write(&vi->i_alloc_sem); - err = ntfs_truncate(vi); - up_write(&vi->i_alloc_sem); - if (err || NInoTruncateFailed(ni)) { - if (!err) - err = -EIO; - goto err_out; - } - } - /* If the attribute is not resident, deal with it elsewhere. */ - if (NInoNonResident(ni)) { - /* - * Only unnamed $DATA attributes can be compressed, encrypted, - * and/or sparse. - */ - if (ni->type == AT_DATA && !ni->name_len) { - /* If file is encrypted, deny access, just like NT4. */ - if (NInoEncrypted(ni)) { - ntfs_debug("Denying write access to encrypted " - "file."); - return -EACCES; - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) { - // TODO: Implement and replace this check with - // return ntfs_write_compressed_block(page); - ntfs_error(vi->i_sb, "Writing to compressed " - "files is not supported yet. " - "Sorry."); - return -EOPNOTSUPP; - } - // TODO: Implement and remove this check. - if (NInoSparse(ni)) { - ntfs_error(vi->i_sb, "Writing to sparse files " - "is not supported yet. Sorry."); - return -EOPNOTSUPP; - } - } - /* Normal data stream. */ - return ntfs_prepare_nonresident_write(page, from, to); - } - /* - * Attribute is resident, implying it is not compressed, encrypted, or - * sparse. - */ - BUG_ON(page_has_buffers(page)); - new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; - /* If we do not need to resize the attribute allocation we are done. */ - if (new_size <= i_size_read(vi)) - goto done; - /* Map, pin, and lock the (base) mft record. */ - if (!NInoAttr(ni)) - base_ni = ni; - else - base_ni = ni->ext.base_ntfs_ino; - m = map_mft_record(base_ni); - if (IS_ERR(m)) { - err = PTR_ERR(m); - m = NULL; - ctx = NULL; - goto err_out; - } - ctx = ntfs_attr_get_search_ctx(base_ni, m); - if (unlikely(!ctx)) { - err = -ENOMEM; - goto err_out; - } - err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, - CASE_SENSITIVE, 0, NULL, 0, ctx); - if (unlikely(err)) { - if (err == -ENOENT) - err = -EIO; - goto err_out; - } - m = ctx->mrec; - a = ctx->attr; - /* The total length of the attribute value. */ - attr_len = le32_to_cpu(a->data.resident.value_length); - /* Fix an eventual previous failure of ntfs_commit_write(). */ - i_size = i_size_read(vi); - if (unlikely(attr_len > i_size)) { - attr_len = i_size; - a->data.resident.value_length = cpu_to_le32(attr_len); - } - /* If we do not need to resize the attribute allocation we are done. */ - if (new_size <= attr_len) - goto done_unm; - /* Check if new size is allowed in $AttrDef. */ - err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); - if (unlikely(err)) { - if (err == -ERANGE) { - ntfs_error(vol->sb, "Write would cause the inode " - "0x%lx to exceed the maximum size for " - "its attribute type (0x%x). Aborting " - "write.", vi->i_ino, - le32_to_cpu(ni->type)); - } else { - ntfs_error(vol->sb, "Inode 0x%lx has unknown " - "attribute type 0x%x. Aborting " - "write.", vi->i_ino, - le32_to_cpu(ni->type)); - err = -EIO; - } - goto err_out2; - } - /* - * Extend the attribute record to be able to store the new attribute - * size. - */ - if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a, - le16_to_cpu(a->data.resident.value_offset) + - new_size)) { - /* Not enough space in the mft record. */ - ntfs_error(vol->sb, "Not enough space in the mft record for " - "the resized attribute value. This is not " - "supported yet. Aborting write."); - err = -EOPNOTSUPP; - goto err_out2; - } - /* - * We have enough space in the mft record to fit the write. This - * implies the attribute is smaller than the mft record and hence the - * attribute must be in a single page and hence page->index must be 0. - */ - BUG_ON(page->index); - /* - * If the beginning of the write is past the old size, enlarge the - * attribute value up to the beginning of the write and fill it with - * zeroes. - */ - if (from > attr_len) { - memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) + - attr_len, 0, from - attr_len); - a->data.resident.value_length = cpu_to_le32(from); - /* Zero the corresponding area in the page as well. */ - if (PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + attr_len, 0, from - attr_len); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(page); - } - } - flush_dcache_mft_record_page(ctx->ntfs_ino); - mark_mft_record_dirty(ctx->ntfs_ino); -done_unm: - ntfs_attr_put_search_ctx(ctx); - unmap_mft_record(base_ni); - /* - * Because resident attributes are handled by memcpy() to/from the - * corresponding MFT record, and because this form of i/o is byte - * aligned rather than block aligned, there is no need to bring the - * page uptodate here as in the non-resident case where we need to - * bring the buffers straddled by the write uptodate before - * generic_file_write() does the copying from userspace. - * - * We thus defer the uptodate bringing of the page region outside the - * region written to to ntfs_commit_write(), which makes the code - * simpler and saves one atomic kmap which is good. - */ -done: - ntfs_debug("Done."); - return 0; -err_out: - if (err == -ENOMEM) - ntfs_warning(vi->i_sb, "Error allocating memory required to " - "prepare the write."); - else { - ntfs_error(vi->i_sb, "Resident attribute prepare write failed " - "with error %i.", err); - NVolSetErrors(vol); - make_bad_inode(vi); - } -err_out2: - if (ctx) - ntfs_attr_put_search_ctx(ctx); - if (m) - unmap_mft_record(base_ni); - return err; -} - -/** - * ntfs_commit_nonresident_write - - * - */ -static int ntfs_commit_nonresident_write(struct page *page, - unsigned from, unsigned to) -{ - s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; - struct inode *vi = page->mapping->host; - struct buffer_head *bh, *head; - unsigned int block_start, block_end, blocksize; - BOOL partial; - - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " - "0x%lx, from = %u, to = %u.", vi->i_ino, - NTFS_I(vi)->type, page->index, from, to); - blocksize = 1 << vi->i_blkbits; - - // FIXME: We need a whole slew of special cases in here for compressed - // files for example... - // For now, we know ntfs_prepare_write() would have failed so we can't - // get here in any of the cases which we have to special case, so we - // are just a ripped off, unrolled generic_commit_write(). - - bh = head = page_buffers(page); - block_start = 0; - partial = FALSE; - do { - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (!buffer_uptodate(bh)) - partial = TRUE; - } else { - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - } - } while (block_start = block_end, (bh = bh->b_this_page) != head); - /* - * If this is a partial write which happened to make all buffers - * uptodate then we can optimize away a bogus ->readpage() for the next - * read(). Here we 'discover' whether the page went uptodate as a - * result of this (potentially partial) write. - */ - if (!partial) - SetPageUptodate(page); - /* - * Not convinced about this at all. See disparity comment above. For - * now we know ntfs_prepare_write() would have failed in the write - * exceeds i_size case, so this will never trigger which is fine. - */ - if (pos > i_size_read(vi)) { - ntfs_error(vi->i_sb, "Writing beyond the existing file size is " - "not supported yet. Sorry."); - return -EOPNOTSUPP; - // vi->i_size = pos; - // mark_inode_dirty(vi); - } - ntfs_debug("Done."); - return 0; -} - -/** - * ntfs_commit_write - commit the received data - * - * This is called from generic_file_write() with i_sem held on the inode - * (@page->mapping->host). The @page is locked but not kmap()ped. The source - * data has already been copied into the @page. ntfs_prepare_write() has been - * called before the data copied and it returned success so we can take the - * results of various BUG checks and some error handling for granted. - * - * Need to mark modified blocks dirty so they get written out later when - * ntfs_writepage() is invoked by the VM. - * - * Return 0 on success or -errno on error. - * - * Should be using generic_commit_write(). This marks buffers uptodate and - * dirty, sets the page uptodate if all buffers in the page are uptodate, and - * updates i_size if the end of io is beyond i_size. In that case, it also - * marks the inode dirty. - * - * Cannot use generic_commit_write() due to ntfs specialities but can look at - * it for implementation guidance. - * - * If things have gone as outlined in ntfs_prepare_write(), then we do not - * need to do any page content modifications here at all, except in the write - * to resident attribute case, where we need to do the uptodate bringing here - * which we combine with the copying into the mft record which means we save - * one atomic kmap. - */ -static int ntfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *vi = page->mapping->host; - ntfs_inode *base_ni, *ni = NTFS_I(vi); - char *kaddr, *kattr; - ntfs_attr_search_ctx *ctx; - MFT_RECORD *m; - ATTR_RECORD *a; - u32 attr_len; - int err; - - ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " - "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, - page->index, from, to); - /* If the attribute is not resident, deal with it elsewhere. */ - if (NInoNonResident(ni)) { - /* Only unnamed $DATA attributes can be compressed/encrypted. */ - if (ni->type == AT_DATA && !ni->name_len) { - /* Encrypted files need separate handling. */ - if (NInoEncrypted(ni)) { - // We never get here at present! - BUG(); - } - /* Compressed data streams are handled in compress.c. */ - if (NInoCompressed(ni)) { - // TODO: Implement this! - // return ntfs_write_compressed_block(page); - // We never get here at present! - BUG(); - } - } - /* Normal data stream. */ - return ntfs_commit_nonresident_write(page, from, to); - } - /* - * Attribute is resident, implying it is not compressed, encrypted, or - * sparse. - */ - if (!NInoAttr(ni)) - base_ni = ni; - else - base_ni = ni->ext.base_ntfs_ino; - /* Map, pin, and lock the mft record. */ - m = map_mft_record(base_ni); - if (IS_ERR(m)) { - err = PTR_ERR(m); - m = NULL; - ctx = NULL; - goto err_out; - } - ctx = ntfs_attr_get_search_ctx(base_ni, m); - if (unlikely(!ctx)) { - err = -ENOMEM; - goto err_out; - } - err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, - CASE_SENSITIVE, 0, NULL, 0, ctx); - if (unlikely(err)) { - if (err == -ENOENT) - err = -EIO; - goto err_out; - } - a = ctx->attr; - /* The total length of the attribute value. */ - attr_len = le32_to_cpu(a->data.resident.value_length); - BUG_ON(from > attr_len); - kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); - kaddr = kmap_atomic(page, KM_USER0); - /* Copy the received data from the page to the mft record. */ - memcpy(kattr + from, kaddr + from, to - from); - /* Update the attribute length if necessary. */ - if (to > attr_len) { - attr_len = to; - a->data.resident.value_length = cpu_to_le32(attr_len); - } - /* - * If the page is not uptodate, bring the out of bounds area(s) - * uptodate by copying data from the mft record to the page. - */ - if (!PageUptodate(page)) { - if (from > 0) - memcpy(kaddr, kattr, from); - if (to < attr_len) - memcpy(kaddr + to, kattr + to, attr_len - to); - /* Zero the region outside the end of the attribute value. */ - if (attr_len < PAGE_CACHE_SIZE) - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - /* - * The probability of not having done any of the above is - * extremely small, so we just flush unconditionally. - */ - flush_dcache_page(page); - SetPageUptodate(page); - } - kunmap_atomic(kaddr, KM_USER0); - /* Update i_size if necessary. */ - if (i_size_read(vi) < attr_len) { - unsigned long flags; - - write_lock_irqsave(&ni->size_lock, flags); - ni->allocated_size = ni->initialized_size = attr_len; - i_size_write(vi, attr_len); - write_unlock_irqrestore(&ni->size_lock, flags); - } - /* Mark the mft record dirty, so it gets written back. */ - flush_dcache_mft_record_page(ctx->ntfs_ino); - mark_mft_record_dirty(ctx->ntfs_ino); - ntfs_attr_put_search_ctx(ctx); - unmap_mft_record(base_ni); - ntfs_debug("Done."); - return 0; -err_out: - if (err == -ENOMEM) { - ntfs_warning(vi->i_sb, "Error allocating memory required to " - "commit the write."); - if (PageUptodate(page)) { - ntfs_warning(vi->i_sb, "Page is uptodate, setting " - "dirty so the write will be retried " - "later on by the VM."); - /* - * Put the page on mapping->dirty_pages, but leave its - * buffers' dirty state as-is. - */ - __set_page_dirty_nobuffers(page); - err = 0; - } else - ntfs_error(vi->i_sb, "Page is not uptodate. Written " - "data has been lost."); - } else { - ntfs_error(vi->i_sb, "Resident attribute commit write failed " - "with error %i.", err); - NVolSetErrors(ni->vol); - make_bad_inode(vi); - } - if (ctx) - ntfs_attr_put_search_ctx(ctx); - if (m) - unmap_mft_record(base_ni); - return err; -} - #endif /* NTFS_RW */ /** * ntfs_aops - general address space operations for inodes and attributes */ -struct address_space_operations ntfs_aops = { +const struct address_space_operations ntfs_aops = { .readpage = ntfs_readpage, /* Fill page with data. */ - .sync_page = block_sync_page, /* Currently, just unplugs the - disk request queue. */ #ifdef NTFS_RW .writepage = ntfs_writepage, /* Write dirty page to disk. */ - .prepare_write = ntfs_prepare_write, /* Prepare page and buffers - ready to receive data. */ - .commit_write = ntfs_commit_write, /* Commit received data. */ #endif /* NTFS_RW */ + .migratepage = buffer_migrate_page, /* Move a page cache page from + one physical page to an + other. */ + .error_remove_page = generic_error_remove_page, }; /** * ntfs_mst_aops - general address space operations for mst protecteed inodes * and attributes */ -struct address_space_operations ntfs_mst_aops = { +const struct address_space_operations ntfs_mst_aops = { .readpage = ntfs_readpage, /* Fill page with data. */ - .sync_page = block_sync_page, /* Currently, just unplugs the - disk request queue. */ #ifdef NTFS_RW .writepage = ntfs_writepage, /* Write dirty page to disk. */ .set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty without touching the buffers belonging to the page. */ #endif /* NTFS_RW */ + .migratepage = buffer_migrate_page, /* Move a page cache page from + one physical page to an + other. */ + .error_remove_page = generic_error_remove_page, }; #ifdef NTFS_RW @@ -2424,7 +1595,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { BUG_ON(!PageUptodate(page)); end = ofs + ni->itype.index.block_size; - bh_size = 1 << VFS_I(ni)->i_blkbits; + bh_size = VFS_I(ni)->i_sb->s_blocksize; spin_lock(&mapping->private_lock); if (unlikely(!page_has_buffers(page))) { spin_unlock(&mapping->private_lock); diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index 3b74e66ca2f..caecc58f529 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -80,19 +80,17 @@ static inline void ntfs_unmap_page(struct page *page) * * The unlocked and uptodate page is returned on success or an encoded error * on failure. Caller has to test for error using the IS_ERR() macro on the - * return value. If that evaluates to TRUE, the negative error code can be + * return value. If that evaluates to 'true', the negative error code can be * obtained using PTR_ERR() on the return value of ntfs_map_page(). */ static inline struct page *ntfs_map_page(struct address_space *mapping, unsigned long index) { - struct page *page = read_cache_page(mapping, index, - (filler_t*)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, index, NULL); if (!IS_ERR(page)) { - wait_on_page_locked(page); kmap(page); - if (PageUptodate(page) && !PageError(page)) + if (!PageError(page)) return page; ntfs_unmap_page(page); return ERR_PTR(-EIO); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 3f9a4ff42ee..250ed5b20c8 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -21,7 +21,10 @@ */ #include <linux/buffer_head.h> +#include <linux/sched.h> +#include <linux/slab.h> #include <linux/swap.h> +#include <linux/writeback.h> #include "attrib.h" #include "debug.h" @@ -36,9 +39,27 @@ * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode * @ni: ntfs inode for which to map (part of) a runlist * @vcn: map runlist part containing this vcn + * @ctx: active attribute search context if present or NULL if not * * Map the part of a runlist containing the @vcn of the ntfs inode @ni. * + * If @ctx is specified, it is an active search context of @ni and its base mft + * record. This is needed when ntfs_map_runlist_nolock() encounters unmapped + * runlist fragments and allows their mapping. If you do not have the mft + * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock() + * will perform the necessary mapping and unmapping. + * + * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and + * restores it before returning. Thus, @ctx will be left pointing to the same + * attribute on return as on entry. However, the actual pointers in @ctx may + * point to different memory locations on return, so you must remember to reset + * any cached pointers from the @ctx, i.e. after the call to + * ntfs_map_runlist_nolock(), you will probably want to do: + * m = ctx->mrec; + * a = ctx->attr; + * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that + * you cache ctx->mrec in a variable @m of type MFT_RECORD *. + * * Return 0 on success and -errno on error. There is one special error code * which is not an error as such. This is -ENOENT. It means that @vcn is out * of bounds of the runlist. @@ -46,19 +67,32 @@ * Note the runlist can be NULL after this function returns if @vcn is zero and * the attribute has zero allocated size, i.e. there simply is no runlist. * - * Locking: - The runlist must be locked for writing. - * - This function modifies the runlist. + * WARNING: If @ctx is supplied, regardless of whether success or failure is + * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx + * is no longer valid, i.e. you need to either call + * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it. + * In that case PTR_ERR(@ctx->mrec) will give you the error code for + * why the mapping of the old inode failed. + * + * Locking: - The runlist described by @ni must be locked for writing on entry + * and is locked on return. Note the runlist will be modified. + * - If @ctx is NULL, the base mft record of @ni must not be mapped on + * entry and it will be left unmapped on return. + * - If @ctx is not NULL, the base mft record must be mapped on entry + * and it will be left mapped on return. */ -int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) +int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx) { VCN end_vcn; + unsigned long flags; ntfs_inode *base_ni; MFT_RECORD *m; ATTR_RECORD *a; - ntfs_attr_search_ctx *ctx; runlist_element *rl; - unsigned long flags; + struct page *put_this_page = NULL; int err = 0; + bool ctx_is_temporary, ctx_needs_reset; + ntfs_attr_search_ctx old_ctx = { NULL, }; ntfs_debug("Mapping runlist part containing vcn 0x%llx.", (unsigned long long)vcn); @@ -66,20 +100,77 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - m = map_mft_record(base_ni); - if (IS_ERR(m)) - return PTR_ERR(m); - ctx = ntfs_attr_get_search_ctx(base_ni, m); - if (unlikely(!ctx)) { - err = -ENOMEM; - goto err_out; + if (!ctx) { + ctx_is_temporary = ctx_needs_reset = true; + m = map_mft_record(base_ni); + if (IS_ERR(m)) + return PTR_ERR(m); + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + } else { + VCN allocated_size_vcn; + + BUG_ON(IS_ERR(ctx->mrec)); + a = ctx->attr; + BUG_ON(!a->non_resident); + ctx_is_temporary = false; + end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); + read_lock_irqsave(&ni->size_lock, flags); + allocated_size_vcn = ni->allocated_size >> + ni->vol->cluster_size_bits; + read_unlock_irqrestore(&ni->size_lock, flags); + if (!a->data.non_resident.lowest_vcn && end_vcn <= 0) + end_vcn = allocated_size_vcn - 1; + /* + * If we already have the attribute extent containing @vcn in + * @ctx, no need to look it up again. We slightly cheat in + * that if vcn exceeds the allocated size, we will refuse to + * map the runlist below, so there is definitely no need to get + * the right attribute extent. + */ + if (vcn >= allocated_size_vcn || (a->type == ni->type && + a->name_length == ni->name_len && + !memcmp((u8*)a + le16_to_cpu(a->name_offset), + ni->name, ni->name_len) && + sle64_to_cpu(a->data.non_resident.lowest_vcn) + <= vcn && end_vcn >= vcn)) + ctx_needs_reset = false; + else { + /* Save the old search context. */ + old_ctx = *ctx; + /* + * If the currently mapped (extent) inode is not the + * base inode we will unmap it when we reinitialize the + * search context which means we need to get a + * reference to the page containing the mapped mft + * record so we do not accidentally drop changes to the + * mft record when it has not been marked dirty yet. + */ + if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino != + old_ctx.base_ntfs_ino) { + put_this_page = old_ctx.ntfs_ino->page; + page_cache_get(put_this_page); + } + /* + * Reinitialize the search context so we can lookup the + * needed attribute extent. + */ + ntfs_attr_reinit_search_ctx(ctx); + ctx_needs_reset = true; + } } - err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, - CASE_SENSITIVE, vcn, NULL, 0, ctx); - if (unlikely(err)) { - if (err == -ENOENT) - err = -EIO; - goto err_out; + if (ctx_needs_reset) { + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, vcn, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + BUG_ON(!ctx->attr->non_resident); } a = ctx->attr; /* @@ -89,12 +180,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) * ntfs_mapping_pairs_decompress() fails. */ end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1; - if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) { - read_lock_irqsave(&ni->size_lock, flags); - end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits; - read_unlock_irqrestore(&ni->size_lock, flags); - } - if (unlikely(vcn >= end_vcn)) { + if (unlikely(vcn && vcn >= end_vcn)) { err = -ENOENT; goto err_out; } @@ -104,9 +190,93 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) else ni->runlist.rl = rl; err_out: - if (likely(ctx)) - ntfs_attr_put_search_ctx(ctx); - unmap_mft_record(base_ni); + if (ctx_is_temporary) { + if (likely(ctx)) + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + } else if (ctx_needs_reset) { + /* + * If there is no attribute list, restoring the search context + * is accomplished simply by copying the saved context back over + * the caller supplied context. If there is an attribute list, + * things are more complicated as we need to deal with mapping + * of mft records and resulting potential changes in pointers. + */ + if (NInoAttrList(base_ni)) { + /* + * If the currently mapped (extent) inode is not the + * one we had before, we need to unmap it and map the + * old one. + */ + if (ctx->ntfs_ino != old_ctx.ntfs_ino) { + /* + * If the currently mapped inode is not the + * base inode, unmap it. + */ + if (ctx->base_ntfs_ino && ctx->ntfs_ino != + ctx->base_ntfs_ino) { + unmap_extent_mft_record(ctx->ntfs_ino); + ctx->mrec = ctx->base_mrec; + BUG_ON(!ctx->mrec); + } + /* + * If the old mapped inode is not the base + * inode, map it. + */ + if (old_ctx.base_ntfs_ino && + old_ctx.ntfs_ino != + old_ctx.base_ntfs_ino) { +retry_map: + ctx->mrec = map_mft_record( + old_ctx.ntfs_ino); + /* + * Something bad has happened. If out + * of memory retry till it succeeds. + * Any other errors are fatal and we + * return the error code in ctx->mrec. + * Let the caller deal with it... We + * just need to fudge things so the + * caller can reinit and/or put the + * search context safely. + */ + if (IS_ERR(ctx->mrec)) { + if (PTR_ERR(ctx->mrec) == + -ENOMEM) { + schedule(); + goto retry_map; + } else + old_ctx.ntfs_ino = + old_ctx. + base_ntfs_ino; + } + } + } + /* Update the changed pointers in the saved context. */ + if (ctx->mrec != old_ctx.mrec) { + if (!IS_ERR(ctx->mrec)) + old_ctx.attr = (ATTR_RECORD*)( + (u8*)ctx->mrec + + ((u8*)old_ctx.attr - + (u8*)old_ctx.mrec)); + old_ctx.mrec = ctx->mrec; + } + } + /* Restore the search context to the saved one. */ + *ctx = old_ctx; + /* + * We drop the reference on the page we took earlier. In the + * case that IS_ERR(ctx->mrec) is true this means we might lose + * some changes to the mft record that had been made between + * the last time it was marked dirty/written out and now. This + * at this stage is not a problem as the mapping error is fatal + * enough that the mft record cannot be written out anyway and + * the caller is very likely to shutdown the whole inode + * immediately and mark the volume dirty for chkdsk to pick up + * the pieces anyway. + */ + if (put_this_page) + page_cache_release(put_this_page); + } return err; } @@ -122,8 +292,8 @@ err_out: * of bounds of the runlist. * * Locking: - The runlist must be unlocked on entry and is unlocked on return. - * - This function takes the runlist lock for writing and modifies the - * runlist. + * - This function takes the runlist lock for writing and may modify + * the runlist. */ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) { @@ -133,7 +303,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) /* Make sure someone else didn't do the work while we were sleeping. */ if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= LCN_RL_NOT_MAPPED)) - err = ntfs_map_runlist_nolock(ni, vcn); + err = ntfs_map_runlist_nolock(ni, vcn, NULL); up_write(&ni->runlist.lock); return err; } @@ -164,21 +334,21 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) * LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc). * * Locking: - The runlist must be locked on entry and is left locked on return. - * - If @write_locked is FALSE, i.e. the runlist is locked for reading, + * - If @write_locked is 'false', i.e. the runlist is locked for reading, * the lock may be dropped inside the function so you cannot rely on * the runlist still being the same when this function returns. */ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, - const BOOL write_locked) + const bool write_locked) { LCN lcn; unsigned long flags; - BOOL is_retry = FALSE; + bool is_retry = false; + BUG_ON(!ni); ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", ni->mft_no, (unsigned long long)vcn, write_locked ? "write" : "read"); - BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { @@ -212,13 +382,13 @@ retry_remap: goto retry_remap; } } - err = ntfs_map_runlist_nolock(ni, vcn); + err = ntfs_map_runlist_nolock(ni, vcn, NULL); if (!write_locked) { up_write(&ni->runlist.lock); down_read(&ni->runlist.lock); } if (likely(!err)) { - is_retry = TRUE; + is_retry = true; goto retry_remap; } if (err == -ENOENT) @@ -236,9 +406,9 @@ retry_remap: /** * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode - * @ni: ntfs inode describing the runlist to search - * @vcn: vcn to find - * @write_locked: true if the runlist is locked for writing + * @ni: ntfs inode describing the runlist to search + * @vcn: vcn to find + * @ctx: active attribute search context if present or NULL if not * * Find the virtual cluster number @vcn in the runlist described by the ntfs * inode @ni and return the address of the runlist element containing the @vcn. @@ -246,9 +416,22 @@ retry_remap: * If the @vcn is not mapped yet, the attempt is made to map the attribute * extent containing the @vcn and the vcn to lcn conversion is retried. * - * If @write_locked is true the caller has locked the runlist for writing and - * if false for reading. - * + * If @ctx is specified, it is an active search context of @ni and its base mft + * record. This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped + * runlist fragments and allows their mapping. If you do not have the mft + * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock() + * will perform the necessary mapping and unmapping. + * + * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and + * restores it before returning. Thus, @ctx will be left pointing to the same + * attribute on return as on entry. However, the actual pointers in @ctx may + * point to different memory locations on return, so you must remember to reset + * any cached pointers from the @ctx, i.e. after the call to + * ntfs_attr_find_vcn_nolock(), you will probably want to do: + * m = ctx->mrec; + * a = ctx->attr; + * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that + * you cache ctx->mrec in a variable @m of type MFT_RECORD *. * Note you need to distinguish between the lcn of the returned runlist element * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on * read and allocate clusters on write. @@ -263,23 +446,32 @@ retry_remap: * -ENOMEM - Not enough memory to map runlist. * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). * - * Locking: - The runlist must be locked on entry and is left locked on return. - * - If @write_locked is FALSE, i.e. the runlist is locked for reading, - * the lock may be dropped inside the function so you cannot rely on - * the runlist still being the same when this function returns. + * WARNING: If @ctx is supplied, regardless of whether success or failure is + * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx + * is no longer valid, i.e. you need to either call + * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it. + * In that case PTR_ERR(@ctx->mrec) will give you the error code for + * why the mapping of the old inode failed. + * + * Locking: - The runlist described by @ni must be locked for writing on entry + * and is locked on return. Note the runlist may be modified when + * needed runlist fragments need to be mapped. + * - If @ctx is NULL, the base mft record of @ni must not be mapped on + * entry and it will be left unmapped on return. + * - If @ctx is not NULL, the base mft record must be mapped on entry + * and it will be left mapped on return. */ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, - const BOOL write_locked) + ntfs_attr_search_ctx *ctx) { unsigned long flags; runlist_element *rl; int err = 0; - BOOL is_retry = FALSE; + bool is_retry = false; - ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", - ni->mft_no, (unsigned long long)vcn, - write_locked ? "write" : "read"); BUG_ON(!ni); + ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.", + ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out"); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); if (!ni->runlist.rl) { @@ -312,33 +504,22 @@ retry_remap: } if (!err && !is_retry) { /* - * The @vcn is in an unmapped region, map the runlist and - * retry. + * If the search context is invalid we cannot map the unmapped + * region. */ - if (!write_locked) { - up_read(&ni->runlist.lock); - down_write(&ni->runlist.lock); - if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) != - LCN_RL_NOT_MAPPED)) { - up_write(&ni->runlist.lock); - down_read(&ni->runlist.lock); + if (IS_ERR(ctx->mrec)) + err = PTR_ERR(ctx->mrec); + else { + /* + * The @vcn is in an unmapped region, map the runlist + * and retry. + */ + err = ntfs_map_runlist_nolock(ni, vcn, ctx); + if (likely(!err)) { + is_retry = true; goto retry_remap; } } - err = ntfs_map_runlist_nolock(ni, vcn); - if (!write_locked) { - up_write(&ni->runlist.lock); - down_read(&ni->runlist.lock); - } - if (likely(!err)) { - is_retry = TRUE; - goto retry_remap; - } - /* - * -EINVAL coming from a failed mapping attempt is equivalent - * to i/o error for us as it should not happen in our code - * paths. - */ if (err == -EINVAL) err = -EIO; } else if (!err) @@ -375,8 +556,8 @@ retry_remap: * On actual error, ntfs_attr_find() returns -EIO. In this case @ctx->attr is * undefined and in particular do not rely on it not changing. * - * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself. If it - * is FALSE, the search begins after @ctx->attr. + * If @ctx->is_first is 'true', the search begins with @ctx->attr itself. If it + * is 'false', the search begins after @ctx->attr. * * If @ic is IGNORE_CASE, the @name comparisson is not case sensitive and * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record @@ -416,11 +597,11 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, /* * Iterate over attributes in mft record starting at @ctx->attr, or the - * attribute following that, if @ctx->is_first is TRUE. + * attribute following that, if @ctx->is_first is 'true'. */ if (ctx->is_first) { a = ctx->attr; - ctx->is_first = FALSE; + ctx->is_first = false; } else a = (ATTR_RECORD*)((u8*)ctx->attr + le32_to_cpu(ctx->attr->length)); @@ -707,11 +888,11 @@ static int ntfs_external_attr_find(const ATTR_TYPE type, ctx->al_entry = (ATTR_LIST_ENTRY*)al_start; /* * Iterate over entries in attribute list starting at @ctx->al_entry, - * or the entry following that, if @ctx->is_first is TRUE. + * or the entry following that, if @ctx->is_first is 'true'. */ if (ctx->is_first) { al_entry = ctx->al_entry; - ctx->is_first = FALSE; + ctx->is_first = false; } else al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry + le16_to_cpu(ctx->al_entry->length)); @@ -865,7 +1046,7 @@ do_next_attr_loop: le32_to_cpu(ctx->mrec->bytes_allocated)) break; if (a->type == AT_END) - continue; + break; if (!a->length) break; if (al_entry->instance != a->instance) @@ -944,7 +1125,7 @@ not_found: ctx->mrec = ctx->base_mrec; ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + le16_to_cpu(ctx->mrec->attrs_offset)); - ctx->is_first = TRUE; + ctx->is_first = true; ctx->ntfs_ino = base_ni; ctx->base_ntfs_ino = NULL; ctx->base_mrec = NULL; @@ -1000,7 +1181,7 @@ not_found: * for, i.e. if one wants to add the attribute to the mft record this is the * correct place to insert its attribute list entry into. * - * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is + * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is * then undefined and in particular you should not rely on it not changing. */ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, @@ -1011,6 +1192,7 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, ntfs_inode *base_ni; ntfs_debug("Entering."); + BUG_ON(IS_ERR(ctx->mrec)); if (ctx->base_ntfs_ino) base_ni = ctx->base_ntfs_ino; else @@ -1040,7 +1222,7 @@ static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx, /* Sanity checks are performed elsewhere. */ .attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset)), - .is_first = TRUE, + .is_first = true, .ntfs_ino = ni, }; } @@ -1059,7 +1241,7 @@ void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx) { if (likely(!ctx->base_ntfs_ino)) { /* No attribute list. */ - ctx->is_first = TRUE; + ctx->is_first = true; /* Sanity checks are performed elsewhere. */ ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + le16_to_cpu(ctx->mrec->attrs_offset)); @@ -1088,7 +1270,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec) { ntfs_attr_search_ctx *ctx; - ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS); + ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS); if (ctx) ntfs_attr_init_search_ctx(ctx, ni, mrec); return ctx; @@ -1227,7 +1409,7 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type) */ int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) { - if (type == AT_INDEX_ALLOCATION || type == AT_EA) + if (type == AT_INDEX_ALLOCATION) return -EPERM; return 0; } @@ -1319,10 +1501,17 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, /** * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute * @ni: ntfs inode describing the attribute to convert + * @data_size: size of the resident data to copy to the non-resident attribute * * Convert the resident ntfs attribute described by the ntfs inode @ni to a * non-resident one. * + * @data_size must be equal to the attribute value size. This is needed since + * we need to know the size before we can map the mft record and our callers + * always know it. The reason we cannot simply read the size from the vfs + * inode i_size is that this is not necessarily uptodate. This happens when + * ntfs_attr_make_non_resident() is called in the ->truncate call path(s). + * * Return 0 on success and -errno on error. The following error return codes * are defined: * -EPERM - The attribute is not allowed to be non-resident. @@ -1341,9 +1530,9 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, * NOTE to self: No changes in the attribute list are required to move from * a resident to a non-resident attribute. * - * Locking: - The caller must hold i_sem on the inode. + * Locking: - The caller must hold i_mutex on the inode. */ -int ntfs_attr_make_non_resident(ntfs_inode *ni) +int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) { s64 new_size; struct inode *vi = VFS_I(ni); @@ -1381,11 +1570,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) * The size needs to be aligned to a cluster boundary for allocation * purposes. */ - new_size = (i_size_read(vi) + vol->cluster_size - 1) & + new_size = (data_size + vol->cluster_size - 1) & ~(vol->cluster_size - 1); if (new_size > 0) { - runlist_element *rl2; - /* * Will need the page later and since the page lock nests * outside all ntfs locks, we need to get the page now. @@ -1396,7 +1583,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) return -ENOMEM; /* Start by allocating clusters to hold the attribute value. */ rl = ntfs_cluster_alloc(vol, 0, new_size >> - vol->cluster_size_bits, -1, DATA_ZONE); + vol->cluster_size_bits, -1, DATA_ZONE, true); if (IS_ERR(rl)) { err = PTR_ERR(rl); ntfs_debug("Failed to allocate cluster%s, error code " @@ -1405,12 +1592,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) err); goto page_err_out; } - /* Change the runlist terminator to LCN_ENOENT. */ - rl2 = rl; - while (rl2->length) - rl2++; - BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED); - rl2->lcn = LCN_ENOENT; } else { rl = NULL; page = NULL; @@ -1473,14 +1654,14 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) * attribute value. */ attr_size = le32_to_cpu(a->data.resident.value_length); - BUG_ON(attr_size != i_size_read(vi)); + BUG_ON(attr_size != data_size); if (page && !PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, (u8*)a + le16_to_cpu(a->data.resident.value_offset), attr_size); memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); flush_dcache_page(page); SetPageUptodate(page); } @@ -1512,7 +1693,9 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) a->data.non_resident.initialized_size = cpu_to_sle64(attr_size); if (NInoSparse(ni) || NInoCompressed(ni)) { - a->data.non_resident.compression_unit = 4; + a->data.non_resident.compression_unit = 0; + if (NInoCompressed(ni) || vol->major_ver < 3) + a->data.non_resident.compression_unit = 4; a->data.non_resident.compressed_size = a->data.non_resident.allocated_size; } else @@ -1531,19 +1714,28 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) ni->allocated_size = new_size; if (NInoSparse(ni) || NInoCompressed(ni)) { ni->itype.compressed.size = ni->allocated_size; - ni->itype.compressed.block_size = 1U << - (a->data.non_resident.compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = - ffs(ni->itype.compressed.block_size) - 1; - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident.compression_unit; - } + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << (a->data. + non_resident.compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed.block_size) - + 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident.compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = 0; + ni->itype.compressed.block_clusters = 0; + } + vi->i_blocks = ni->itype.compressed.size >> 9; + } else + vi->i_blocks = ni->allocated_size >> 9; write_unlock_irqrestore(&ni->size_lock, flags); /* * This needs to be last since the address space operations ->readpage * and ->writepage can run concurrently with us as they are not - * serialized on i_sem. Note, we are not allowed to fail once we flip + * serialized on i_mutex. Note, we are not allowed to fail once we flip * this switch, which is another reason to do this last. */ NInoSetNonResident(ni); @@ -1556,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni) if (page) { set_page_dirty(page); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); } ntfs_debug("Done."); @@ -1614,9 +1805,9 @@ undo_err_out: sizeof(a->data.resident.reserved)); /* Copy the data from the page back to the attribute value. */ if (page) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy((u8*)a + mp_ofs, kaddr, attr_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } /* Setup the allocated size in the ntfs inode in case it changed. */ write_lock_irqsave(&ni->size_lock, flags); @@ -1652,6 +1843,634 @@ page_err_out: } /** + * ntfs_attr_extend_allocation - extend the allocated space of an attribute + * @ni: ntfs inode of the attribute whose allocation to extend + * @new_alloc_size: new size in bytes to which to extend the allocation to + * @new_data_size: new size in bytes to which to extend the data to + * @data_start: beginning of region which is required to be non-sparse + * + * Extend the allocated space of an attribute described by the ntfs inode @ni + * to @new_alloc_size bytes. If @data_start is -1, the whole extension may be + * implemented as a hole in the file (as long as both the volume and the ntfs + * inode @ni have sparse support enabled). If @data_start is >= 0, then the + * region between the old allocated size and @data_start - 1 may be made sparse + * but the regions between @data_start and @new_alloc_size must be backed by + * actual clusters. + * + * If @new_data_size is -1, it is ignored. If it is >= 0, then the data size + * of the attribute is extended to @new_data_size. Note that the i_size of the + * vfs inode is not updated. Only the data size in the base attribute record + * is updated. The caller has to update i_size separately if this is required. + * WARNING: It is a BUG() for @new_data_size to be smaller than the old data + * size as well as for @new_data_size to be greater than @new_alloc_size. + * + * For resident attributes this involves resizing the attribute record and if + * necessary moving it and/or other attributes into extent mft records and/or + * converting the attribute to a non-resident attribute which in turn involves + * extending the allocation of a non-resident attribute as described below. + * + * For non-resident attributes this involves allocating clusters in the data + * zone on the volume (except for regions that are being made sparse) and + * extending the run list to describe the allocated clusters as well as + * updating the mapping pairs array of the attribute. This in turn involves + * resizing the attribute record and if necessary moving it and/or other + * attributes into extent mft records and/or splitting the attribute record + * into multiple extent attribute records. + * + * Also, the attribute list attribute is updated if present and in some of the + * above cases (the ones where extent mft records/attributes come into play), + * an attribute list attribute is created if not already present. + * + * Return the new allocated size on success and -errno on error. In the case + * that an error is encountered but a partial extension at least up to + * @data_start (if present) is possible, the allocation is partially extended + * and this is returned. This means the caller must check the returned size to + * determine if the extension was partial. If @data_start is -1 then partial + * allocations are not performed. + * + * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA. + * + * Locking: This function takes the runlist lock of @ni for writing as well as + * locking the mft record of the base ntfs inode. These locks are maintained + * throughout execution of the function. These locks are required so that the + * attribute can be resized safely and so that it can for example be converted + * from resident to non-resident safely. + * + * TODO: At present attribute list attribute handling is not implemented. + * + * TODO: At present it is not safe to call this function for anything other + * than the $DATA attribute(s) of an uncompressed and unencrypted file. + */ +s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size, + const s64 new_data_size, const s64 data_start) +{ + VCN vcn; + s64 ll, allocated_size, start = data_start; + struct inode *vi = VFS_I(ni); + ntfs_volume *vol = ni->vol; + ntfs_inode *base_ni; + MFT_RECORD *m; + ATTR_RECORD *a; + ntfs_attr_search_ctx *ctx; + runlist_element *rl, *rl2; + unsigned long flags; + int err, mp_size; + u32 attr_len = 0; /* Silence stupid gcc warning. */ + bool mp_rebuilt; + +#ifdef DEBUG + read_lock_irqsave(&ni->size_lock, flags); + allocated_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " + "old_allocated_size 0x%llx, " + "new_allocated_size 0x%llx, new_data_size 0x%llx, " + "data_start 0x%llx.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + (unsigned long long)allocated_size, + (unsigned long long)new_alloc_size, + (unsigned long long)new_data_size, + (unsigned long long)start); +#endif +retry_extend: + /* + * For non-resident attributes, @start and @new_size need to be aligned + * to cluster boundaries for allocation purposes. + */ + if (NInoNonResident(ni)) { + if (start > 0) + start &= ~(s64)vol->cluster_size_mask; + new_alloc_size = (new_alloc_size + vol->cluster_size - 1) & + ~(s64)vol->cluster_size_mask; + } + BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size); + /* Check if new size is allowed in $AttrDef. */ + err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size); + if (unlikely(err)) { + /* Only emit errors when the write will fail completely. */ + read_lock_irqsave(&ni->size_lock, flags); + allocated_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (start < 0 || start >= allocated_size) { + if (err == -ERANGE) { + ntfs_error(vol->sb, "Cannot extend allocation " + "of inode 0x%lx, attribute " + "type 0x%x, because the new " + "allocation would exceed the " + "maximum allowed size for " + "this attribute type.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + } else { + ntfs_error(vol->sb, "Cannot extend allocation " + "of inode 0x%lx, attribute " + "type 0x%x, because this " + "attribute type is not " + "defined on the NTFS volume. " + "Possible corruption! You " + "should run chkdsk!", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + } + } + /* Translate error code to be POSIX conformant for write(2). */ + if (err == -ERANGE) + err = -EFBIG; + else + err = -EIO; + return err; + } + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + /* + * We will be modifying both the runlist (if non-resident) and the mft + * record so lock them both down. + */ + down_write(&ni->runlist.lock); + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + read_lock_irqsave(&ni->size_lock, flags); + allocated_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* + * If non-resident, seek to the last extent. If resident, there is + * only one extent, so seek to that. + */ + vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits : + 0; + /* + * Abort if someone did the work whilst we waited for the locks. If we + * just converted the attribute from resident to non-resident it is + * likely that exactly this has happened already. We cannot quite + * abort if we need to update the data size. + */ + if (unlikely(new_alloc_size <= allocated_size)) { + ntfs_debug("Allocated size already exceeds requested size."); + new_alloc_size = allocated_size; + if (new_data_size < 0) + goto done; + /* + * We want the first attribute extent so that we can update the + * data size. + */ + vcn = 0; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, vcn, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + m = ctx->mrec; + a = ctx->attr; + /* Use goto to reduce indentation. */ + if (a->non_resident) + goto do_non_resident_extend; + BUG_ON(NInoNonResident(ni)); + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(a->data.resident.value_length); + /* + * Extend the attribute record to be able to store the new attribute + * size. ntfs_attr_record_resize() will not do anything if the size is + * not changing. + */ + if (new_alloc_size < vol->mft_record_size && + !ntfs_attr_record_resize(m, a, + le16_to_cpu(a->data.resident.value_offset) + + new_alloc_size)) { + /* The resize succeeded! */ + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = le32_to_cpu(a->length) - + le16_to_cpu(a->data.resident.value_offset); + write_unlock_irqrestore(&ni->size_lock, flags); + if (new_data_size >= 0) { + BUG_ON(new_data_size < attr_len); + a->data.resident.value_length = + cpu_to_le32((u32)new_data_size); + } + goto flush_done; + } + /* + * We have to drop all the locks so we can call + * ntfs_attr_make_non_resident(). This could be optimised by try- + * locking the first page cache page and only if that fails dropping + * the locks, locking the page, and redoing all the locking and + * lookups. While this would be a huge optimisation, it is not worth + * it as this is definitely a slow code path. + */ + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + /* + * Not enough space in the mft record, try to make the attribute + * non-resident and if successful restart the extension process. + */ + err = ntfs_attr_make_non_resident(ni, attr_len); + if (likely(!err)) + goto retry_extend; + /* + * Could not make non-resident. If this is due to this not being + * permitted for this attribute type or there not being enough space, + * try to make other attributes non-resident. Otherwise fail. + */ + if (unlikely(err != -EPERM && err != -ENOSPC)) { + /* Only emit errors when the write will fail completely. */ + read_lock_irqsave(&ni->size_lock, flags); + allocated_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation of " + "inode 0x%lx, attribute type 0x%x, " + "because the conversion from resident " + "to non-resident attribute failed " + "with error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + if (err != -ENOMEM) + err = -EIO; + goto conv_err_out; + } + /* TODO: Not implemented from here, abort. */ + read_lock_irqsave(&ni->size_lock, flags); + allocated_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (start < 0 || start >= allocated_size) { + if (err == -ENOSPC) + ntfs_error(vol->sb, "Not enough space in the mft " + "record/on disk for the non-resident " + "attribute value. This case is not " + "implemented yet."); + else /* if (err == -EPERM) */ + ntfs_error(vol->sb, "This attribute type may not be " + "non-resident. This case is not " + "implemented yet."); + } + err = -EOPNOTSUPP; + goto conv_err_out; +#if 0 + // TODO: Attempt to make other attributes non-resident. + if (!err) + goto do_resident_extend; + /* + * Both the attribute list attribute and the standard information + * attribute must remain in the base inode. Thus, if this is one of + * these attributes, we have to try to move other attributes out into + * extent mft records instead. + */ + if (ni->type == AT_ATTRIBUTE_LIST || + ni->type == AT_STANDARD_INFORMATION) { + // TODO: Attempt to move other attributes into extent mft + // records. + err = -EOPNOTSUPP; + if (!err) + goto do_resident_extend; + goto err_out; + } + // TODO: Attempt to move this attribute to an extent mft record, but + // only if it is not already the only attribute in an mft record in + // which case there would be nothing to gain. + err = -EOPNOTSUPP; + if (!err) + goto do_resident_extend; + /* There is nothing we can do to make enough space. )-: */ + goto err_out; +#endif +do_non_resident_extend: + BUG_ON(!NInoNonResident(ni)); + if (new_alloc_size == allocated_size) { + BUG_ON(vcn); + goto alloc_done; + } + /* + * If the data starts after the end of the old allocation, this is a + * $DATA attribute and sparse attributes are enabled on the volume and + * for this inode, then create a sparse region between the old + * allocated size and the start of the data. Otherwise simply proceed + * with filling the whole space between the old allocated size and the + * new allocated size with clusters. + */ + if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA || + !NVolSparseEnabled(vol) || NInoSparseDisabled(ni)) + goto skip_sparse; + // TODO: This is not implemented yet. We just fill in with real + // clusters for now... + ntfs_debug("Inserting holes is not-implemented yet. Falling back to " + "allocating real clusters instead."); +skip_sparse: + rl = ni->runlist.rl; + if (likely(rl)) { + /* Seek to the end of the runlist. */ + while (rl->length) + rl++; + } + /* If this attribute extent is not mapped, map it now. */ + if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED || + (rl->lcn == LCN_ENOENT && rl > ni->runlist.rl && + (rl-1)->lcn == LCN_RL_NOT_MAPPED))) { + if (!rl && !allocated_size) + goto first_alloc; + rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl); + if (IS_ERR(rl)) { + err = PTR_ERR(rl); + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation " + "of inode 0x%lx, attribute " + "type 0x%x, because the " + "mapping of a runlist " + "fragment failed with error " + "code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + err); + if (err != -ENOMEM) + err = -EIO; + goto err_out; + } + ni->runlist.rl = rl; + /* Seek to the end of the runlist. */ + while (rl->length) + rl++; + } + /* + * We now know the runlist of the last extent is mapped and @rl is at + * the end of the runlist. We want to begin allocating clusters + * starting at the last allocated cluster to reduce fragmentation. If + * there are no valid LCNs in the attribute we let the cluster + * allocator choose the starting cluster. + */ + /* If the last LCN is a hole or simillar seek back to last real LCN. */ + while (rl->lcn < 0 && rl > ni->runlist.rl) + rl--; +first_alloc: + // FIXME: Need to implement partial allocations so at least part of the + // write can be performed when start >= 0. (Needed for POSIX write(2) + // conformance.) + rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits, + (new_alloc_size - allocated_size) >> + vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ? + rl->lcn + rl->length : -1, DATA_ZONE, true); + if (IS_ERR(rl2)) { + err = PTR_ERR(rl2); + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation of " + "inode 0x%lx, attribute type 0x%x, " + "because the allocation of clusters " + "failed with error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + if (err != -ENOMEM && err != -ENOSPC) + err = -EIO; + goto err_out; + } + rl = ntfs_runlists_merge(ni->runlist.rl, rl2); + if (IS_ERR(rl)) { + err = PTR_ERR(rl); + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation of " + "inode 0x%lx, attribute type 0x%x, " + "because the runlist merge failed " + "with error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + if (err != -ENOMEM) + err = -EIO; + if (ntfs_cluster_free_from_rl(vol, rl2)) { + ntfs_error(vol->sb, "Failed to release allocated " + "cluster(s) in error code path. Run " + "chkdsk to recover the lost " + "cluster(s)."); + NVolSetErrors(vol); + } + ntfs_free(rl2); + goto err_out; + } + ni->runlist.rl = rl; + ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size - + allocated_size) >> vol->cluster_size_bits); + /* Find the runlist element with which the attribute extent starts. */ + ll = sle64_to_cpu(a->data.non_resident.lowest_vcn); + rl2 = ntfs_rl_find_vcn_nolock(rl, ll); + BUG_ON(!rl2); + BUG_ON(!rl2->length); + BUG_ON(rl2->lcn < LCN_HOLE); + mp_rebuilt = false; + /* Get the size for the new mapping pairs array for this extent. */ + mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1); + if (unlikely(mp_size <= 0)) { + err = mp_size; + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation of " + "inode 0x%lx, attribute type 0x%x, " + "because determining the size for the " + "mapping pairs failed with error code " + "%i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + err = -EIO; + goto undo_alloc; + } + /* Extend the attribute record to fit the bigger mapping pairs array. */ + attr_len = le32_to_cpu(a->length); + err = ntfs_attr_record_resize(m, a, mp_size + + le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); + if (unlikely(err)) { + BUG_ON(err != -ENOSPC); + // TODO: Deal with this by moving this extent to a new mft + // record or by starting a new extent in a new mft record, + // possibly by extending this extent partially and filling it + // and creating a new extent for the remainder, or by making + // other attributes non-resident and/or by moving other + // attributes out of this mft record. + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Not enough space in the mft " + "record for the extended attribute " + "record. This case is not " + "implemented yet."); + err = -EOPNOTSUPP; + goto undo_alloc; + } + mp_rebuilt = true; + /* Generate the mapping pairs array directly into the attr record. */ + err = ntfs_mapping_pairs_build(vol, (u8*)a + + le16_to_cpu(a->data.non_resident.mapping_pairs_offset), + mp_size, rl2, ll, -1, NULL); + if (unlikely(err)) { + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot extend allocation of " + "inode 0x%lx, attribute type 0x%x, " + "because building the mapping pairs " + "failed with error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + err = -EIO; + goto undo_alloc; + } + /* Update the highest_vcn. */ + a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >> + vol->cluster_size_bits) - 1); + /* + * We now have extended the allocated size of the attribute. Reflect + * this in the ntfs_inode structure and the attribute record. + */ + if (a->data.non_resident.lowest_vcn) { + /* + * We are not in the first attribute extent, switch to it, but + * first ensure the changes will make it to disk later. + */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_reinit_search_ctx(ctx); + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) + goto restore_undo_alloc; + /* @m is not used any more so no need to set it. */ + a = ctx->attr; + } + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = new_alloc_size; + a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size); + /* + * FIXME: This would fail if @ni is a directory, $MFT, or an index, + * since those can have sparse/compressed set. For example can be + * set compressed even though it is not compressed itself and in that + * case the bit means that files are to be created compressed in the + * directory... At present this is ok as this code is only called for + * regular files, and only for their $DATA attribute(s). + * FIXME: The calculation is wrong if we created a hole above. For now + * it does not matter as we never create holes. + */ + if (NInoSparse(ni) || NInoCompressed(ni)) { + ni->itype.compressed.size += new_alloc_size - allocated_size; + a->data.non_resident.compressed_size = + cpu_to_sle64(ni->itype.compressed.size); + vi->i_blocks = ni->itype.compressed.size >> 9; + } else + vi->i_blocks = new_alloc_size >> 9; + write_unlock_irqrestore(&ni->size_lock, flags); +alloc_done: + if (new_data_size >= 0) { + BUG_ON(new_data_size < + sle64_to_cpu(a->data.non_resident.data_size)); + a->data.non_resident.data_size = cpu_to_sle64(new_data_size); + } +flush_done: + /* Ensure the changes make it to disk. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); +done: + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + ntfs_debug("Done, new_allocated_size 0x%llx.", + (unsigned long long)new_alloc_size); + return new_alloc_size; +restore_undo_alloc: + if (start < 0 || start >= allocated_size) + ntfs_error(vol->sb, "Cannot complete extension of allocation " + "of inode 0x%lx, attribute type 0x%x, because " + "lookup of first attribute extent failed with " + "error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + if (err == -ENOENT) + err = -EIO; + ntfs_attr_reinit_search_ctx(ctx); + if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, + allocated_size >> vol->cluster_size_bits, NULL, 0, + ctx)) { + ntfs_error(vol->sb, "Failed to find last attribute extent of " + "attribute in error code path. Run chkdsk to " + "recover."); + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = new_alloc_size; + /* + * FIXME: This would fail if @ni is a directory... See above. + * FIXME: The calculation is wrong if we created a hole above. + * For now it does not matter as we never create holes. + */ + if (NInoSparse(ni) || NInoCompressed(ni)) { + ni->itype.compressed.size += new_alloc_size - + allocated_size; + vi->i_blocks = ni->itype.compressed.size >> 9; + } else + vi->i_blocks = new_alloc_size >> 9; + write_unlock_irqrestore(&ni->size_lock, flags); + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + /* + * The only thing that is now wrong is the allocated size of the + * base attribute extent which chkdsk should be able to fix. + */ + NVolSetErrors(vol); + return err; + } + ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64( + (allocated_size >> vol->cluster_size_bits) - 1); +undo_alloc: + ll = allocated_size >> vol->cluster_size_bits; + if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) { + ntfs_error(vol->sb, "Failed to release allocated cluster(s) " + "in error code path. Run chkdsk to recover " + "the lost cluster(s)."); + NVolSetErrors(vol); + } + m = ctx->mrec; + a = ctx->attr; + /* + * If the runlist truncation fails and/or the search context is no + * longer valid, we cannot resize the attribute record or build the + * mapping pairs array thus we mark the inode bad so that no access to + * the freed clusters can happen. + */ + if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) { + ntfs_error(vol->sb, "Failed to %s in error code path. Run " + "chkdsk to recover.", IS_ERR(m) ? + "restore attribute search context" : + "truncate attribute runlist"); + NVolSetErrors(vol); + } else if (mp_rebuilt) { + if (ntfs_attr_record_resize(m, a, attr_len)) { + ntfs_error(vol->sb, "Failed to restore attribute " + "record in error code path. Run " + "chkdsk to recover."); + NVolSetErrors(vol); + } else /* if (success) */ { + if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( + a->data.non_resident. + mapping_pairs_offset), attr_len - + le16_to_cpu(a->data.non_resident. + mapping_pairs_offset), rl2, ll, -1, + NULL)) { + ntfs_error(vol->sb, "Failed to restore " + "mapping pairs array in error " + "code path. Run chkdsk to " + "recover."); + NVolSetErrors(vol); + } + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + } + } +err_out: + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); +conv_err_out: + ntfs_debug("Failed. Returning error code %i.", err); + return err; +} + +/** * ntfs_attr_set - fill (a part of) an attribute with a byte * @ni: ntfs inode describing the attribute to fill * @ofs: offset inside the attribute at which to start to fill @@ -1678,7 +2497,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) struct page *page; u8 *kaddr; pgoff_t idx, end; - unsigned int start_ofs, end_ofs, size; + unsigned start_ofs, end_ofs, size; ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.", (long long)ofs, (long long)cnt, val); @@ -1707,18 +2526,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) end >>= PAGE_CACHE_SHIFT; /* If there is a first partial page, need to do it the slow way. */ if (start_ofs) { - page = read_cache_page(mapping, idx, - (filler_t*)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, idx, NULL); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read first partial " - "page (sync error, index 0x%lx).", idx); - return PTR_ERR(page); - } - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - ntfs_error(vol->sb, "Failed to read first partial page " - "(async error, index 0x%lx).", idx); - page_cache_release(page); + "page (error, index 0x%lx).", idx); return PTR_ERR(page); } /* @@ -1728,12 +2539,14 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) size = PAGE_CACHE_SIZE; if (idx == end) size = end_ofs; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr + start_ofs, val, size - start_ofs); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_page_dirty(page); page_cache_release(page); + balance_dirty_pages_ratelimited(mapping); + cond_resched(); if (idx == end) goto done; idx++; @@ -1747,10 +2560,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) "page (index 0x%lx).", idx); return -ENOMEM; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, val, PAGE_CACHE_SIZE); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); /* * If the page has buffers, mark them uptodate since buffer * state and not page state is definitive in 2.6 kernels. @@ -1773,29 +2586,25 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) /* Finally unlock and release the page. */ unlock_page(page); page_cache_release(page); + balance_dirty_pages_ratelimited(mapping); + cond_resched(); } /* If there is a last partial page, need to do it the slow way. */ if (end_ofs) { - page = read_cache_page(mapping, idx, - (filler_t*)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, idx, NULL); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read last partial page " - "(sync error, index 0x%lx).", idx); - return PTR_ERR(page); - } - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - ntfs_error(vol->sb, "Failed to read last partial page " - "(async error, index 0x%lx).", idx); - page_cache_release(page); + "(error, index 0x%lx).", idx); return PTR_ERR(page); } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, val, end_ofs); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); set_page_dirty(page); page_cache_release(page); + balance_dirty_pages_ratelimited(mapping); + cond_resched(); } done: ntfs_debug("Done."); diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index 0618ed6fd7b..3c8b74c99b8 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h @@ -40,10 +40,10 @@ * Structure must be initialized to zero before the first call to one of the * attribute search functions. Initialize @mrec to point to the mft record to * search, and @attr to point to the first attribute within @mrec (not necessary - * if calling the _first() functions), and set @is_first to TRUE (not necessary + * if calling the _first() functions), and set @is_first to 'true' (not necessary * if calling the _first() functions). * - * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE, + * If @is_first is 'true', the search begins with @attr. If @is_first is 'false', * the search begins after @attr. This is so that, after the first call to one * of the search attribute functions, we can call the function again, without * any modification of the search context, to automagically get the next @@ -52,7 +52,7 @@ typedef struct { MFT_RECORD *mrec; ATTR_RECORD *attr; - BOOL is_first; + bool is_first; ntfs_inode *ntfs_ino; ATTR_LIST_ENTRY *al_entry; ntfs_inode *base_ntfs_ino; @@ -60,14 +60,15 @@ typedef struct { ATTR_RECORD *base_attr; } ntfs_attr_search_ctx; -extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn); +extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, + ntfs_attr_search_ctx *ctx); extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, - const BOOL write_locked); + const bool write_locked); extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, - const VCN vcn, const BOOL write_locked); + const VCN vcn, ntfs_attr_search_ctx *ctx); int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, const u32 name_len, const IGNORE_CASE_BOOL ic, @@ -102,7 +103,10 @@ extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, const u32 new_size); -extern int ntfs_attr_make_non_resident(ntfs_inode *ni); +extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size); + +extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size, + const s64 new_data_size, const s64 data_start); extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val); diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index 7a190cdc60e..0809cf87609 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -34,18 +34,18 @@ * @start_bit: first bit to set * @count: number of bits to set * @value: value to set the bits to (i.e. 0 or 1) - * @is_rollback: if TRUE this is a rollback operation + * @is_rollback: if 'true' this is a rollback operation * * Set @count bits starting at bit @start_bit in the bitmap described by the * vfs inode @vi to @value, where @value is either 0 or 1. * - * @is_rollback should always be FALSE, it is for internal use to rollback + * @is_rollback should always be 'false', it is for internal use to rollback * errors. You probably want to use ntfs_bitmap_set_bits_in_run() instead. * * Return 0 on success and -errno on error. */ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, - const s64 count, const u8 value, const BOOL is_rollback) + const s64 count, const u8 value, const bool is_rollback) { s64 cnt = count; pgoff_t index, end_index; @@ -172,7 +172,7 @@ rollback: return PTR_ERR(page); if (count != cnt) pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt, - value ? 0 : 1, TRUE); + value ? 0 : 1, true); else pos = 0; if (!pos) { diff --git a/fs/ntfs/bitmap.h b/fs/ntfs/bitmap.h index bb50d6bc921..72c9ad8be70 100644 --- a/fs/ntfs/bitmap.h +++ b/fs/ntfs/bitmap.h @@ -30,7 +30,7 @@ #include "types.h" extern int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, - const s64 count, const u8 value, const BOOL is_rollback); + const s64 count, const u8 value, const bool is_rollback); /** * ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value @@ -48,7 +48,7 @@ static inline int ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit, const s64 count, const u8 value) { return __ntfs_bitmap_set_bits_in_run(vi, start_bit, count, value, - FALSE); + false); } /** diff --git a/fs/ntfs/collate.h b/fs/ntfs/collate.h index e027f36fcc2..aba83347e5f 100644 --- a/fs/ntfs/collate.h +++ b/fs/ntfs/collate.h @@ -26,7 +26,7 @@ #include "types.h" #include "volume.h" -static inline BOOL ntfs_is_collation_rule_supported(COLLATION_RULE cr) { +static inline bool ntfs_is_collation_rule_supported(COLLATION_RULE cr) { int i; /* @@ -35,12 +35,12 @@ static inline BOOL ntfs_is_collation_rule_supported(COLLATION_RULE cr) { * now. */ if (unlikely(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG)) - return FALSE; + return false; i = le32_to_cpu(cr); if (likely(((i >= 0) && (i <= 0x02)) || ((i >= 0x10) && (i <= 0x13)))) - return TRUE; - return FALSE; + return true; + return false; } extern int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr, diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 25d24106f89..f82498c35e7 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -25,6 +25,7 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include "attrib.h" #include "inode.h" @@ -57,7 +58,7 @@ typedef enum { /** * ntfs_compression_buffer - one buffer for the decompression engine */ -static u8 *ntfs_compression_buffer = NULL; +static u8 *ntfs_compression_buffer; /** * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer @@ -67,7 +68,7 @@ static DEFINE_SPINLOCK(ntfs_cb_lock); /** * allocate_compression_buffers - allocate the decompression buffers * - * Caller has to hold the ntfs_lock semaphore. + * Caller has to hold the ntfs_lock mutex. * * Return 0 on success or -ENOMEM if the allocations failed. */ @@ -84,7 +85,7 @@ int allocate_compression_buffers(void) /** * free_compression_buffers - free the decompression buffers * - * Caller has to hold the ntfs_lock semaphore. + * Caller has to hold the ntfs_lock mutex. */ void free_compression_buffers(void) { @@ -500,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page) VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> vol->cluster_size_bits; /* - * The first vcn after the last wanted vcn (minumum alignment is again + * The first vcn after the last wanted vcn (minimum alignment is again * PAGE_CACHE_SIZE. */ VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) @@ -561,6 +562,16 @@ int ntfs_read_compressed_block(struct page *page) read_unlock_irqrestore(&ni->size_lock, flags); max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - offset; + /* Is the page fully outside i_size? (truncate in progress) */ + if (xpage >= max_page) { + kfree(bhs); + kfree(pages); + zero_user(page, 0, PAGE_CACHE_SIZE); + ntfs_debug("Compressed read outside i_size - truncated?"); + SetPageUptodate(page); + unlock_page(page); + return 0; + } if (nr_pages < max_page) max_page = nr_pages; for (i = 0; i < max_page; i++, offset++) { @@ -600,7 +611,7 @@ do_next_cb: rl = NULL; for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn; vcn++) { - BOOL is_retry = FALSE; + bool is_retry = false; if (!rl) { lock_retry_remap: @@ -626,7 +637,7 @@ lock_retry_remap: break; if (is_retry || lcn != LCN_RL_NOT_MAPPED) goto rl_err; - is_retry = TRUE; + is_retry = true; /* * Attempt to map runlist, dropping lock for the * duration. @@ -655,7 +666,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); @@ -687,8 +698,7 @@ lock_retry_remap: "uptodate! Unplugging the disk queue " "and rescheduling."); get_bh(tbh); - blk_run_address_space(mapping); - schedule(); + io_schedule(); put_bh(tbh); if (unlikely(!buffer_uptodate(tbh))) goto read_err; @@ -917,7 +927,7 @@ lock_retry_remap: return 0; ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? - "EOVERFLOW" : (!err ? "EIO" : "unkown error")); + "EOVERFLOW" : (!err ? "EIO" : "unknown error")); return err < 0 ? err : -EIO; read_err: diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c index 807150e2c2b..dd6103cc93c 100644 --- a/fs/ntfs/debug.c +++ b/fs/ntfs/debug.c @@ -18,16 +18,9 @@ * distribution in the file COPYING); if not, write to the Free Software * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "debug.h" -/* - * A static buffer to hold the error string being displayed and a spinlock - * to protect concurrent accesses to it. - */ -static char err_buf[1024]; -static DEFINE_SPINLOCK(err_buf_lock); - /** * __ntfs_warning - output a warning to the syslog * @function: name of function outputting the warning @@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock); void __ntfs_warning(const char *function, const struct super_block *sb, const char *fmt, ...) { + struct va_format vaf; va_list args; int flen = 0; @@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb, #endif if (function) flen = strlen(function); - spin_lock(&err_buf_lock); va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); - va_end(args); + vaf.fmt = fmt; + vaf.va = &args; if (sb) - printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", - sb->s_id, flen ? function : "", err_buf); + pr_warn("(device %s): %s(): %pV\n", + sb->s_id, flen ? function : "", &vaf); else - printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", - flen ? function : "", err_buf); - spin_unlock(&err_buf_lock); + pr_warn("%s(): %pV\n", flen ? function : "", &vaf); + va_end(args); } /** @@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb, void __ntfs_error(const char *function, const struct super_block *sb, const char *fmt, ...) { + struct va_format vaf; va_list args; int flen = 0; @@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb, #endif if (function) flen = strlen(function); - spin_lock(&err_buf_lock); va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); - va_end(args); + vaf.fmt = fmt; + vaf.va = &args; if (sb) - printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", - sb->s_id, flen ? function : "", err_buf); + pr_err("(device %s): %s(): %pV\n", + sb->s_id, flen ? function : "", &vaf); else - printk(KERN_ERR "NTFS-fs error: %s(): %s\n", - flen ? function : "", err_buf); - spin_unlock(&err_buf_lock); + pr_err("%s(): %pV\n", flen ? function : "", &vaf); + va_end(args); } #ifdef DEBUG @@ -124,6 +115,7 @@ int debug_msgs = 0; void __ntfs_debug (const char *file, int line, const char *function, const char *fmt, ...) { + struct va_format vaf; va_list args; int flen = 0; @@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function, return; if (function) flen = strlen(function); - spin_lock(&err_buf_lock); va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + vaf.fmt = fmt; + vaf.va = &args; + pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf); va_end(args); - printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line, - flen ? function : "", err_buf); - spin_unlock(&err_buf_lock); } /* Dump a runlist. Caller has to provide synchronisation for @rl. */ @@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl) if (!debug_msgs) return; - printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); + pr_debug("Dumping runlist (values in hex):\n"); if (!rl) { - printk(KERN_DEBUG "Run list not present.\n"); + pr_debug("Run list not present.\n"); return; } - printk(KERN_DEBUG "VCN LCN Run length\n"); + pr_debug("VCN LCN Run length\n"); for (i = 0; ; i++) { LCN lcn = (rl + i)->lcn; @@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl) if (index > -LCN_ENOENT - 1) index = 3; - printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", + pr_debug("%-16Lx %s %-16Lx%s\n", (long long)(rl + i)->vcn, lcn_str[index], (long long)(rl + i)->length, (rl + i)->length ? "" : " (runlist end)"); } else - printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", + pr_debug("%-16Lx %-16Lx %-16Lx%s\n", (long long)(rl + i)->vcn, (long long)(rl + i)->lcn, (long long)(rl + i)->length, diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 8ac37c33d12..61bf091e32a 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -30,7 +30,9 @@ extern int debug_msgs; -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +extern __printf(4, 5) +void __ntfs_debug(const char *file, int line, const char *function, + const char *format, ...); /** * ntfs_debug - write a debug level message to syslog * @f: a printf format string containing the message @@ -39,29 +41,31 @@ extern int debug_msgs; * ntfs_debug() writes a DEBUG level message to the syslog but only if the * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. */ -static void ntfs_debug(const char *f, ...); -#endif - -extern void __ntfs_debug (const char *file, int line, const char *function, - const char *format, ...) __attribute__ ((format (printf, 4, 5))); #define ntfs_debug(f, a...) \ - __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) + __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) extern void ntfs_debug_dump_runlist(const runlist_element *rl); #else /* !DEBUG */ -#define ntfs_debug(f, a...) do {} while (0) +#define ntfs_debug(fmt, ...) \ +do { \ + if (0) \ + no_printk(fmt, ##__VA_ARGS__); \ +} while (0) + #define ntfs_debug_dump_runlist(rl) do {} while (0) #endif /* !DEBUG */ -extern void __ntfs_warning(const char *function, const struct super_block *sb, - const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) +extern __printf(3, 4) +void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...); +#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a) -extern void __ntfs_error(const char *function, const struct super_block *sb, - const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) +extern __printf(3, 4) +void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...); +#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a) #endif /* _LINUX_NTFS_DEBUG_H */ diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 795c3d1930f..9e38dafa3bc 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1,7 +1,7 @@ /** * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -20,8 +20,8 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include <linux/slab.h> #include "dir.h" #include "aops.h" @@ -33,8 +33,8 @@ /** * The little endian Unicode string $I30 as a global constant. */ -ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), - const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; +ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'), + cpu_to_le16('3'), cpu_to_le16('0'), 0 }; /** * ntfs_lookup_inode_by_name - find an inode in a directory given its name @@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), * work but we don't care for how quickly one can access them. This also fixes * the dcache aliasing issues. * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1004,13 +1004,11 @@ dir_err_out: /** * ntfs_filldir - ntfs specific filldir method * @vol: current ntfs volume - * @fpos: position in the directory * @ndir: ntfs inode of current directory * @ia_page: page in which the index allocation buffer @ie is in resides * @ie: current index entry * @name: buffer to use for the converted name - * @dirent: vfs filldir callback context - * @filldir: vfs filldir callback + * @actor: what to feed the entries to * * Convert the Unicode @name to the loaded NLS and pass it to the @filldir * callback. @@ -1024,12 +1022,12 @@ dir_err_out: * retake the lock if we are returning a non-zero value as ntfs_readdir() * would need to drop the lock immediately anyway. */ -static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, +static inline int ntfs_filldir(ntfs_volume *vol, ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, - u8 *name, void *dirent, filldir_t filldir) + u8 *name, struct dir_context *actor) { unsigned long mref; - int name_len, rc; + int name_len; unsigned dt_type; FILE_NAME_TYPE_FLAGS name_type; @@ -1068,13 +1066,14 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, if (ia_page) unlock_page(ia_page); ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " - "0x%lx, DT_%s.", name, name_len, fpos, mref, + "0x%lx, DT_%s.", name, name_len, actor->pos, mref, dt_type == DT_DIR ? "DIR" : "REG"); - rc = filldir(dirent, name, name_len, fpos, mref, dt_type); + if (!dir_emit(actor, name, name_len, mref, dt_type)) + return 1; /* Relock the page but not if we are aborting ->readdir. */ - if (!rc && ia_page) + if (ia_page) lock_page(ia_page); - return rc; + return 0; } /* @@ -1085,11 +1084,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, * While this will return the names in random order this doesn't matter for * ->readdir but OTOH results in a faster ->readdir. * - * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS + * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS * parts (e.g. ->f_pos and ->i_size, and it also protects against directory * modifications). * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1097,11 +1096,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, * removes them again after the write is complete after which it * unlocks the page. */ -static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int ntfs_readdir(struct file *file, struct dir_context *actor) { s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; - loff_t fpos, i_size; - struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; + loff_t i_size; + struct inode *bmp_vi, *vdir = file_inode(file); struct super_block *sb = vdir->i_sb; ntfs_inode *ndir = NTFS_I(vdir); ntfs_volume *vol = NTFS_SB(sb); @@ -1116,47 +1115,29 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u8 *kaddr, *bmp, *index_end; ntfs_attr_search_ctx *ctx; - fpos = filp->f_pos; ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", - vdir->i_ino, fpos); + vdir->i_ino, actor->pos); rc = err = 0; /* Are we at end of dir yet? */ i_size = i_size_read(vdir); - if (fpos >= i_size + vol->mft_record_size) - goto done; + if (actor->pos >= i_size + vol->mft_record_size) + return 0; /* Emulate . and .. for all directories. */ - if (!fpos) { - ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " - "inode 0x%lx, DT_DIR.", vdir->i_ino); - rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); - if (rc) - goto done; - fpos++; - } - if (fpos == 1) { - ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, " - "inode 0x%lx, DT_DIR.", - parent_ino(filp->f_dentry)); - rc = filldir(dirent, "..", 2, fpos, - parent_ino(filp->f_dentry), DT_DIR); - if (rc) - goto done; - fpos++; - } + if (!dir_emit_dots(file, actor)) + return 0; m = NULL; ctx = NULL; /* * Allocate a buffer to store the current name being processed * converted to format determined by current NLS. */ - name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, - GFP_NOFS); + name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS); if (unlikely(!name)) { err = -ENOMEM; goto err_out; } /* Are we jumping straight into the index allocation attribute? */ - if (fpos >= vol->mft_record_size) + if (actor->pos >= vol->mft_record_size) goto skip_index_root; /* Get hold of the mft record for the directory. */ m = map_mft_record(ndir); @@ -1171,7 +1152,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err_out; } /* Get the offset into the index root attribute. */ - ir_pos = (s64)fpos; + ir_pos = (s64)actor->pos; /* Find the index root attribute in the mft record. */ err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, ctx); @@ -1191,7 +1172,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) * map the mft record without deadlocking. */ rc = le32_to_cpu(ctx->attr->data.resident.value_length); - ir = (INDEX_ROOT*)kmalloc(rc, GFP_NOFS); + ir = kmalloc(rc, GFP_NOFS); if (unlikely(!ir)) { err = -ENOMEM; goto err_out; @@ -1227,10 +1208,9 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ir_pos > (u8*)ie - (u8*)ir) continue; /* Advance the position even if going to skip the entry. */ - fpos = (u8*)ie - (u8*)ir; + actor->pos = (u8*)ie - (u8*)ir; /* Submit the name to the filldir callback. */ - rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent, - filldir); + rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor); if (rc) { kfree(ir); goto abort; @@ -1243,23 +1223,19 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (!NInoIndexAllocPresent(ndir)) goto EOD; /* Advance fpos to the beginning of the index allocation. */ - fpos = vol->mft_record_size; + actor->pos = vol->mft_record_size; skip_index_root: kaddr = NULL; prev_ia_pos = -1LL; /* Get the offset into the index allocation attribute. */ - ia_pos = (s64)fpos - vol->mft_record_size; + ia_pos = (s64)actor->pos - vol->mft_record_size; ia_mapping = vdir->i_mapping; - bmp_vi = ndir->itype.index.bmp_ino; - if (unlikely(!bmp_vi)) { - ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino); - bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); - if (IS_ERR(bmp_vi)) { - ntfs_error(sb, "Failed to get bitmap attribute."); - err = PTR_ERR(bmp_vi); - goto err_out; - } - ndir->itype.index.bmp_ino = bmp_vi; + ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); + bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); + if (IS_ERR(bmp_vi)) { + ntfs_error(sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bmp_vi); + goto err_out; } bmp_mapping = bmp_vi->i_mapping; /* Get the starting bitmap bit position and sanity check it. */ @@ -1267,7 +1243,7 @@ skip_index_root: if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) { ntfs_error(sb, "Current index allocation position exceeds " "index bitmap size."); - goto err_out; + goto iput_err_out; } /* Get the starting bit position in the current bitmap page. */ cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); @@ -1283,7 +1259,7 @@ get_next_bmp_page: ntfs_error(sb, "Reading index bitmap failed."); err = PTR_ERR(bmp_page); bmp_page = NULL; - goto err_out; + goto iput_err_out; } bmp = (u8*)page_address(bmp_page); /* Find next index block in use. */ @@ -1414,7 +1390,7 @@ find_next_index_buffer: if (ia_pos - ia_start > (u8*)ie - (u8*)ia) continue; /* Advance the position even if going to skip the entry. */ - fpos = (u8*)ie - (u8*)ia + + actor->pos = (u8*)ie - (u8*)ia + (sle64_to_cpu(ia->index_block_vcn) << ndir->itype.index.vcn_size_bits) + vol->mft_record_size; @@ -1424,12 +1400,12 @@ find_next_index_buffer: * before returning, unless a non-zero value is returned in * which case the page is left unlocked. */ - rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent, - filldir); + rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor); if (rc) { /* @ia_page is already unlocked in this case. */ ntfs_unmap_page(ia_page); ntfs_unmap_page(bmp_page); + iput(bmp_vi); goto abort; } } @@ -1440,24 +1416,19 @@ unm_EOD: ntfs_unmap_page(ia_page); } ntfs_unmap_page(bmp_page); + iput(bmp_vi); EOD: /* We are finished, set fpos to EOD. */ - fpos = i_size + vol->mft_record_size; + actor->pos = i_size + vol->mft_record_size; abort: kfree(name); -done: -#ifdef DEBUG - if (!rc) - ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos); - else - ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.", - rc, fpos); -#endif - filp->f_pos = fpos; return 0; err_out: - if (bmp_page) + if (bmp_page) { ntfs_unmap_page(bmp_page); +iput_err_out: + iput(bmp_vi); + } if (ia_page) { unlock_page(ia_page); ntfs_unmap_page(ia_page); @@ -1471,7 +1442,6 @@ err_out: if (!err) err = -EIO; ntfs_debug("Failed. Returning error code %i.", -err); - filp->f_pos = fpos; return err; } @@ -1520,25 +1490,39 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) * Note: In the past @filp could be NULL so we ignore it as we don't need it * anyway. * - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore * this problem for now. We do write the $BITMAP attribute if it is present * which is the important one for a directory so things are not too bad. */ -static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { - struct inode *vi = dentry->d_inode; - ntfs_inode *ni = NTFS_I(vi); + struct inode *bmp_vi, *vi = filp->f_mapping->host; int err, ret; + ntfs_attr na; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); + + err = filemap_write_and_wait_range(vi->i_mapping, start, end); + if (err) + return err; + mutex_lock(&vi->i_mutex); + BUG_ON(!S_ISDIR(vi->i_mode)); - if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) - write_inode_now(ni->itype.index.bmp_ino, !datasync); - ret = ntfs_write_inode(vi, 1); + /* If the bitmap attribute inode is in memory sync it, too. */ + na.mft_no = vi->i_ino; + na.type = AT_BITMAP; + na.name = I30; + na.name_len = 4; + bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na); + if (bmp_vi) { + write_inode_now(bmp_vi, !datasync); + iput(bmp_vi); + } + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) @@ -1548,15 +1532,16 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, else ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " "%u.", datasync ? "data" : "", vi->i_ino, -ret); + mutex_unlock(&vi->i_mutex); return ret; } #endif /* NTFS_RW */ -struct file_operations ntfs_dir_ops = { +const struct file_operations ntfs_dir_ops = { .llseek = generic_file_llseek, /* Seek inside directory. */ .read = generic_read_dir, /* Return -EISDIR. */ - .readdir = ntfs_readdir, /* Read directory contents. */ + .iterate = ntfs_readdir, /* Read directory contents. */ #ifdef NTFS_RW .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ /*.aio_fsync = ,*/ /* Sync all outstanding async diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index be9fd1dd423..5c9e2c81cb1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -19,11 +19,26 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/pagemap.h> #include <linux/buffer_head.h> +#include <linux/gfp.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> +#include <linux/sched.h> +#include <linux/swap.h> +#include <linux/uio.h> +#include <linux/writeback.h> +#include <linux/aio.h> +#include <asm/page.h> +#include <asm/uaccess.h> + +#include "attrib.h" +#include "bitmap.h" #include "inode.h" #include "debug.h" +#include "lcnalloc.h" +#include "malloc.h" +#include "mft.h" #include "ntfs.h" /** @@ -48,7 +63,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) { if (sizeof(unsigned long) < 8) { if (i_size_read(vi) > MAX_LFS_FILESIZE) - return -EFBIG; + return -EOVERFLOW; } return generic_file_open(vi, filp); } @@ -56,9 +71,2075 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) #ifdef NTFS_RW /** + * ntfs_attr_extend_initialized - extend the initialized size of an attribute + * @ni: ntfs inode of the attribute to extend + * @new_init_size: requested new initialized size in bytes + * @cached_page: store any allocated but unused page here + * @lru_pvec: lru-buffering pagevec of the caller + * + * Extend the initialized size of an attribute described by the ntfs inode @ni + * to @new_init_size bytes. This involves zeroing any non-sparse space between + * the old initialized size and @new_init_size both in the page cache and on + * disk (if relevant complete pages are already uptodate in the page cache then + * these are simply marked dirty). + * + * As a side-effect, the file size (vfs inode->i_size) may be incremented as, + * in the resident attribute case, it is tied to the initialized size and, in + * the non-resident attribute case, it may not fall below the initialized size. + * + * Note that if the attribute is resident, we do not need to touch the page + * cache at all. This is because if the page cache page is not uptodate we + * bring it uptodate later, when doing the write to the mft record since we + * then already have the page mapped. And if the page is uptodate, the + * non-initialized region will already have been zeroed when the page was + * brought uptodate and the region may in fact already have been overwritten + * with new data via mmap() based writes, so we cannot just zero it. And since + * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped + * is unspecified, we choose not to do zeroing and thus we do not need to touch + * the page at all. For a more detailed explanation see ntfs_truncate() in + * fs/ntfs/inode.c. + * + * Return 0 on success and -errno on error. In the case that an error is + * encountered it is possible that the initialized size will already have been + * incremented some way towards @new_init_size but it is guaranteed that if + * this is the case, the necessary zeroing will also have happened and that all + * metadata is self-consistent. + * + * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be + * held by the caller. + */ +static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size) +{ + s64 old_init_size; + loff_t old_i_size; + pgoff_t index, end_index; + unsigned long flags; + struct inode *vi = VFS_I(ni); + ntfs_inode *base_ni; + MFT_RECORD *m = NULL; + ATTR_RECORD *a; + ntfs_attr_search_ctx *ctx = NULL; + struct address_space *mapping; + struct page *page = NULL; + u8 *kattr; + int err; + u32 attr_len; + + read_lock_irqsave(&ni->size_lock, flags); + old_init_size = ni->initialized_size; + old_i_size = i_size_read(vi); + BUG_ON(new_init_size > ni->allocated_size); + read_unlock_irqrestore(&ni->size_lock, flags); + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " + "old_initialized_size 0x%llx, " + "new_initialized_size 0x%llx, i_size 0x%llx.", + vi->i_ino, (unsigned)le32_to_cpu(ni->type), + (unsigned long long)old_init_size, + (unsigned long long)new_init_size, old_i_size); + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + /* Use goto to reduce indentation and we need the label below anyway. */ + if (NInoNonResident(ni)) + goto do_non_resident_extend; + BUG_ON(old_init_size != old_i_size); + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + goto err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + m = ctx->mrec; + a = ctx->attr; + BUG_ON(a->non_resident); + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(a->data.resident.value_length); + BUG_ON(old_i_size != (loff_t)attr_len); + /* + * Do the zeroing in the mft record and update the attribute size in + * the mft record. + */ + kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); + memset(kattr + attr_len, 0, new_init_size - attr_len); + a->data.resident.value_length = cpu_to_le32((u32)new_init_size); + /* Finally, update the sizes in the vfs and ntfs inodes. */ + write_lock_irqsave(&ni->size_lock, flags); + i_size_write(vi, new_init_size); + ni->initialized_size = new_init_size; + write_unlock_irqrestore(&ni->size_lock, flags); + goto done; +do_non_resident_extend: + /* + * If the new initialized size @new_init_size exceeds the current file + * size (vfs inode->i_size), we need to extend the file size to the + * new initialized size. + */ + if (new_init_size > old_i_size) { + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + goto err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + m = ctx->mrec; + a = ctx->attr; + BUG_ON(!a->non_resident); + BUG_ON(old_i_size != (loff_t) + sle64_to_cpu(a->data.non_resident.data_size)); + a->data.non_resident.data_size = cpu_to_sle64(new_init_size); + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + /* Update the file size in the vfs inode. */ + i_size_write(vi, new_init_size); + ntfs_attr_put_search_ctx(ctx); + ctx = NULL; + unmap_mft_record(base_ni); + m = NULL; + } + mapping = vi->i_mapping; + index = old_init_size >> PAGE_CACHE_SHIFT; + end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + do { + /* + * Read the page. If the page is not present, this will zero + * the uninitialized regions for us. + */ + page = read_mapping_page(mapping, index, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto init_err_out; + } + if (unlikely(PageError(page))) { + page_cache_release(page); + err = -EIO; + goto init_err_out; + } + /* + * Update the initialized size in the ntfs inode. This is + * enough to make ntfs_writepage() work. + */ + write_lock_irqsave(&ni->size_lock, flags); + ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT; + if (ni->initialized_size > new_init_size) + ni->initialized_size = new_init_size; + write_unlock_irqrestore(&ni->size_lock, flags); + /* Set the page dirty so it gets written out. */ + set_page_dirty(page); + page_cache_release(page); + /* + * Play nice with the vm and the rest of the system. This is + * very much needed as we can potentially be modifying the + * initialised size from a very small value to a really huge + * value, e.g. + * f = open(somefile, O_TRUNC); + * truncate(f, 10GiB); + * seek(f, 10GiB); + * write(f, 1); + * And this would mean we would be marking dirty hundreds of + * thousands of pages or as in the above example more than + * two and a half million pages! + * + * TODO: For sparse pages could optimize this workload by using + * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This + * would be set in readpage for sparse pages and here we would + * not need to mark dirty any pages which have this bit set. + * The only caveat is that we have to clear the bit everywhere + * where we allocate any clusters that lie in the page or that + * contain the page. + * + * TODO: An even greater optimization would be for us to only + * call readpage() on pages which are not in sparse regions as + * determined from the runlist. This would greatly reduce the + * number of pages we read and make dirty in the case of sparse + * files. + */ + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } while (++index < end_index); + read_lock_irqsave(&ni->size_lock, flags); + BUG_ON(ni->initialized_size != new_init_size); + read_unlock_irqrestore(&ni->size_lock, flags); + /* Now bring in sync the initialized_size in the mft record. */ + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + goto init_err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto init_err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto init_err_out; + } + m = ctx->mrec; + a = ctx->attr; + BUG_ON(!a->non_resident); + a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size); +done: + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.", + (unsigned long long)new_init_size, i_size_read(vi)); + return 0; +init_err_out: + write_lock_irqsave(&ni->size_lock, flags); + ni->initialized_size = old_init_size; + write_unlock_irqrestore(&ni->size_lock, flags); +err_out: + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + ntfs_debug("Failed. Returning error code %i.", err); + return err; +} + +/** + * ntfs_fault_in_pages_readable - + * + * Fault a number of userspace pages into pagetables. + * + * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes + * with more than two userspace pages as well as handling the single page case + * elegantly. + * + * If you find this difficult to understand, then think of the while loop being + * the following code, except that we do without the integer variable ret: + * + * do { + * ret = __get_user(c, uaddr); + * uaddr += PAGE_SIZE; + * } while (!ret && uaddr < end); + * + * Note, the final __get_user() may well run out-of-bounds of the user buffer, + * but _not_ out-of-bounds of the page the user buffer belongs to, and since + * this is only a read and not a write, and since it is still in the same page, + * it should not matter and this makes the code much simpler. + */ +static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, + int bytes) +{ + const char __user *end; + volatile char c; + + /* Set @end to the first byte outside the last page we care about. */ + end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); + + while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) + ; +} + +/** + * ntfs_fault_in_pages_readable_iovec - + * + * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. + */ +static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, + size_t iov_ofs, int bytes) +{ + do { + const char __user *buf; + unsigned len; + + buf = iov->iov_base + iov_ofs; + len = iov->iov_len - iov_ofs; + if (len > bytes) + len = bytes; + ntfs_fault_in_pages_readable(buf, len); + bytes -= len; + iov++; + iov_ofs = 0; + } while (bytes); +} + +/** + * __ntfs_grab_cache_pages - obtain a number of locked pages + * @mapping: address space mapping from which to obtain page cache pages + * @index: starting index in @mapping at which to begin obtaining pages + * @nr_pages: number of page cache pages to obtain + * @pages: array of pages in which to return the obtained page cache pages + * @cached_page: allocated but as yet unused page + * @lru_pvec: lru-buffering pagevec of caller + * + * Obtain @nr_pages locked page cache pages from the mapping @mapping and + * starting at index @index. + * + * If a page is newly created, add it to lru list + * + * Note, the page locks are obtained in ascending page index order. + */ +static inline int __ntfs_grab_cache_pages(struct address_space *mapping, + pgoff_t index, const unsigned nr_pages, struct page **pages, + struct page **cached_page) +{ + int err, nr; + + BUG_ON(!nr_pages); + err = nr = 0; + do { + pages[nr] = find_lock_page(mapping, index); + if (!pages[nr]) { + if (!*cached_page) { + *cached_page = page_cache_alloc(mapping); + if (unlikely(!*cached_page)) { + err = -ENOMEM; + goto err_out; + } + } + err = add_to_page_cache_lru(*cached_page, mapping, index, + GFP_KERNEL); + if (unlikely(err)) { + if (err == -EEXIST) + continue; + goto err_out; + } + pages[nr] = *cached_page; + *cached_page = NULL; + } + index++; + nr++; + } while (nr < nr_pages); +out: + return err; +err_out: + while (nr > 0) { + unlock_page(pages[--nr]); + page_cache_release(pages[nr]); + } + goto out; +} + +static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) +{ + lock_buffer(bh); + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + return submit_bh(READ, bh); +} + +/** + * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data + * @pages: array of destination pages + * @nr_pages: number of pages in @pages + * @pos: byte position in file at which the write begins + * @bytes: number of bytes to be written + * + * This is called for non-resident attributes from ntfs_file_buffered_write() + * with i_mutex held on the inode (@pages[0]->mapping->host). There are + * @nr_pages pages in @pages which are locked but not kmap()ped. The source + * data has not yet been copied into the @pages. + * + * Need to fill any holes with actual clusters, allocate buffers if necessary, + * ensure all the buffers are mapped, and bring uptodate any buffers that are + * only partially being written to. + * + * If @nr_pages is greater than one, we are guaranteed that the cluster size is + * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside + * the same cluster and that they are the entirety of that cluster, and that + * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole. + * + * i_size is not to be modified yet. + * + * Return 0 on success or -errno on error. + */ +static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, + unsigned nr_pages, s64 pos, size_t bytes) +{ + VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend; + LCN lcn; + s64 bh_pos, vcn_len, end, initialized_size; + sector_t lcn_block; + struct page *page; + struct inode *vi; + ntfs_inode *ni, *base_ni = NULL; + ntfs_volume *vol; + runlist_element *rl, *rl2; + struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; + ntfs_attr_search_ctx *ctx = NULL; + MFT_RECORD *m = NULL; + ATTR_RECORD *a = NULL; + unsigned long flags; + u32 attr_rec_len = 0; + unsigned blocksize, u; + int err, mp_size; + bool rl_write_locked, was_hole, is_retry; + unsigned char blocksize_bits; + struct { + u8 runlist_merged:1; + u8 mft_attr_mapped:1; + u8 mp_rebuilt:1; + u8 attr_switched:1; + } status = { 0, 0, 0, 0 }; + + BUG_ON(!nr_pages); + BUG_ON(!pages); + BUG_ON(!*pages); + vi = pages[0]->mapping->host; + ni = NTFS_I(vi); + vol = ni->vol; + ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " + "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", + vi->i_ino, ni->type, pages[0]->index, nr_pages, + (long long)pos, bytes); + blocksize = vol->sb->s_blocksize; + blocksize_bits = vol->sb->s_blocksize_bits; + u = 0; + do { + page = pages[u]; + BUG_ON(!page); + /* + * create_empty_buffers() will create uptodate/dirty buffers if + * the page is uptodate/dirty. + */ + if (!page_has_buffers(page)) { + create_empty_buffers(page, blocksize, 0); + if (unlikely(!page_has_buffers(page))) + return -ENOMEM; + } + } while (++u < nr_pages); + rl_write_locked = false; + rl = NULL; + err = 0; + vcn = lcn = -1; + vcn_len = 0; + lcn_block = -1; + was_hole = false; + cpos = pos >> vol->cluster_size_bits; + end = pos + bytes; + cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits; + /* + * Loop over each page and for each page over each buffer. Use goto to + * reduce indentation. + */ + u = 0; +do_next_page: + page = pages[u]; + bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; + bh = head = page_buffers(page); + do { + VCN cdelta; + s64 bh_end; + unsigned bh_cofs; + + /* Clear buffer_new on all buffers to reinitialise state. */ + if (buffer_new(bh)) + clear_buffer_new(bh); + bh_end = bh_pos + blocksize; + bh_cpos = bh_pos >> vol->cluster_size_bits; + bh_cofs = bh_pos & vol->cluster_size_mask; + if (buffer_mapped(bh)) { + /* + * The buffer is already mapped. If it is uptodate, + * ignore it. + */ + if (buffer_uptodate(bh)) + continue; + /* + * The buffer is not uptodate. If the page is uptodate + * set the buffer uptodate and otherwise ignore it. + */ + if (PageUptodate(page)) { + set_buffer_uptodate(bh); + continue; + } + /* + * Neither the page nor the buffer are uptodate. If + * the buffer is only partially being written to, we + * need to read it in before the write, i.e. now. + */ + if ((bh_pos < pos && bh_end > pos) || + (bh_pos < end && bh_end > end)) { + /* + * If the buffer is fully or partially within + * the initialized size, do an actual read. + * Otherwise, simply zero the buffer. + */ + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (bh_pos < initialized_size) { + ntfs_submit_bh_for_read(bh); + *wait_bh++ = bh; + } else { + zero_user(page, bh_offset(bh), + blocksize); + set_buffer_uptodate(bh); + } + } + continue; + } + /* Unmapped buffer. Need to map it. */ + bh->b_bdev = vol->sb->s_bdev; + /* + * If the current buffer is in the same clusters as the map + * cache, there is no need to check the runlist again. The + * map cache is made up of @vcn, which is the first cached file + * cluster, @vcn_len which is the number of cached file + * clusters, @lcn is the device cluster corresponding to @vcn, + * and @lcn_block is the block number corresponding to @lcn. + */ + cdelta = bh_cpos - vcn; + if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) { +map_buffer_cached: + BUG_ON(lcn < 0); + bh->b_blocknr = lcn_block + + (cdelta << (vol->cluster_size_bits - + blocksize_bits)) + + (bh_cofs >> blocksize_bits); + set_buffer_mapped(bh); + /* + * If the page is uptodate so is the buffer. If the + * buffer is fully outside the write, we ignore it if + * it was already allocated and we mark it dirty so it + * gets written out if we allocated it. On the other + * hand, if we allocated the buffer but we are not + * marking it dirty we set buffer_new so we can do + * error recovery. + */ + if (PageUptodate(page)) { + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (unlikely(was_hole)) { + /* We allocated the buffer. */ + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + if (bh_end <= pos || bh_pos >= end) + mark_buffer_dirty(bh); + else + set_buffer_new(bh); + } + continue; + } + /* Page is _not_ uptodate. */ + if (likely(!was_hole)) { + /* + * Buffer was already allocated. If it is not + * uptodate and is only partially being written + * to, we need to read it in before the write, + * i.e. now. + */ + if (!buffer_uptodate(bh) && bh_pos < end && + bh_end > pos && + (bh_pos < pos || + bh_end > end)) { + /* + * If the buffer is fully or partially + * within the initialized size, do an + * actual read. Otherwise, simply zero + * the buffer. + */ + read_lock_irqsave(&ni->size_lock, + flags); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, + flags); + if (bh_pos < initialized_size) { + ntfs_submit_bh_for_read(bh); + *wait_bh++ = bh; + } else { + zero_user(page, bh_offset(bh), + blocksize); + set_buffer_uptodate(bh); + } + } + continue; + } + /* We allocated the buffer. */ + unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + /* + * If the buffer is fully outside the write, zero it, + * set it uptodate, and mark it dirty so it gets + * written out. If it is partially being written to, + * zero region surrounding the write but leave it to + * commit write to do anything else. Finally, if the + * buffer is fully being overwritten, do nothing. + */ + if (bh_end <= pos || bh_pos >= end) { + if (!buffer_uptodate(bh)) { + zero_user(page, bh_offset(bh), + blocksize); + set_buffer_uptodate(bh); + } + mark_buffer_dirty(bh); + continue; + } + set_buffer_new(bh); + if (!buffer_uptodate(bh) && + (bh_pos < pos || bh_end > end)) { + u8 *kaddr; + unsigned pofs; + + kaddr = kmap_atomic(page); + if (bh_pos < pos) { + pofs = bh_pos & ~PAGE_CACHE_MASK; + memset(kaddr + pofs, 0, pos - bh_pos); + } + if (bh_end > end) { + pofs = end & ~PAGE_CACHE_MASK; + memset(kaddr + pofs, 0, bh_end - end); + } + kunmap_atomic(kaddr); + flush_dcache_page(page); + } + continue; + } + /* + * Slow path: this is the first buffer in the cluster. If it + * is outside allocated size and is not uptodate, zero it and + * set it uptodate. + */ + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (bh_pos > initialized_size) { + if (PageUptodate(page)) { + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + } else if (!buffer_uptodate(bh)) { + zero_user(page, bh_offset(bh), blocksize); + set_buffer_uptodate(bh); + } + continue; + } + is_retry = false; + if (!rl) { + down_read(&ni->runlist.lock); +retry_remap: + rl = ni->runlist.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target cluster. */ + while (rl->length && rl[1].vcn <= bh_cpos) + rl++; + lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos); + if (likely(lcn >= 0)) { + /* + * Successful remap, setup the map cache and + * use that to deal with the buffer. + */ + was_hole = false; + vcn = bh_cpos; + vcn_len = rl[1].vcn - vcn; + lcn_block = lcn << (vol->cluster_size_bits - + blocksize_bits); + cdelta = 0; + /* + * If the number of remaining clusters touched + * by the write is smaller or equal to the + * number of cached clusters, unlock the + * runlist as the map cache will be used from + * now on. + */ + if (likely(vcn + vcn_len >= cend)) { + if (rl_write_locked) { + up_write(&ni->runlist.lock); + rl_write_locked = false; + } else + up_read(&ni->runlist.lock); + rl = NULL; + } + goto map_buffer_cached; + } + } else + lcn = LCN_RL_NOT_MAPPED; + /* + * If it is not a hole and not out of bounds, the runlist is + * probably unmapped so try to map it now. + */ + if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) { + if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) { + /* Attempt to map runlist. */ + if (!rl_write_locked) { + /* + * We need the runlist locked for + * writing, so if it is locked for + * reading relock it now and retry in + * case it changed whilst we dropped + * the lock. + */ + up_read(&ni->runlist.lock); + down_write(&ni->runlist.lock); + rl_write_locked = true; + goto retry_remap; + } + err = ntfs_map_runlist_nolock(ni, bh_cpos, + NULL); + if (likely(!err)) { + is_retry = true; + goto retry_remap; + } + /* + * If @vcn is out of bounds, pretend @lcn is + * LCN_ENOENT. As long as the buffer is out + * of bounds this will work fine. + */ + if (err == -ENOENT) { + lcn = LCN_ENOENT; + err = 0; + goto rl_not_mapped_enoent; + } + } else + err = -EIO; + /* Failed to map the buffer, even after retrying. */ + bh->b_blocknr = -1; + ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " + "attribute type 0x%x, vcn 0x%llx, " + "vcn offset 0x%x, because its " + "location on disk could not be " + "determined%s (error code %i).", + ni->mft_no, ni->type, + (unsigned long long)bh_cpos, + (unsigned)bh_pos & + vol->cluster_size_mask, + is_retry ? " even after retrying" : "", + err); + break; + } +rl_not_mapped_enoent: + /* + * The buffer is in a hole or out of bounds. We need to fill + * the hole, unless the buffer is in a cluster which is not + * touched by the write, in which case we just leave the buffer + * unmapped. This can only happen when the cluster size is + * less than the page cache size. + */ + if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) { + bh_cend = (bh_end + vol->cluster_size - 1) >> + vol->cluster_size_bits; + if ((bh_cend <= cpos || bh_cpos >= cend)) { + bh->b_blocknr = -1; + /* + * If the buffer is uptodate we skip it. If it + * is not but the page is uptodate, we can set + * the buffer uptodate. If the page is not + * uptodate, we can clear the buffer and set it + * uptodate. Whether this is worthwhile is + * debatable and this could be removed. + */ + if (PageUptodate(page)) { + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + } else if (!buffer_uptodate(bh)) { + zero_user(page, bh_offset(bh), + blocksize); + set_buffer_uptodate(bh); + } + continue; + } + } + /* + * Out of bounds buffer is invalid if it was not really out of + * bounds. + */ + BUG_ON(lcn != LCN_HOLE); + /* + * We need the runlist locked for writing, so if it is locked + * for reading relock it now and retry in case it changed + * whilst we dropped the lock. + */ + BUG_ON(!rl); + if (!rl_write_locked) { + up_read(&ni->runlist.lock); + down_write(&ni->runlist.lock); + rl_write_locked = true; + goto retry_remap; + } + /* Find the previous last allocated cluster. */ + BUG_ON(rl->lcn != LCN_HOLE); + lcn = -1; + rl2 = rl; + while (--rl2 >= ni->runlist.rl) { + if (rl2->lcn >= 0) { + lcn = rl2->lcn + rl2->length; + break; + } + } + rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE, + false); + if (IS_ERR(rl2)) { + err = PTR_ERR(rl2); + ntfs_debug("Failed to allocate cluster, error code %i.", + err); + break; + } + lcn = rl2->lcn; + rl = ntfs_runlists_merge(ni->runlist.rl, rl2); + if (IS_ERR(rl)) { + err = PTR_ERR(rl); + if (err != -ENOMEM) + err = -EIO; + if (ntfs_cluster_free_from_rl(vol, rl2)) { + ntfs_error(vol->sb, "Failed to release " + "allocated cluster in error " + "code path. Run chkdsk to " + "recover the lost cluster."); + NVolSetErrors(vol); + } + ntfs_free(rl2); + break; + } + ni->runlist.rl = rl; + status.runlist_merged = 1; + ntfs_debug("Allocated cluster, lcn 0x%llx.", + (unsigned long long)lcn); + /* Map and lock the mft record and get the attribute record. */ + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + break; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + unmap_mft_record(base_ni); + break; + } + status.mft_attr_mapped = 1; + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, bh_cpos, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + break; + } + m = ctx->mrec; + a = ctx->attr; + /* + * Find the runlist element with which the attribute extent + * starts. Note, we cannot use the _attr_ version because we + * have mapped the mft record. That is ok because we know the + * runlist fragment must be mapped already to have ever gotten + * here, so we can just use the _rl_ version. + */ + vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn); + rl2 = ntfs_rl_find_vcn_nolock(rl, vcn); + BUG_ON(!rl2); + BUG_ON(!rl2->length); + BUG_ON(rl2->lcn < LCN_HOLE); + highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); + /* + * If @highest_vcn is zero, calculate the real highest_vcn + * (which can really be zero). + */ + if (!highest_vcn) + highest_vcn = (sle64_to_cpu( + a->data.non_resident.allocated_size) >> + vol->cluster_size_bits) - 1; + /* + * Determine the size of the mapping pairs array for the new + * extent, i.e. the old extent with the hole filled. + */ + mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn, + highest_vcn); + if (unlikely(mp_size <= 0)) { + if (!(err = mp_size)) + err = -EIO; + ntfs_debug("Failed to get size for mapping pairs " + "array, error code %i.", err); + break; + } + /* + * Resize the attribute record to fit the new mapping pairs + * array. + */ + attr_rec_len = le32_to_cpu(a->length); + err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu( + a->data.non_resident.mapping_pairs_offset)); + if (unlikely(err)) { + BUG_ON(err != -ENOSPC); + // TODO: Deal with this by using the current attribute + // and fill it with as much of the mapping pairs + // array as possible. Then loop over each attribute + // extent rewriting the mapping pairs arrays as we go + // along and if when we reach the end we have not + // enough space, try to resize the last attribute + // extent and if even that fails, add a new attribute + // extent. + // We could also try to resize at each step in the hope + // that we will not need to rewrite every single extent. + // Note, we may need to decompress some extents to fill + // the runlist as we are walking the extents... + ntfs_error(vol->sb, "Not enough space in the mft " + "record for the extended attribute " + "record. This case is not " + "implemented yet."); + err = -EOPNOTSUPP; + break ; + } + status.mp_rebuilt = 1; + /* + * Generate the mapping pairs array directly into the attribute + * record. + */ + err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( + a->data.non_resident.mapping_pairs_offset), + mp_size, rl2, vcn, highest_vcn, NULL); + if (unlikely(err)) { + ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, " + "attribute type 0x%x, because building " + "the mapping pairs failed with error " + "code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + err = -EIO; + break; + } + /* Update the highest_vcn but only if it was not set. */ + if (unlikely(!a->data.non_resident.highest_vcn)) + a->data.non_resident.highest_vcn = + cpu_to_sle64(highest_vcn); + /* + * If the attribute is sparse/compressed, update the compressed + * size in the ntfs_inode structure and the attribute record. + */ + if (likely(NInoSparse(ni) || NInoCompressed(ni))) { + /* + * If we are not in the first attribute extent, switch + * to it, but first ensure the changes will make it to + * disk later. + */ + if (a->data.non_resident.lowest_vcn) { + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_reinit_search_ctx(ctx); + err = ntfs_attr_lookup(ni->type, ni->name, + ni->name_len, CASE_SENSITIVE, + 0, NULL, 0, ctx); + if (unlikely(err)) { + status.attr_switched = 1; + break; + } + /* @m is not used any more so do not set it. */ + a = ctx->attr; + } + write_lock_irqsave(&ni->size_lock, flags); + ni->itype.compressed.size += vol->cluster_size; + a->data.non_resident.compressed_size = + cpu_to_sle64(ni->itype.compressed.size); + write_unlock_irqrestore(&ni->size_lock, flags); + } + /* Ensure the changes make it to disk. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + /* Successfully filled the hole. */ + status.runlist_merged = 0; + status.mft_attr_mapped = 0; + status.mp_rebuilt = 0; + /* Setup the map cache and use that to deal with the buffer. */ + was_hole = true; + vcn = bh_cpos; + vcn_len = 1; + lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits); + cdelta = 0; + /* + * If the number of remaining clusters in the @pages is smaller + * or equal to the number of cached clusters, unlock the + * runlist as the map cache will be used from now on. + */ + if (likely(vcn + vcn_len >= cend)) { + up_write(&ni->runlist.lock); + rl_write_locked = false; + rl = NULL; + } + goto map_buffer_cached; + } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); + /* If there are no errors, do the next page. */ + if (likely(!err && ++u < nr_pages)) + goto do_next_page; + /* If there are no errors, release the runlist lock if we took it. */ + if (likely(!err)) { + if (unlikely(rl_write_locked)) { + up_write(&ni->runlist.lock); + rl_write_locked = false; + } else if (unlikely(rl)) + up_read(&ni->runlist.lock); + rl = NULL; + } + /* If we issued read requests, let them complete. */ + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + while (wait_bh > wait) { + bh = *--wait_bh; + wait_on_buffer(bh); + if (likely(buffer_uptodate(bh))) { + page = bh->b_page; + bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) + + bh_offset(bh); + /* + * If the buffer overflows the initialized size, need + * to zero the overflowing region. + */ + if (unlikely(bh_pos + blocksize > initialized_size)) { + int ofs = 0; + + if (likely(bh_pos < initialized_size)) + ofs = initialized_size - bh_pos; + zero_user_segment(page, bh_offset(bh) + ofs, + blocksize); + } + } else /* if (unlikely(!buffer_uptodate(bh))) */ + err = -EIO; + } + if (likely(!err)) { + /* Clear buffer_new on all buffers. */ + u = 0; + do { + bh = head = page_buffers(pages[u]); + do { + if (buffer_new(bh)) + clear_buffer_new(bh); + } while ((bh = bh->b_this_page) != head); + } while (++u < nr_pages); + ntfs_debug("Done."); + return err; + } + if (status.attr_switched) { + /* Get back to the attribute extent we modified. */ + ntfs_attr_reinit_search_ctx(ctx); + if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) { + ntfs_error(vol->sb, "Failed to find required " + "attribute extent of attribute in " + "error code path. Run chkdsk to " + "recover."); + write_lock_irqsave(&ni->size_lock, flags); + ni->itype.compressed.size += vol->cluster_size; + write_unlock_irqrestore(&ni->size_lock, flags); + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + /* + * The only thing that is now wrong is the compressed + * size of the base attribute extent which chkdsk + * should be able to fix. + */ + NVolSetErrors(vol); + } else { + m = ctx->mrec; + a = ctx->attr; + status.attr_switched = 0; + } + } + /* + * If the runlist has been modified, need to restore it by punching a + * hole into it and we then need to deallocate the on-disk cluster as + * well. Note, we only modify the runlist if we are able to generate a + * new mapping pairs array, i.e. only when the mapped attribute extent + * is not switched. + */ + if (status.runlist_merged && !status.attr_switched) { + BUG_ON(!rl_write_locked); + /* Make the file cluster we allocated sparse in the runlist. */ + if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) { + ntfs_error(vol->sb, "Failed to punch hole into " + "attribute runlist in error code " + "path. Run chkdsk to recover the " + "lost cluster."); + NVolSetErrors(vol); + } else /* if (success) */ { + status.runlist_merged = 0; + /* + * Deallocate the on-disk cluster we allocated but only + * if we succeeded in punching its vcn out of the + * runlist. + */ + down_write(&vol->lcnbmp_lock); + if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { + ntfs_error(vol->sb, "Failed to release " + "allocated cluster in error " + "code path. Run chkdsk to " + "recover the lost cluster."); + NVolSetErrors(vol); + } + up_write(&vol->lcnbmp_lock); + } + } + /* + * Resize the attribute record to its old size and rebuild the mapping + * pairs array. Note, we only can do this if the runlist has been + * restored to its old state which also implies that the mapped + * attribute extent is not switched. + */ + if (status.mp_rebuilt && !status.runlist_merged) { + if (ntfs_attr_record_resize(m, a, attr_rec_len)) { + ntfs_error(vol->sb, "Failed to restore attribute " + "record in error code path. Run " + "chkdsk to recover."); + NVolSetErrors(vol); + } else /* if (success) */ { + if (ntfs_mapping_pairs_build(vol, (u8*)a + + le16_to_cpu(a->data.non_resident. + mapping_pairs_offset), attr_rec_len - + le16_to_cpu(a->data.non_resident. + mapping_pairs_offset), ni->runlist.rl, + vcn, highest_vcn, NULL)) { + ntfs_error(vol->sb, "Failed to restore " + "mapping pairs array in error " + "code path. Run chkdsk to " + "recover."); + NVolSetErrors(vol); + } + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + } + } + /* Release the mft record and the attribute. */ + if (status.mft_attr_mapped) { + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + } + /* Release the runlist lock. */ + if (rl_write_locked) + up_write(&ni->runlist.lock); + else if (rl) + up_read(&ni->runlist.lock); + /* + * Zero out any newly allocated blocks to avoid exposing stale data. + * If BH_New is set, we know that the block was newly allocated above + * and that it has not been fully zeroed and marked dirty yet. + */ + nr_pages = u; + u = 0; + end = bh_cpos << vol->cluster_size_bits; + do { + page = pages[u]; + bh = head = page_buffers(page); + do { + if (u == nr_pages && + ((s64)page->index << PAGE_CACHE_SHIFT) + + bh_offset(bh) >= end) + break; + if (!buffer_new(bh)) + continue; + clear_buffer_new(bh); + if (!buffer_uptodate(bh)) { + if (PageUptodate(page)) + set_buffer_uptodate(bh); + else { + zero_user(page, bh_offset(bh), + blocksize); + set_buffer_uptodate(bh); + } + } + mark_buffer_dirty(bh); + } while ((bh = bh->b_this_page) != head); + } while (++u <= nr_pages); + ntfs_error(vol->sb, "Failed. Returning error code %i.", err); + return err; +} + +/* + * Copy as much as we can into the pages and return the number of bytes which + * were successfully copied. If a fault is encountered then clear the pages + * out to (ofs + bytes) and return the number of bytes which were copied. + */ +static inline size_t ntfs_copy_from_user(struct page **pages, + unsigned nr_pages, unsigned ofs, const char __user *buf, + size_t bytes) +{ + struct page **last_page = pages + nr_pages; + char *addr; + size_t total = 0; + unsigned len; + int left; + + do { + len = PAGE_CACHE_SIZE - ofs; + if (len > bytes) + len = bytes; + addr = kmap_atomic(*pages); + left = __copy_from_user_inatomic(addr + ofs, buf, len); + kunmap_atomic(addr); + if (unlikely(left)) { + /* Do it the slow way. */ + addr = kmap(*pages); + left = __copy_from_user(addr + ofs, buf, len); + kunmap(*pages); + if (unlikely(left)) + goto err_out; + } + total += len; + bytes -= len; + if (!bytes) + break; + buf += len; + ofs = 0; + } while (++pages < last_page); +out: + return total; +err_out: + total += len - left; + /* Zero the rest of the target like __copy_from_user(). */ + while (++pages < last_page) { + bytes -= len; + if (!bytes) + break; + len = PAGE_CACHE_SIZE; + if (len > bytes) + len = bytes; + zero_user(*pages, 0, len); + } + goto out; +} + +static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr, + const struct iovec *iov, size_t iov_ofs, size_t bytes) +{ + size_t total = 0; + + while (1) { + const char __user *buf = iov->iov_base + iov_ofs; + unsigned len; + size_t left; + + len = iov->iov_len - iov_ofs; + if (len > bytes) + len = bytes; + left = __copy_from_user_inatomic(vaddr, buf, len); + total += len; + bytes -= len; + vaddr += len; + if (unlikely(left)) { + total -= left; + break; + } + if (!bytes) + break; + iov++; + iov_ofs = 0; + } + return total; +} + +static inline void ntfs_set_next_iovec(const struct iovec **iovp, + size_t *iov_ofsp, size_t bytes) +{ + const struct iovec *iov = *iovp; + size_t iov_ofs = *iov_ofsp; + + while (bytes) { + unsigned len; + + len = iov->iov_len - iov_ofs; + if (len > bytes) + len = bytes; + bytes -= len; + iov_ofs += len; + if (iov->iov_len == iov_ofs) { + iov++; + iov_ofs = 0; + } + } + *iovp = iov; + *iov_ofsp = iov_ofs; +} + +/* + * This has the same side-effects and return value as ntfs_copy_from_user(). + * The difference is that on a fault we need to memset the remainder of the + * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s + * single-segment behaviour. + * + * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when + * atomic and when not atomic. This is ok because it calls + * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In + * fact, the only difference between __copy_from_user_inatomic() and + * __copy_from_user() is that the latter calls might_sleep() and the former + * should not zero the tail of the buffer on error. And on many architectures + * __copy_from_user_inatomic() is just defined to __copy_from_user() so it + * makes no difference at all on those architectures. + */ +static inline size_t ntfs_copy_from_user_iovec(struct page **pages, + unsigned nr_pages, unsigned ofs, const struct iovec **iov, + size_t *iov_ofs, size_t bytes) +{ + struct page **last_page = pages + nr_pages; + char *addr; + size_t copied, len, total = 0; + + do { + len = PAGE_CACHE_SIZE - ofs; + if (len > bytes) + len = bytes; + addr = kmap_atomic(*pages); + copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, + *iov, *iov_ofs, len); + kunmap_atomic(addr); + if (unlikely(copied != len)) { + /* Do it the slow way. */ + addr = kmap(*pages); + copied = __ntfs_copy_from_user_iovec_inatomic(addr + + ofs, *iov, *iov_ofs, len); + if (unlikely(copied != len)) + goto err_out; + kunmap(*pages); + } + total += len; + ntfs_set_next_iovec(iov, iov_ofs, len); + bytes -= len; + if (!bytes) + break; + ofs = 0; + } while (++pages < last_page); +out: + return total; +err_out: + BUG_ON(copied > len); + /* Zero the rest of the target like __copy_from_user(). */ + memset(addr + ofs + copied, 0, len - copied); + kunmap(*pages); + total += copied; + ntfs_set_next_iovec(iov, iov_ofs, copied); + while (++pages < last_page) { + bytes -= len; + if (!bytes) + break; + len = PAGE_CACHE_SIZE; + if (len > bytes) + len = bytes; + zero_user(*pages, 0, len); + } + goto out; +} + +static inline void ntfs_flush_dcache_pages(struct page **pages, + unsigned nr_pages) +{ + BUG_ON(!nr_pages); + /* + * Warning: Do not do the decrement at the same time as the call to + * flush_dcache_page() because it is a NULL macro on i386 and hence the + * decrement never happens so the loop never terminates. + */ + do { + --nr_pages; + flush_dcache_page(pages[nr_pages]); + } while (nr_pages > 0); +} + +/** + * ntfs_commit_pages_after_non_resident_write - commit the received data + * @pages: array of destination pages + * @nr_pages: number of pages in @pages + * @pos: byte position in file at which the write begins + * @bytes: number of bytes to be written + * + * See description of ntfs_commit_pages_after_write(), below. + */ +static inline int ntfs_commit_pages_after_non_resident_write( + struct page **pages, const unsigned nr_pages, + s64 pos, size_t bytes) +{ + s64 end, initialized_size; + struct inode *vi; + ntfs_inode *ni, *base_ni; + struct buffer_head *bh, *head; + ntfs_attr_search_ctx *ctx; + MFT_RECORD *m; + ATTR_RECORD *a; + unsigned long flags; + unsigned blocksize, u; + int err; + + vi = pages[0]->mapping->host; + ni = NTFS_I(vi); + blocksize = vi->i_sb->s_blocksize; + end = pos + bytes; + u = 0; + do { + s64 bh_pos; + struct page *page; + bool partial; + + page = pages[u]; + bh_pos = (s64)page->index << PAGE_CACHE_SHIFT; + bh = head = page_buffers(page); + partial = false; + do { + s64 bh_end; + + bh_end = bh_pos + blocksize; + if (bh_end <= pos || bh_pos >= end) { + if (!buffer_uptodate(bh)) + partial = true; + } else { + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + } + } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); + /* + * If all buffers are now uptodate but the page is not, set the + * page uptodate. + */ + if (!partial && !PageUptodate(page)) + SetPageUptodate(page); + } while (++u < nr_pages); + /* + * Finally, if we do not need to update initialized_size or i_size we + * are finished. + */ + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (end <= initialized_size) { + ntfs_debug("Done."); + return 0; + } + /* + * Update initialized_size/i_size as appropriate, both in the inode and + * the mft record. + */ + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + /* Map, pin, and lock the mft record. */ + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + BUG_ON(!NInoNonResident(ni)); + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + a = ctx->attr; + BUG_ON(!a->non_resident); + write_lock_irqsave(&ni->size_lock, flags); + BUG_ON(end > ni->allocated_size); + ni->initialized_size = end; + a->data.non_resident.initialized_size = cpu_to_sle64(end); + if (end > i_size_read(vi)) { + i_size_write(vi, end); + a->data.non_resident.data_size = + a->data.non_resident.initialized_size; + } + write_unlock_irqrestore(&ni->size_lock, flags); + /* Mark the mft record dirty, so it gets written back. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + ntfs_debug("Done."); + return 0; +err_out: + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error " + "code %i).", err); + if (err != -ENOMEM) + NVolSetErrors(ni->vol); + return err; +} + +/** + * ntfs_commit_pages_after_write - commit the received data + * @pages: array of destination pages + * @nr_pages: number of pages in @pages + * @pos: byte position in file at which the write begins + * @bytes: number of bytes to be written + * + * This is called from ntfs_file_buffered_write() with i_mutex held on the inode + * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are + * locked but not kmap()ped. The source data has already been copied into the + * @page. ntfs_prepare_pages_for_non_resident_write() has been called before + * the data was copied (for non-resident attributes only) and it returned + * success. + * + * Need to set uptodate and mark dirty all buffers within the boundary of the + * write. If all buffers in a page are uptodate we set the page uptodate, too. + * + * Setting the buffers dirty ensures that they get written out later when + * ntfs_writepage() is invoked by the VM. + * + * Finally, we need to update i_size and initialized_size as appropriate both + * in the inode and the mft record. + * + * This is modelled after fs/buffer.c::generic_commit_write(), which marks + * buffers uptodate and dirty, sets the page uptodate if all buffers in the + * page are uptodate, and updates i_size if the end of io is beyond i_size. In + * that case, it also marks the inode dirty. + * + * If things have gone as outlined in + * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page + * content modifications here for non-resident attributes. For resident + * attributes we need to do the uptodate bringing here which we combine with + * the copying into the mft record which means we save one atomic kmap. + * + * Return 0 on success or -errno on error. + */ +static int ntfs_commit_pages_after_write(struct page **pages, + const unsigned nr_pages, s64 pos, size_t bytes) +{ + s64 end, initialized_size; + loff_t i_size; + struct inode *vi; + ntfs_inode *ni, *base_ni; + struct page *page; + ntfs_attr_search_ctx *ctx; + MFT_RECORD *m; + ATTR_RECORD *a; + char *kattr, *kaddr; + unsigned long flags; + u32 attr_len; + int err; + + BUG_ON(!nr_pages); + BUG_ON(!pages); + page = pages[0]; + BUG_ON(!page); + vi = page->mapping->host; + ni = NTFS_I(vi); + ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " + "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", + vi->i_ino, ni->type, page->index, nr_pages, + (long long)pos, bytes); + if (NInoNonResident(ni)) + return ntfs_commit_pages_after_non_resident_write(pages, + nr_pages, pos, bytes); + BUG_ON(nr_pages > 1); + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * sparse. + */ + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + BUG_ON(NInoNonResident(ni)); + /* Map, pin, and lock the mft record. */ + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + a = ctx->attr; + BUG_ON(a->non_resident); + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(a->data.resident.value_length); + i_size = i_size_read(vi); + BUG_ON(attr_len != i_size); + BUG_ON(pos > attr_len); + end = pos + bytes; + BUG_ON(end > le32_to_cpu(a->length) - + le16_to_cpu(a->data.resident.value_offset)); + kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); + kaddr = kmap_atomic(page); + /* Copy the received data from the page to the mft record. */ + memcpy(kattr + pos, kaddr + pos, bytes); + /* Update the attribute length if necessary. */ + if (end > attr_len) { + attr_len = end; + a->data.resident.value_length = cpu_to_le32(attr_len); + } + /* + * If the page is not uptodate, bring the out of bounds area(s) + * uptodate by copying data from the mft record to the page. + */ + if (!PageUptodate(page)) { + if (pos > 0) + memcpy(kaddr, kattr, pos); + if (end < attr_len) + memcpy(kaddr + end, kattr + end, attr_len - end); + /* Zero the region outside the end of the attribute value. */ + memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + flush_dcache_page(page); + SetPageUptodate(page); + } + kunmap_atomic(kaddr); + /* Update initialized_size/i_size if necessary. */ + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->initialized_size; + BUG_ON(end > ni->allocated_size); + read_unlock_irqrestore(&ni->size_lock, flags); + BUG_ON(initialized_size != i_size); + if (end > initialized_size) { + write_lock_irqsave(&ni->size_lock, flags); + ni->initialized_size = end; + i_size_write(vi, end); + write_unlock_irqrestore(&ni->size_lock, flags); + } + /* Mark the mft record dirty, so it gets written back. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + ntfs_debug("Done."); + return 0; +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Error allocating memory required to " + "commit the write."); + if (PageUptodate(page)) { + ntfs_warning(vi->i_sb, "Page is uptodate, setting " + "dirty so the write will be retried " + "later on by the VM."); + /* + * Put the page on mapping->dirty_pages, but leave its + * buffers' dirty state as-is. + */ + __set_page_dirty_nobuffers(page); + err = 0; + } else + ntfs_error(vi->i_sb, "Page is not uptodate. Written " + "data has been lost."); + } else { + ntfs_error(vi->i_sb, "Resident attribute commit write failed " + "with error %i.", err); + NVolSetErrors(ni->vol); + } + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + return err; +} + +static void ntfs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + ntfs_truncate_vfs(inode); + } +} + +/** + * ntfs_file_buffered_write - + * + * Locking: The vfs is holding ->i_mutex on the inode. + */ +static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos, loff_t *ppos, size_t count) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *vi = mapping->host; + ntfs_inode *ni = NTFS_I(vi); + ntfs_volume *vol = ni->vol; + struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; + struct page *cached_page = NULL; + char __user *buf = NULL; + s64 end, ll; + VCN last_vcn; + LCN lcn; + unsigned long flags; + size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ + ssize_t status, written; + unsigned nr_pages; + int err; + + ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " + "pos 0x%llx, count 0x%lx.", + vi->i_ino, (unsigned)le32_to_cpu(ni->type), + (unsigned long long)pos, (unsigned long)count); + if (unlikely(!count)) + return 0; + BUG_ON(NInoMstProtected(ni)); + /* + * If the attribute is not an index root and it is encrypted or + * compressed, we cannot write to it yet. Note we need to check for + * AT_INDEX_ALLOCATION since this is the type of both directory and + * index inodes. + */ + if (ni->type != AT_INDEX_ALLOCATION) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + /* + * Reminder for later: Encrypted files are _always_ + * non-resident so that the content can always be + * encrypted. + */ + ntfs_debug("Denying write access to encrypted file."); + return -EACCES; + } + if (NInoCompressed(ni)) { + /* Only unnamed $DATA attribute can be compressed. */ + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + /* + * Reminder for later: If resident, the data is not + * actually compressed. Only on the switch to non- + * resident does compression kick in. This is in + * contrast to encrypted files (see above). + */ + ntfs_error(vi->i_sb, "Writing to compressed files is " + "not implemented yet. Sorry."); + return -EOPNOTSUPP; + } + } + /* + * If a previous ntfs_truncate() failed, repeat it and abort if it + * fails again. + */ + if (unlikely(NInoTruncateFailed(ni))) { + inode_dio_wait(vi); + err = ntfs_truncate(vi); + if (err || NInoTruncateFailed(ni)) { + if (!err) + err = -EIO; + ntfs_error(vol->sb, "Cannot perform write to inode " + "0x%lx, attribute type 0x%x, because " + "ntfs_truncate() failed (error code " + "%i).", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + return err; + } + } + /* The first byte after the write. */ + end = pos + count; + /* + * If the write goes beyond the allocated size, extend the allocation + * to cover the whole of the write, rounded up to the nearest cluster. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (end > ll) { + /* Extend the allocation without changing the data size. */ + ll = ntfs_attr_extend_allocation(ni, end, -1, pos); + if (likely(ll >= 0)) { + BUG_ON(pos >= ll); + /* If the extension was partial truncate the write. */ + if (end > ll) { + ntfs_debug("Truncating write to inode 0x%lx, " + "attribute type 0x%x, because " + "the allocation was only " + "partially extended.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + end = ll; + count = ll - pos; + } + } else { + err = ll; + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* Perform a partial write if possible or fail. */ + if (pos < ll) { + ntfs_debug("Truncating write to inode 0x%lx, " + "attribute type 0x%x, because " + "extending the allocation " + "failed (error code %i).", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type), err); + end = ll; + count = ll - pos; + } else { + ntfs_error(vol->sb, "Cannot perform write to " + "inode 0x%lx, attribute type " + "0x%x, because extending the " + "allocation failed (error " + "code %i).", vi->i_ino, + (unsigned) + le32_to_cpu(ni->type), err); + return err; + } + } + } + written = 0; + /* + * If the write starts beyond the initialized size, extend it up to the + * beginning of the write and initialize all non-sparse space between + * the old initialized size and the new one. This automatically also + * increments the vfs inode->i_size to keep it above or equal to the + * initialized_size. + */ + read_lock_irqsave(&ni->size_lock, flags); + ll = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + if (pos > ll) { + err = ntfs_attr_extend_initialized(ni, pos); + if (err < 0) { + ntfs_error(vol->sb, "Cannot perform write to inode " + "0x%lx, attribute type 0x%x, because " + "extending the initialized size " + "failed (error code %i).", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + status = err; + goto err_out; + } + } + /* + * Determine the number of pages per cluster for non-resident + * attributes. + */ + nr_pages = 1; + if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) + nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; + /* Finally, perform the actual write. */ + last_vcn = -1; + if (likely(nr_segs == 1)) + buf = iov->iov_base; + do { + VCN vcn; + pgoff_t idx, start_idx; + unsigned ofs, do_pages, u; + size_t copied; + + start_idx = idx = pos >> PAGE_CACHE_SHIFT; + ofs = pos & ~PAGE_CACHE_MASK; + bytes = PAGE_CACHE_SIZE - ofs; + do_pages = 1; + if (nr_pages > 1) { + vcn = pos >> vol->cluster_size_bits; + if (vcn != last_vcn) { + last_vcn = vcn; + /* + * Get the lcn of the vcn the write is in. If + * it is a hole, need to lock down all pages in + * the cluster. + */ + down_read(&ni->runlist.lock); + lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >> + vol->cluster_size_bits, false); + up_read(&ni->runlist.lock); + if (unlikely(lcn < LCN_HOLE)) { + status = -EIO; + if (lcn == LCN_ENOMEM) + status = -ENOMEM; + else + ntfs_error(vol->sb, "Cannot " + "perform write to " + "inode 0x%lx, " + "attribute type 0x%x, " + "because the attribute " + "is corrupt.", + vi->i_ino, (unsigned) + le32_to_cpu(ni->type)); + break; + } + if (lcn == LCN_HOLE) { + start_idx = (pos & ~(s64) + vol->cluster_size_mask) + >> PAGE_CACHE_SHIFT; + bytes = vol->cluster_size - (pos & + vol->cluster_size_mask); + do_pages = nr_pages; + } + } + } + if (bytes > count) + bytes = count; + /* + * Bring in the user page(s) that we will copy from _first_. + * Otherwise there is a nasty deadlock on copying from the same + * page(s) as we are writing to, without it/them being marked + * up-to-date. Note, at present there is nothing to stop the + * pages being swapped out between us bringing them into memory + * and doing the actual copying. + */ + if (likely(nr_segs == 1)) + ntfs_fault_in_pages_readable(buf, bytes); + else + ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); + /* Get and lock @do_pages starting at index @start_idx. */ + status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, + pages, &cached_page); + if (unlikely(status)) + break; + /* + * For non-resident attributes, we need to fill any holes with + * actual clusters and ensure all bufferes are mapped. We also + * need to bring uptodate any buffers that are only partially + * being written to. + */ + if (NInoNonResident(ni)) { + status = ntfs_prepare_pages_for_non_resident_write( + pages, do_pages, pos, bytes); + if (unlikely(status)) { + loff_t i_size; + + do { + unlock_page(pages[--do_pages]); + page_cache_release(pages[do_pages]); + } while (do_pages); + /* + * The write preparation may have instantiated + * allocated space outside i_size. Trim this + * off again. We can ignore any errors in this + * case as we will just be waisting a bit of + * allocated space, which is not a disaster. + */ + i_size = i_size_read(vi); + if (pos + bytes > i_size) { + ntfs_write_failed(mapping, pos + bytes); + } + break; + } + } + u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; + if (likely(nr_segs == 1)) { + copied = ntfs_copy_from_user(pages + u, do_pages - u, + ofs, buf, bytes); + buf += copied; + } else + copied = ntfs_copy_from_user_iovec(pages + u, + do_pages - u, ofs, &iov, &iov_ofs, + bytes); + ntfs_flush_dcache_pages(pages + u, do_pages - u); + status = ntfs_commit_pages_after_write(pages, do_pages, pos, + bytes); + if (likely(!status)) { + written += copied; + count -= copied; + pos += copied; + if (unlikely(copied != bytes)) + status = -EFAULT; + } + do { + unlock_page(pages[--do_pages]); + page_cache_release(pages[do_pages]); + } while (do_pages); + if (unlikely(status)) + break; + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } while (count); +err_out: + *ppos = pos; + if (cached_page) + page_cache_release(cached_page); + ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", + written ? "written" : "status", (unsigned long)written, + (long)status); + return written ? written : status; +} + +/** + * ntfs_file_aio_write_nolock - + */ +static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t *ppos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + loff_t pos; + size_t count; /* after file limit checks */ + ssize_t written, err; + + count = iov_length(iov, nr_segs); + pos = *ppos; + /* We can write back this queue in page reclaim. */ + current->backing_dev_info = mapping->backing_dev_info; + written = 0; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (!count) + goto out; + err = file_remove_suid(file); + if (err) + goto out; + err = file_update_time(file); + if (err) + goto out; + written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, + count); +out: + current->backing_dev_info = NULL; + return written ? written : err; +} + +/** + * ntfs_file_aio_write - + */ +static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + + BUG_ON(iocb->ki_pos != pos); + + mutex_lock(&inode->i_mutex); + ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + if (ret > 0) { + int err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) + ret = err; + } + return ret; +} + +/** * ntfs_file_fsync - sync a file to disk * @filp: file to be synced - * @dentry: dentry describing the file to sync * @datasync: if non-zero only flush user data and not metadata * * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync @@ -74,25 +2155,28 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) * Also, if @datasync is true, we do not wait on the inode to be written out * but we always wait on the page cache pages to be written out. * - * Note: In the past @filp could be NULL so we ignore it as we don't need it - * anyway. - * - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore * this problem for now. */ -static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, - int datasync) +static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { - struct inode *vi = dentry->d_inode; + struct inode *vi = filp->f_mapping->host; int err, ret = 0; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); + + err = filemap_write_and_wait_range(vi->i_mapping, start, end); + if (err) + return err; + mutex_lock(&vi->i_mutex); + BUG_ON(S_ISDIR(vi->i_mode)); if (!datasync || !NInoNonResident(NTFS_I(vi))) - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); /* * NOTE: If we were to use mapping->private_list (see ext2 and @@ -107,54 +2191,52 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, else ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " "%u.", datasync ? "data" : "", vi->i_ino, -ret); + mutex_unlock(&vi->i_mutex); return ret; } #endif /* NTFS_RW */ -struct file_operations ntfs_file_ops = { - .llseek = generic_file_llseek, /* Seek inside file. */ - .read = generic_file_read, /* Read from file. */ - .aio_read = generic_file_aio_read, /* Async read from file. */ - .readv = generic_file_readv, /* Read from file. */ +const struct file_operations ntfs_file_ops = { + .llseek = generic_file_llseek, /* Seek inside file. */ + .read = new_sync_read, /* Read from file. */ + .read_iter = generic_file_read_iter, /* Async read from file. */ #ifdef NTFS_RW - .write = generic_file_write, /* Write to file. */ - .aio_write = generic_file_aio_write, /* Async write to file. */ - .writev = generic_file_writev, /* Write to file. */ - /*.release = ,*/ /* Last file is closed. See - fs/ext2/file.c:: - ext2_release_file() for - how to use this to discard - preallocated space for - write opened files. */ - .fsync = ntfs_file_fsync, /* Sync a file to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a - kiocb. */ + .write = do_sync_write, /* Write to file. */ + .aio_write = ntfs_file_aio_write, /* Async write to file. */ + /*.release = ,*/ /* Last file is closed. See + fs/ext2/file.c:: + ext2_release_file() for + how to use this to discard + preallocated space for + write opened files. */ + .fsync = ntfs_file_fsync, /* Sync a file to disk. */ + /*.aio_fsync = ,*/ /* Sync all outstanding async + i/o operations on a + kiocb. */ #endif /* NTFS_RW */ - /*.ioctl = ,*/ /* Perform function on the - mounted filesystem. */ - .mmap = generic_file_mmap, /* Mmap file. */ - .open = ntfs_file_open, /* Open file. */ - .sendfile = generic_file_sendfile, /* Zero-copy data send with - the data source being on - the ntfs partition. We - do not need to care about - the data destination. */ - /*.sendpage = ,*/ /* Zero-copy data send with - the data destination being - on the ntfs partition. We - do not need to care about - the data source. */ + /*.ioctl = ,*/ /* Perform function on the + mounted filesystem. */ + .mmap = generic_file_mmap, /* Mmap file. */ + .open = ntfs_file_open, /* Open file. */ + .splice_read = generic_file_splice_read /* Zero-copy data send with + the data source being on + the ntfs partition. We do + not need to care about the + data destination. */ + /*.sendpage = ,*/ /* Zero-copy data send with + the data destination being + on the ntfs partition. We + do not need to care about + the data source. */ }; -struct inode_operations ntfs_file_inode_ops = { +const struct inode_operations ntfs_file_inode_ops = { #ifdef NTFS_RW - .truncate = ntfs_truncate_vfs, .setattr = ntfs_setattr, #endif /* NTFS_RW */ }; -struct file_operations ntfs_empty_file_ops = {}; +const struct file_operations ntfs_empty_file_ops = {}; -struct inode_operations ntfs_empty_inode_ops = {}; +const struct inode_operations ntfs_empty_inode_ops = {}; diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 8f2d5727546..096c135691a 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -19,6 +19,8 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/slab.h> + #include "aops.h" #include "collate.h" #include "debug.h" @@ -32,13 +34,13 @@ * Allocate a new index context, initialize it with @idx_ni and return it. * Return NULL if allocation failed. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) { ntfs_index_context *ictx; - ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); + ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS); if (ictx) *ictx = (ntfs_index_context){ .idx_ni = idx_ni }; return ictx; @@ -50,7 +52,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) * * Release the index context @ictx, releasing all associated resources. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ void ntfs_index_ctx_put(ntfs_index_context *ictx) { @@ -106,7 +108,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx) * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to * ensure that the changes are written to disk. * - * Locking: - Caller must hold i_sem on the index inode. + * Locking: - Caller must hold i_mutex on the index inode. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -204,7 +206,7 @@ int ntfs_index_lookup(const void *key, const int key_len, if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key, &ie->key, key_len)) { ir_done: - ictx->is_in_root = TRUE; + ictx->is_in_root = true; ictx->ir = ir; ictx->actx = actx; ictx->base_ni = base_ni; @@ -374,7 +376,7 @@ fast_descend_into_child_node: if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key, &ie->key, key_len)) { ia_done: - ictx->is_in_root = FALSE; + ictx->is_in_root = false; ictx->actx = NULL; ictx->base_ni = NULL; ictx->ia = ia; diff --git a/fs/ntfs/index.h b/fs/ntfs/index.h index 846a489e869..8745469c398 100644 --- a/fs/ntfs/index.h +++ b/fs/ntfs/index.h @@ -37,12 +37,12 @@ * @entry: index entry (points into @ir or @ia) * @data: index entry data (points into @entry) * @data_len: length in bytes of @data - * @is_in_root: TRUE if @entry is in @ir and FALSE if it is in @ia + * @is_in_root: 'true' if @entry is in @ir and 'false' if it is in @ia * @ir: index root if @is_in_root and NULL otherwise * @actx: attribute search context if @is_in_root and NULL otherwise * @base_ni: base inode if @is_in_root and NULL otherwise - * @ia: index block if @is_in_root is FALSE and NULL otherwise - * @page: page if @is_in_root is FALSE and NULL otherwise + * @ia: index block if @is_in_root is 'false' and NULL otherwise + * @page: page if @is_in_root is 'false' and NULL otherwise * * @idx_ni is the index inode this context belongs to. * @@ -50,11 +50,11 @@ * are the index entry data and its length in bytes, respectively. @data * simply points into @entry. This is probably what the user is interested in. * - * If @is_in_root is TRUE, @entry is in the index root attribute @ir described + * If @is_in_root is 'true', @entry is in the index root attribute @ir described * by the attribute search context @actx and the base inode @base_ni. @ia and * @page are NULL in this case. * - * If @is_in_root is FALSE, @entry is in the index allocation attribute and @ia + * If @is_in_root is 'false', @entry is in the index allocation attribute and @ia * and @page point to the index allocation block and the mapped, locked page it * is in, respectively. @ir, @actx and @base_ni are NULL in this case. * @@ -77,7 +77,7 @@ typedef struct { INDEX_ENTRY *entry; void *data; u16 data_len; - BOOL is_in_root; + bool is_in_root; INDEX_ROOT *ir; ntfs_attr_search_ctx *actx; ntfs_inode *base_ni; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 7ec04513180..f47af5e6e23 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,7 +1,7 @@ /** * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -19,17 +19,24 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> -#include <linux/quotaops.h> +#include <linux/fs.h> +#include <linux/mm.h> #include <linux/mount.h> +#include <linux/mutex.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/slab.h> +#include <linux/log2.h> +#include <linux/aio.h> #include "aops.h" +#include "attrib.h" +#include "bitmap.h" #include "dir.h" #include "debug.h" #include "inode.h" -#include "attrib.h" +#include "lcnalloc.h" #include "malloc.h" #include "mft.h" #include "time.h" @@ -48,7 +55,7 @@ * * Return 1 if the attributes match and 0 if not. * - * NOTE: This function runs with the inode_lock spin lock held so it is not + * NOTE: This function runs with the inode_hash_lock spin lock held so it is not * allowed to sleep. */ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) @@ -88,11 +95,11 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. * In that case, @na->name and @na->name_len should be set to NULL and 0, * respectively. Although that is not strictly necessary as - * ntfs_read_inode_locked() will fill them in later. + * ntfs_read_locked_inode() will fill them in later. * * Return 0 on success and -errno on error. * - * NOTE: This function runs with the inode_lock spin lock held so it is not + * NOTE: This function runs with the inode->i_lock spin lock held so it is not * allowed to sleep. (Hence the GFP_ATOMIC allocation.) */ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) @@ -130,11 +137,11 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) BUG_ON(!na->name); i = na->name_len * sizeof(ntfschar); - ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC); + ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC); if (!ni->name) return -ENOMEM; memcpy(ni->name, na->name, i); - ni->name[i] = 0; + ni->name[na->name_len] = 0; } return 0; } @@ -164,8 +171,8 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; na.mft_no = mft_no; na.type = AT_UNUSED; @@ -222,8 +229,8 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type, ntfschar *name, u32 name_len) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; /* Make sure no one calls ntfs_attr_iget() for indices. */ BUG_ON(type == AT_INDEX_ALLOCATION); @@ -280,8 +287,8 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, u32 name_len) { struct inode *vi; - ntfs_attr na; int err; + ntfs_attr na; na.mft_no = base_vi->i_ino; na.type = AT_INDEX_ALLOCATION; @@ -317,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return VFS_I(ni); @@ -326,6 +333,12 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) return NULL; } +static void ntfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); +} + void ntfs_destroy_big_inode(struct inode *inode) { ntfs_inode *ni = NTFS_I(inode); @@ -334,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode) BUG_ON(ni->page); if (!atomic_dec_and_test(&ni->count)) BUG(); - kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); + call_rcu(&inode->i_rcu, ntfs_i_callback); } static inline ntfs_inode *ntfs_alloc_extent_inode(void) @@ -342,7 +355,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return ni; @@ -360,6 +373,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni) kmem_cache_free(ntfs_inode_cache, ni); } +/* + * The attribute runlist lock has separate locking rules from the + * normal runlist lock, so split the two lock-classes: + */ +static struct lock_class_key attr_list_rl_lock_class; + /** * __ntfs_init_inode - initialize ntfs specific part of an inode * @sb: super block of mounted volume @@ -381,23 +400,31 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) atomic_set(&ni->count, 1); ni->vol = NTFS_SB(sb); ntfs_init_runlist(&ni->runlist); - init_MUTEX(&ni->mrec_lock); + mutex_init(&ni->mrec_lock); ni->page = NULL; ni->page_ofs = 0; ni->attr_list_size = 0; ni->attr_list = NULL; ntfs_init_runlist(&ni->attr_list_rl); - ni->itype.index.bmp_ino = NULL; + lockdep_set_class(&ni->attr_list_rl.lock, + &attr_list_rl_lock_class); ni->itype.index.block_size = 0; ni->itype.index.vcn_size = 0; ni->itype.index.collation_rule = 0; ni->itype.index.block_size_bits = 0; ni->itype.index.vcn_size_bits = 0; - init_MUTEX(&ni->extent_lock); + mutex_init(&ni->extent_lock); ni->nr_extents = 0; ni->ext.base_ntfs_ino = NULL; } +/* + * Extent inodes get MFT-mapped in a nested way, while the base inode + * is still mapped. Teach this nesting to the lock validator by creating + * a separate class for nested inode's mrec_lock's: + */ +static struct lock_class_key extent_inode_mrec_lock_key; + inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, unsigned long mft_no) { @@ -406,6 +433,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, ntfs_debug("Entering."); if (likely(ni != NULL)) { __ntfs_init_inode(sb, ni); + lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key); ni->mft_no = mft_no; ni->type = AT_UNUSED; ni->name = NULL; @@ -509,7 +537,7 @@ err_corrupt_attr: * the ntfs inode. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * i_flags is set to 0 and we have no business touching it. Only an ioctl() * is allowed to write to them. We should of course be honouring them but @@ -523,6 +551,7 @@ static int ntfs_read_locked_inode(struct inode *vi) { ntfs_volume *vol = NTFS_SB(vi->i_sb); ntfs_inode *ni; + struct inode *bvi; MFT_RECORD *m; ATTR_RECORD *a; STANDARD_INFORMATION *si; @@ -533,8 +562,6 @@ static int ntfs_read_locked_inode(struct inode *vi) /* Setup the generic vfs inode parts now. */ - /* This is the optimal IO size (for stat), not the fs block size. */ - vi->i_blksize = PAGE_CACHE_SIZE; /* * This is for checking whether an inode has changed w.r.t. a file so * that the file can be updated if necessary (compare with f_version). @@ -585,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi) * might be tricky due to vfs interactions. Need to think about this * some more when implementing the unlink command. */ - vi->i_nlink = le16_to_cpu(m->link_count); + set_nlink(vi, le16_to_cpu(m->link_count)); /* * FIXME: Reparse points can have the directory bit set even though * they would be S_IFLNK. Need to deal with this further below when we @@ -595,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi) */ /* Everyone gets all permissions. */ vi->i_mode |= S_IRWXUGO; - /* If read-only, noone gets write permissions. */ + /* If read-only, no one gets write permissions. */ if (IS_RDONLY(vi)) vi->i_mode &= ~S_IWUGO; if (m->flags & MFT_RECORD_IS_DIRECTORY) { @@ -607,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi) vi->i_mode &= ~vol->dmask; /* Things break without this kludge! */ if (vi->i_nlink > 1) - vi->i_nlink = 1; + set_nlink(vi, 1); } else { vi->i_mode |= S_IFREG; /* Apply the file permissions mask set in the mount options. */ @@ -676,13 +703,28 @@ static int ntfs_read_locked_inode(struct inode *vi) ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); NInoSetAttrList(ni); a = ctx->attr; - if (a->flags & ATTR_IS_ENCRYPTED || - a->flags & ATTR_COMPRESSION_MASK || - a->flags & ATTR_IS_SPARSE) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(vi->i_sb, "Attribute list attribute is " - "compressed/encrypted/sparse."); + "compressed."); goto unm_err_out; } + if (a->flags & ATTR_IS_ENCRYPTED || + a->flags & ATTR_IS_SPARSE) { + if (a->non_resident) { + ntfs_error(vi->i_sb, "Non-resident attribute " + "list attribute is encrypted/" + "sparse."); + goto unm_err_out; + } + ntfs_warning(vi->i_sb, "Resident attribute list " + "attribute in inode 0x%lx is marked " + "encrypted/sparse which is not true. " + "However, Windows allows this and " + "chkdsk does not detect or correct it " + "so we will just ignore the invalid " + "flags and pretend they are not set.", + vi->i_ino); + } /* Now allocate memory for the attribute list. */ ni->attr_list_size = (u32)ntfs_attr_size(a); ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); @@ -744,7 +786,6 @@ skip_attr_list_load: */ if (S_ISDIR(vi->i_mode)) { loff_t bvi_size; - struct inode *bvi; ntfs_inode *bni; INDEX_ROOT *ir; u8 *ir_end, *index_end; @@ -949,13 +990,12 @@ skip_attr_list_load: err = PTR_ERR(bvi); goto unm_err_out; } - ni->itype.index.bmp_ino = bvi; bni = NTFS_I(bvi); if (NInoCompressed(bni) || NInoEncrypted(bni) || NInoSparse(bni)) { ntfs_error(vi->i_sb, "$BITMAP attribute is compressed " "and/or encrypted and/or sparse."); - goto unm_err_out; + goto iput_unm_err_out; } /* Consistency check bitmap size vs. index allocation size. */ bvi_size = i_size_read(bvi); @@ -964,8 +1004,10 @@ skip_attr_list_load: ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " "for index allocation (0x%llx).", bvi_size << 3, vi->i_size); - goto unm_err_out; + goto iput_unm_err_out; } + /* No longer need the bitmap attribute inode. */ + iput(bvi); skip_large_dir_stuff: /* Setup the operations for this inode. */ vi->i_op = &ntfs_dir_inode_ops; @@ -1048,10 +1090,10 @@ skip_large_dir_stuff: if (a->non_resident) { NInoSetNonResident(ni); if (NInoCompressed(ni) || NInoSparse(ni)) { - if (a->data.non_resident.compression_unit != - 4) { + if (NInoCompressed(ni) && a->data.non_resident. + compression_unit != 4) { ntfs_error(vi->i_sb, "Found " - "nonstandard " + "non-standard " "compression unit (%u " "instead of 4). " "Cannot handle this.", @@ -1060,16 +1102,26 @@ skip_large_dir_stuff: err = -EOPNOTSUPP; goto unm_err_out; } - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident. - compression_unit; - ni->itype.compressed.block_size = 1U << ( - a->data.non_resident. - compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed. - block_size) - 1; + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << + (a->data.non_resident. + compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype. + compressed. + block_size) - 1; + ni->itype.compressed.block_clusters = + 1U << a->data. + non_resident. + compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = + 0; + ni->itype.compressed.block_clusters = + 0; + } ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident. compressed_size); @@ -1130,7 +1182,8 @@ no_data_attr_special_case: vi->i_blocks = ni->allocated_size >> 9; ntfs_debug("Done."); return 0; - +iput_unm_err_out: + iput(bvi); unm_err_out: if (!err) err = -EIO; @@ -1161,7 +1214,7 @@ err_out: * necessary fields in @vi as well as initializing the ntfs inode. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * * Return 0 on success and -errno on error. In the error case, the inode will @@ -1186,11 +1239,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; - vi->i_nlink = base_vi->i_nlink; + set_nlink(vi, base_vi->i_nlink); vi->i_mtime = base_vi->i_mtime; vi->i_ctime = base_vi->i_ctime; vi->i_atime = base_vi->i_atime; @@ -1322,8 +1374,9 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) goto unm_err_out; } if (NInoCompressed(ni) || NInoSparse(ni)) { - if (a->data.non_resident.compression_unit != 4) { - ntfs_error(vi->i_sb, "Found nonstandard " + if (NInoCompressed(ni) && a->data.non_resident. + compression_unit != 4) { + ntfs_error(vi->i_sb, "Found non-standard " "compression unit (%u instead " "of 4). Cannot handle this.", a->data.non_resident. @@ -1331,13 +1384,22 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) err = -EOPNOTSUPP; goto unm_err_out; } - ni->itype.compressed.block_clusters = 1U << - a->data.non_resident.compression_unit; - ni->itype.compressed.block_size = 1U << ( - a->data.non_resident.compression_unit + - vol->cluster_size_bits); - ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed.block_size) - 1; + if (a->data.non_resident.compression_unit) { + ni->itype.compressed.block_size = 1U << + (a->data.non_resident. + compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = + ffs(ni->itype.compressed. + block_size) - 1; + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident. + compression_unit; + } else { + ni->itype.compressed.block_size = 0; + ni->itype.compressed.block_size_bits = 0; + ni->itype.compressed.block_clusters = 0; + } ni->itype.compressed.size = sle64_to_cpu( a->data.non_resident.compressed_size); } @@ -1352,9 +1414,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) ni->allocated_size = sle64_to_cpu( a->data.non_resident.allocated_size); } - /* Setup the operations for this attribute inode. */ - vi->i_op = NULL; - vi->i_fop = NULL; if (NInoMstProtected(ni)) vi->i_mapping->a_ops = &ntfs_mst_aops; else @@ -1390,7 +1449,6 @@ err_out: "Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len, base_vi->i_ino); make_bad_inode(vi); - make_bad_inode(base_vi); if (err != -ENOMEM) NVolSetErrors(vol); return err; @@ -1423,7 +1481,7 @@ err_out: * normal directory inodes. * * Q: What locks are held when the function is called? - * A: i_state has I_LOCK set, hence the inode is locked, also + * A: i_state has I_NEW set, hence the inode is locked, also * i_count is set to 1, so it is not going to go away * * Return 0 on success and -errno on error. In the error case, the inode will @@ -1447,11 +1505,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ni = NTFS_I(vi); base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_blksize = base_vi->i_blksize; vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; - vi->i_nlink = base_vi->i_nlink; + set_nlink(vi, base_vi->i_nlink); vi->i_mtime = base_vi->i_mtime; vi->i_ctime = base_vi->i_ctime; vi->i_atime = base_vi->i_atime; @@ -1521,7 +1578,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ntfs_debug("Index collation rule is 0x%x.", le32_to_cpu(ir->collation_rule)); ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); - if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) { + if (!is_power_of_2(ni->itype.index.block_size)) { ntfs_error(vi->i_sb, "Index block size (%u) is not a power of " "two.", ni->itype.index.block_size); goto unm_err_out; @@ -1575,6 +1632,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) "$INDEX_ALLOCATION attribute."); goto unm_err_out; } + a = ctx->attr; if (!a->non_resident) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " "resident."); @@ -1643,11 +1701,9 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) vi->i_size); goto iput_unm_err_out; } - ni->itype.index.bmp_ino = bvi; + iput(bvi); skip_large_index_stuff: /* Setup the operations for this index inode. */ - vi->i_op = NULL; - vi->i_fop = NULL; vi->i_mapping->a_ops = &ntfs_mst_aops; vi->i_blocks = ni->allocated_size >> 9; /* @@ -1660,7 +1716,6 @@ skip_large_index_stuff: ntfs_debug("Done."); return 0; - iput_unm_err_out: iput(bvi); unm_err_out: @@ -1680,6 +1735,15 @@ err_out: return err; } +/* + * The MFT inode has special locking, so teach the lock validator + * about this by splitting off the locking rules of the MFT from + * the locking rules of other inodes. The MFT inode can never be + * accessed from the VFS side (or even internally), only by the + * map_mft functions. + */ +static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; + /** * ntfs_read_inode_mount - special read_inode for mount time use only * @vi: inode to read @@ -1808,19 +1872,33 @@ int ntfs_read_inode_mount(struct inode *vi) } else /* if (!err) */ { ATTR_LIST_ENTRY *al_entry, *next_al_entry; u8 *al_end; + static const char *es = " Not allowed. $MFT is corrupt. " + "You should run chkdsk."; ntfs_debug("Attribute list attribute found in $MFT."); NInoSetAttrList(ni); a = ctx->attr; - if (a->flags & ATTR_IS_ENCRYPTED || - a->flags & ATTR_COMPRESSION_MASK || - a->flags & ATTR_IS_SPARSE) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(sb, "Attribute list attribute is " - "compressed/encrypted/sparse. Not " - "allowed. $MFT is corrupt. You should " - "run chkdsk."); + "compressed.%s", es); goto put_err_out; } + if (a->flags & ATTR_IS_ENCRYPTED || + a->flags & ATTR_IS_SPARSE) { + if (a->non_resident) { + ntfs_error(sb, "Non-resident attribute list " + "attribute is encrypted/" + "sparse.%s", es); + goto put_err_out; + } + ntfs_warning(sb, "Resident attribute list attribute " + "in $MFT system file is marked " + "encrypted/sparse which is not true. " + "However, Windows allows this and " + "chkdsk does not detect or correct it " + "so we will just ignore the invalid " + "flags and pretend they are not set."); + } /* Now allocate memory for the attribute list. */ ni->attr_list_size = (u32)ntfs_attr_size(a); ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); @@ -1903,8 +1981,7 @@ int ntfs_read_inode_mount(struct inode *vi) goto em_put_err_out; next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + le16_to_cpu(al_entry->length)); - if (le32_to_cpu(al_entry->type) > - const_le32_to_cpu(AT_DATA)) + if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA)) goto em_put_err_out; if (AT_DATA != al_entry->type) continue; @@ -2046,7 +2123,8 @@ int ntfs_read_inode_mount(struct inode *vi) * ntfs_read_inode() will have set up the default ones. */ /* Set uid and gid to root. */ - vi->i_uid = vi->i_gid = 0; + vi->i_uid = GLOBAL_ROOT_UID; + vi->i_gid = GLOBAL_ROOT_GID; /* Regular file. No access for anyone. */ vi->i_mode = S_IFREG; /* No VFS initiated operations allowed for $MFT. */ @@ -2092,6 +2170,14 @@ int ntfs_read_inode_mount(struct inode *vi) ntfs_attr_put_search_ctx(ctx); ntfs_debug("Done."); ntfs_free(m); + + /* + * Split the locking rules of the MFT inode from the + * locking rules of other inodes: + */ + lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); + lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); + return 0; em_put_err_out: @@ -2106,37 +2192,6 @@ err_out: return -1; } -/** - * ntfs_put_inode - handler for when the inode reference count is decremented - * @vi: vfs inode - * - * The VFS calls ntfs_put_inode() every time the inode reference count (i_count) - * is about to be decremented (but before the decrement itself. - * - * If the inode @vi is a directory with two references, one of which is being - * dropped, we need to put the attribute inode for the directory index bitmap, - * if it is present, otherwise the directory inode would remain pinned for - * ever. - */ -void ntfs_put_inode(struct inode *vi) -{ - if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) { - ntfs_inode *ni = NTFS_I(vi); - if (NInoIndexAllocPresent(ni)) { - struct inode *bvi = NULL; - down(&vi->i_sem); - if (atomic_read(&vi->i_count) == 2) { - bvi = ni->itype.index.bmp_ino; - if (bvi) - ni->itype.index.bmp_ino = NULL; - } - up(&vi->i_sem); - if (bvi) - iput(bvi); - } - } -} - static void __ntfs_clear_inode(ntfs_inode *ni) { /* Free all alocated memory. */ @@ -2189,7 +2244,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) } /** - * ntfs_clear_big_inode - clean up the ntfs specific part of an inode + * ntfs_evict_big_inode - clean up the ntfs specific part of an inode * @vi: vfs inode pending annihilation * * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() @@ -2198,25 +2253,16 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) * * If the MFT record is dirty, we commit it before doing anything else. */ -void ntfs_clear_big_inode(struct inode *vi) +void ntfs_evict_big_inode(struct inode *vi) { ntfs_inode *ni = NTFS_I(vi); - /* - * If the inode @vi is an index inode we need to put the attribute - * inode for the index bitmap, if it is present, otherwise the index - * inode would disappear and the attribute inode for the index bitmap - * would no longer be referenced from anywhere and thus it would remain - * pinned for ever. - */ - if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) && - NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) { - iput(ni->itype.index.bmp_ino); - ni->itype.index.bmp_ino = NULL; - } + truncate_inode_pages_final(&vi->i_data); + clear_inode(vi); + #ifdef NTFS_RW if (NInoDirty(ni)) { - BOOL was_bad = (is_bad_inode(vi)); + bool was_bad = (is_bad_inode(vi)); /* Committing the inode also commits all extent inodes. */ ntfs_commit_inode(vi); @@ -2254,20 +2300,20 @@ void ntfs_clear_big_inode(struct inode *vi) /** * ntfs_show_options - show mount options in /proc/mounts * @sf: seq_file in which to write our mount options - * @mnt: vfs mount whose mount options to display + * @root: root of the mounted tree whose mount options to display * * Called by the VFS once for each mounted ntfs volume when someone reads * /proc/mounts in order to display the NTFS specific mount options of each - * mount. The mount options of the vfs mount @mnt are written to the seq file + * mount. The mount options of fs specified by @root are written to the seq file * @sf and success is returned. */ -int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) +int ntfs_show_options(struct seq_file *sf, struct dentry *root) { - ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); + ntfs_volume *vol = NTFS_SB(root->d_sb); int i; - seq_printf(sf, ",uid=%i", vol->uid); - seq_printf(sf, ",gid=%i", vol->gid); + seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid)); + seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid)); if (vol->fmask == vol->dmask) seq_printf(sf, ",umask=0%o", vol->fmask); else { @@ -2291,11 +2337,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) #ifdef NTFS_RW +static const char *es = " Leaving inconsistent metadata. Unmount and run " + "chkdsk."; + /** * ntfs_truncate - called when the i_size of an ntfs inode is changed * @vi: inode for which the i_size was changed * - * We do not support i_size changes yet. + * We only support i_size changes for normal files at present, i.e. not + * compressed and not encrypted. This is enforced in ntfs_setattr(), see + * below. * * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and * that the change is allowed. @@ -2305,81 +2356,505 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) * * Returns 0 on success or -errno on error. * - * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for - * writing. The only case where ->i_alloc_sem is not held is - * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called - * with the current i_size as the offset which means that it is a noop as far - * as ntfs_truncate() is concerned. + * Called with ->i_mutex held. */ int ntfs_truncate(struct inode *vi) { - ntfs_inode *ni = NTFS_I(vi); + s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size; + VCN highest_vcn; + unsigned long flags; + ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; ntfs_attr_search_ctx *ctx; MFT_RECORD *m; ATTR_RECORD *a; const char *te = " Leaving file length out of sync with i_size."; - int err; + int err, mp_size, size_change, alloc_change; + u32 attr_len; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(NInoAttr(ni)); + BUG_ON(S_ISDIR(vi->i_mode)); + BUG_ON(NInoMstProtected(ni)); BUG_ON(ni->nr_extents < 0); - m = map_mft_record(ni); +retry_truncate: + /* + * Lock the runlist for writing and map the mft record to ensure it is + * safe to mess with the attribute runlist and sizes. + */ + down_write(&ni->runlist.lock); + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx " "(error code %d).%s", vi->i_ino, err, te); ctx = NULL; m = NULL; - goto err_out; + goto old_bad_out; } - ctx = ntfs_attr_get_search_ctx(ni, m); + ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { ntfs_error(vi->i_sb, "Failed to allocate a search context for " "inode 0x%lx (not enough memory).%s", vi->i_ino, te); err = -ENOMEM; - goto err_out; + goto old_bad_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { - if (err == -ENOENT) + if (err == -ENOENT) { ntfs_error(vi->i_sb, "Open attribute is missing from " "mft record. Inode 0x%lx is corrupt. " - "Run chkdsk.", vi->i_ino); - else + "Run chkdsk.%s", vi->i_ino, te); + err = -EIO; + } else ntfs_error(vi->i_sb, "Failed to lookup attribute in " - "inode 0x%lx (error code %d).", - vi->i_ino, err); - goto err_out; + "inode 0x%lx (error code %d).%s", + vi->i_ino, err, te); + goto old_bad_out; } + m = ctx->mrec; a = ctx->attr; - /* If the size has not changed there is nothing to do. */ - if (ntfs_attr_size(a) == i_size_read(vi)) - goto done; - // TODO: Implement the truncate... - ntfs_error(vi->i_sb, "Inode size has changed but this is not " - "implemented yet. Resetting inode size to old value. " - " This is most likely a bug in the ntfs driver!"); - i_size_write(vi, ntfs_attr_size(a)); -done: + /* + * The i_size of the vfs inode is the new size for the attribute value. + */ + new_size = i_size_read(vi); + /* The current size of the attribute value is the old size. */ + old_size = ntfs_attr_size(a); + /* Calculate the new allocated size. */ + if (NInoNonResident(ni)) + new_alloc_size = (new_size + vol->cluster_size - 1) & + ~(s64)vol->cluster_size_mask; + else + new_alloc_size = (new_size + 7) & ~7; + /* The current allocated size is the old allocated size. */ + read_lock_irqsave(&ni->size_lock, flags); + old_alloc_size = ni->allocated_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* + * The change in the file size. This will be 0 if no change, >0 if the + * size is growing, and <0 if the size is shrinking. + */ + size_change = -1; + if (new_size - old_size >= 0) { + size_change = 1; + if (new_size == old_size) + size_change = 0; + } + /* As above for the allocated size. */ + alloc_change = -1; + if (new_alloc_size - old_alloc_size >= 0) { + alloc_change = 1; + if (new_alloc_size == old_alloc_size) + alloc_change = 0; + } + /* + * If neither the size nor the allocation are being changed there is + * nothing to do. + */ + if (!size_change && !alloc_change) + goto unm_done; + /* If the size is changing, check if new size is allowed in $AttrDef. */ + if (size_change) { + err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); + if (unlikely(err)) { + if (err == -ERANGE) { + ntfs_error(vol->sb, "Truncate would cause the " + "inode 0x%lx to %simum size " + "for its attribute type " + "(0x%x). Aborting truncate.", + vi->i_ino, + new_size > old_size ? "exceed " + "the max" : "go under the min", + le32_to_cpu(ni->type)); + err = -EFBIG; + } else { + ntfs_error(vol->sb, "Inode 0x%lx has unknown " + "attribute type 0x%x. " + "Aborting truncate.", + vi->i_ino, + le32_to_cpu(ni->type)); + err = -EIO; + } + /* Reset the vfs inode size to the old size. */ + i_size_write(vi, old_size); + goto err_out; + } + } + if (NInoCompressed(ni) || NInoEncrypted(ni)) { + ntfs_warning(vi->i_sb, "Changes in inode size are not " + "supported yet for %s files, ignoring.", + NInoCompressed(ni) ? "compressed" : + "encrypted"); + err = -EOPNOTSUPP; + goto bad_out; + } + if (a->non_resident) + goto do_non_resident_truncate; + BUG_ON(NInoNonResident(ni)); + /* Resize the attribute record to best fit the new attribute size. */ + if (new_size < vol->mft_record_size && + !ntfs_resident_attr_value_resize(m, a, new_size)) { + /* The resize succeeded! */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + write_lock_irqsave(&ni->size_lock, flags); + /* Update the sizes in the ntfs inode and all is done. */ + ni->allocated_size = le32_to_cpu(a->length) - + le16_to_cpu(a->data.resident.value_offset); + /* + * Note ntfs_resident_attr_value_resize() has already done any + * necessary data clearing in the attribute record. When the + * file is being shrunk vmtruncate() will already have cleared + * the top part of the last partial page, i.e. since this is + * the resident case this is the page with index 0. However, + * when the file is being expanded, the page cache page data + * between the old data_size, i.e. old_size, and the new_size + * has not been zeroed. Fortunately, we do not need to zero it + * either since on one hand it will either already be zero due + * to both readpage and writepage clearing partial page data + * beyond i_size in which case there is nothing to do or in the + * case of the file being mmap()ped at the same time, POSIX + * specifies that the behaviour is unspecified thus we do not + * have to do anything. This means that in our implementation + * in the rare case that the file is mmap()ped and a write + * occurred into the mmap()ped region just beyond the file size + * and writepage has not yet been called to write out the page + * (which would clear the area beyond the file size) and we now + * extend the file size to incorporate this dirty region + * outside the file size, a write of the page would result in + * this data being written to disk instead of being cleared. + * Given both POSIX and the Linux mmap(2) man page specify that + * this corner case is undefined, we choose to leave it like + * that as this is much simpler for us as we cannot lock the + * relevant page now since we are holding too many ntfs locks + * which would result in a lock reversal deadlock. + */ + ni->initialized_size = new_size; + write_unlock_irqrestore(&ni->size_lock, flags); + goto unm_done; + } + /* If the above resize failed, this must be an attribute extension. */ + BUG_ON(size_change < 0); + /* + * We have to drop all the locks so we can call + * ntfs_attr_make_non_resident(). This could be optimised by try- + * locking the first page cache page and only if that fails dropping + * the locks, locking the page, and redoing all the locking and + * lookups. While this would be a huge optimisation, it is not worth + * it as this is definitely a slow code path as it only ever can happen + * once for any given file. + */ ntfs_attr_put_search_ctx(ctx); - unmap_mft_record(ni); - NInoClearTruncateFailed(ni); - ntfs_debug("Done."); - return 0; -err_out: - if (err != -ENOMEM) { + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + /* + * Not enough space in the mft record, try to make the attribute + * non-resident and if successful restart the truncation process. + */ + err = ntfs_attr_make_non_resident(ni, old_size); + if (likely(!err)) + goto retry_truncate; + /* + * Could not make non-resident. If this is due to this not being + * permitted for this attribute type or there not being enough space, + * try to make other attributes non-resident. Otherwise fail. + */ + if (unlikely(err != -EPERM && err != -ENOSPC)) { + ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute " + "type 0x%x, because the conversion from " + "resident to non-resident attribute failed " + "with error code %i.", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), err); + if (err != -ENOMEM) + err = -EIO; + goto conv_err_out; + } + /* TODO: Not implemented from here, abort. */ + if (err == -ENOSPC) + ntfs_error(vol->sb, "Not enough space in the mft record/on " + "disk for the non-resident attribute value. " + "This case is not implemented yet."); + else /* if (err == -EPERM) */ + ntfs_error(vol->sb, "This attribute type may not be " + "non-resident. This case is not implemented " + "yet."); + err = -EOPNOTSUPP; + goto conv_err_out; +#if 0 + // TODO: Attempt to make other attributes non-resident. + if (!err) + goto do_resident_extend; + /* + * Both the attribute list attribute and the standard information + * attribute must remain in the base inode. Thus, if this is one of + * these attributes, we have to try to move other attributes out into + * extent mft records instead. + */ + if (ni->type == AT_ATTRIBUTE_LIST || + ni->type == AT_STANDARD_INFORMATION) { + // TODO: Attempt to move other attributes into extent mft + // records. + err = -EOPNOTSUPP; + if (!err) + goto do_resident_extend; + goto err_out; + } + // TODO: Attempt to move this attribute to an extent mft record, but + // only if it is not already the only attribute in an mft record in + // which case there would be nothing to gain. + err = -EOPNOTSUPP; + if (!err) + goto do_resident_extend; + /* There is nothing we can do to make enough space. )-: */ + goto err_out; +#endif +do_non_resident_truncate: + BUG_ON(!NInoNonResident(ni)); + if (alloc_change < 0) { + highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); + if (highest_vcn > 0 && + old_alloc_size >> vol->cluster_size_bits > + highest_vcn + 1) { + /* + * This attribute has multiple extents. Not yet + * supported. + */ + ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, " + "attribute type 0x%x, because the " + "attribute is highly fragmented (it " + "consists of multiple extents) and " + "this case is not implemented yet.", + vi->i_ino, + (unsigned)le32_to_cpu(ni->type)); + err = -EOPNOTSUPP; + goto bad_out; + } + } + /* + * If the size is shrinking, need to reduce the initialized_size and + * the data_size before reducing the allocation. + */ + if (size_change < 0) { + /* + * Make the valid size smaller (i_size is already up-to-date). + */ + write_lock_irqsave(&ni->size_lock, flags); + if (new_size < ni->initialized_size) { + ni->initialized_size = new_size; + a->data.non_resident.initialized_size = + cpu_to_sle64(new_size); + } + a->data.non_resident.data_size = cpu_to_sle64(new_size); + write_unlock_irqrestore(&ni->size_lock, flags); + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + /* If the allocated size is not changing, we are done. */ + if (!alloc_change) + goto unm_done; + /* + * If the size is shrinking it makes no sense for the + * allocation to be growing. + */ + BUG_ON(alloc_change > 0); + } else /* if (size_change >= 0) */ { + /* + * The file size is growing or staying the same but the + * allocation can be shrinking, growing or staying the same. + */ + if (alloc_change > 0) { + /* + * We need to extend the allocation and possibly update + * the data size. If we are updating the data size, + * since we are not touching the initialized_size we do + * not need to worry about the actual data on disk. + * And as far as the page cache is concerned, there + * will be no pages beyond the old data size and any + * partial region in the last page between the old and + * new data size (or the end of the page if the new + * data size is outside the page) does not need to be + * modified as explained above for the resident + * attribute truncate case. To do this, we simply drop + * the locks we hold and leave all the work to our + * friendly helper ntfs_attr_extend_allocation(). + */ + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + err = ntfs_attr_extend_allocation(ni, new_size, + size_change > 0 ? new_size : -1, -1); + /* + * ntfs_attr_extend_allocation() will have done error + * output already. + */ + goto done; + } + if (!alloc_change) + goto alloc_done; + } + /* alloc_change < 0 */ + /* Free the clusters. */ + nr_freed = ntfs_cluster_free(ni, new_alloc_size >> + vol->cluster_size_bits, -1, ctx); + m = ctx->mrec; + a = ctx->attr; + if (unlikely(nr_freed < 0)) { + ntfs_error(vol->sb, "Failed to release cluster(s) (error code " + "%lli). Unmount and run chkdsk to recover " + "the lost cluster(s).", (long long)nr_freed); NVolSetErrors(vol); - make_bad_inode(vi); + nr_freed = 0; + } + /* Truncate the runlist. */ + err = ntfs_rl_truncate_nolock(vol, &ni->runlist, + new_alloc_size >> vol->cluster_size_bits); + /* + * If the runlist truncation failed and/or the search context is no + * longer valid, we cannot resize the attribute record or build the + * mapping pairs array thus we mark the inode bad so that no access to + * the freed clusters can happen. + */ + if (unlikely(err || IS_ERR(m))) { + ntfs_error(vol->sb, "Failed to %s (error code %li).%s", + IS_ERR(m) ? + "restore attribute search context" : + "truncate attribute runlist", + IS_ERR(m) ? PTR_ERR(m) : err, es); + err = -EIO; + goto bad_out; + } + /* Get the size for the shrunk mapping pairs array for the runlist. */ + mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1); + if (unlikely(mp_size <= 0)) { + ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, " + "attribute type 0x%x, because determining the " + "size for the mapping pairs failed with error " + "code %i.%s", vi->i_ino, + (unsigned)le32_to_cpu(ni->type), mp_size, es); + err = -EIO; + goto bad_out; + } + /* + * Shrink the attribute record for the new mapping pairs array. Note, + * this cannot fail since we are making the attribute smaller thus by + * definition there is enough space to do so. + */ + attr_len = le32_to_cpu(a->length); + err = ntfs_attr_record_resize(m, a, mp_size + + le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); + BUG_ON(err); + /* + * Generate the mapping pairs array directly into the attribute record. + */ + err = ntfs_mapping_pairs_build(vol, (u8*)a + + le16_to_cpu(a->data.non_resident.mapping_pairs_offset), + mp_size, ni->runlist.rl, 0, -1, NULL); + if (unlikely(err)) { + ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, " + "attribute type 0x%x, because building the " + "mapping pairs failed with error code %i.%s", + vi->i_ino, (unsigned)le32_to_cpu(ni->type), + err, es); + err = -EIO; + goto bad_out; + } + /* Update the allocated/compressed size as well as the highest vcn. */ + a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >> + vol->cluster_size_bits) - 1); + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = new_alloc_size; + a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size); + if (NInoSparse(ni) || NInoCompressed(ni)) { + if (nr_freed) { + ni->itype.compressed.size -= nr_freed << + vol->cluster_size_bits; + BUG_ON(ni->itype.compressed.size < 0); + a->data.non_resident.compressed_size = cpu_to_sle64( + ni->itype.compressed.size); + vi->i_blocks = ni->itype.compressed.size >> 9; + } + } else + vi->i_blocks = new_alloc_size >> 9; + write_unlock_irqrestore(&ni->size_lock, flags); + /* + * We have shrunk the allocation. If this is a shrinking truncate we + * have already dealt with the initialized_size and the data_size above + * and we are done. If the truncate is only changing the allocation + * and not the data_size, we are also done. If this is an extending + * truncate, need to extend the data_size now which is ensured by the + * fact that @size_change is positive. + */ +alloc_done: + /* + * If the size is growing, need to update it now. If it is shrinking, + * we have already updated it above (before the allocation change). + */ + if (size_change > 0) + a->data.non_resident.data_size = cpu_to_sle64(new_size); + /* Ensure the modified mft record is written out. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); +unm_done: + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); +done: + /* Update the mtime and ctime on the base inode. */ + /* normally ->truncate shouldn't update ctime or mtime, + * but ntfs did before so it got a copy & paste version + * of file_update_time. one day someone should fix this + * for real. + */ + if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { + struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb); + int sync_it = 0; + + if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) || + !timespec_equal(&VFS_I(base_ni)->i_ctime, &now)) + sync_it = 1; + VFS_I(base_ni)->i_mtime = now; + VFS_I(base_ni)->i_ctime = now; + + if (sync_it) + mark_inode_dirty_sync(VFS_I(base_ni)); + } + + if (likely(!err)) { + NInoClearTruncateFailed(ni); + ntfs_debug("Done."); } + return err; +old_bad_out: + old_size = -1; +bad_out: + if (err != -ENOMEM && err != -EOPNOTSUPP) + NVolSetErrors(vol); + if (err != -EOPNOTSUPP) + NInoSetTruncateFailed(ni); + else if (old_size >= 0) + i_size_write(vi, old_size); +err_out: if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) - unmap_mft_record(ni); - NInoSetTruncateFailed(ni); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); +out: + ntfs_debug("Failed. Returning error code %i.", err); return err; +conv_err_out: + if (err != -ENOMEM && err != -EOPNOTSUPP) + NVolSetErrors(vol); + if (err != -EOPNOTSUPP) + NInoSetTruncateFailed(ni); + else + i_size_write(vi, old_size); + goto out; } /** @@ -2390,9 +2865,11 @@ err_out: * * See ntfs_truncate() description above for details. */ +#ifdef NTFS_RW void ntfs_truncate_vfs(struct inode *vi) { ntfs_truncate(vi); } +#endif /** * ntfs_setattr - called from notify_change() when an attribute is being changed @@ -2406,11 +2883,7 @@ void ntfs_truncate_vfs(struct inode *vi) { * We also abort all changes of user, group, and mode as we do not implement * the NTFS ACLs yet. * - * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also - * called with ->i_alloc_sem held for writing. - * - * Basically this is a copy of generic notify_change() and inode_setattr() - * functionality, except we intercept and abort changes in i_size. + * Called with ->i_mutex held. */ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) { @@ -2420,8 +2893,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) err = inode_change_ok(vi, attr); if (err) - return err; - + goto out; /* We do not support NTFS ACLs yet. */ if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) { ntfs_warning(vi->i_sb, "Changes in user/group/mode are not " @@ -2429,14 +2901,24 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) err = -EOPNOTSUPP; goto out; } - if (ia_valid & ATTR_SIZE) { if (attr->ia_size != i_size_read(vi)) { - ntfs_warning(vi->i_sb, "Changes in inode size are not " - "supported yet, ignoring."); - err = -EOPNOTSUPP; - // TODO: Implement... - // err = vmtruncate(vi, attr->ia_size); + ntfs_inode *ni = NTFS_I(vi); + /* + * FIXME: For now we do not support resizing of + * compressed or encrypted files yet. + */ + if (NInoCompressed(ni) || NInoEncrypted(ni)) { + ntfs_warning(vi->i_sb, "Changes in inode size " + "are not supported yet for " + "%s files, ignoring.", + NInoCompressed(ni) ? + "compressed" : "encrypted"); + err = -EOPNOTSUPP; + } else { + truncate_setsize(vi, attr->ia_size); + ntfs_truncate_vfs(vi); + } if (err || ia_valid == ATTR_SIZE) goto out; } else { @@ -2479,7 +2961,7 @@ out: * * Return 0 on success and -errno on error. */ -int ntfs_write_inode(struct inode *vi, int sync) +int __ntfs_write_inode(struct inode *vi, int sync) { sle64 nt; ntfs_inode *ni = NTFS_I(vi); @@ -2487,7 +2969,7 @@ int ntfs_write_inode(struct inode *vi, int sync) MFT_RECORD *m; STANDARD_INFORMATION *si; int err = 0; - BOOL modified = FALSE; + bool modified = false; ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "", vi->i_ino); @@ -2529,7 +3011,7 @@ int ntfs_write_inode(struct inode *vi, int sync) sle64_to_cpu(si->last_data_change_time), (long long)sle64_to_cpu(nt)); si->last_data_change_time = nt; - modified = TRUE; + modified = true; } nt = utc2ntfs(vi->i_ctime); if (si->last_mft_change_time != nt) { @@ -2538,7 +3020,7 @@ int ntfs_write_inode(struct inode *vi, int sync) sle64_to_cpu(si->last_mft_change_time), (long long)sle64_to_cpu(nt)); si->last_mft_change_time = nt; - modified = TRUE; + modified = true; } nt = utc2ntfs(vi->i_atime); if (si->last_access_time != nt) { @@ -2547,7 +3029,7 @@ int ntfs_write_inode(struct inode *vi, int sync) (long long)sle64_to_cpu(si->last_access_time), (long long)sle64_to_cpu(nt)); si->last_access_time = nt; - modified = TRUE; + modified = true; } /* * If we just modified the standard information attribute we need to @@ -2565,15 +3047,18 @@ int ntfs_write_inode(struct inode *vi, int sync) * record will be cleaned and written out to disk below, i.e. before * this function returns. */ - if (modified && !NInoTestSetDirty(ctx->ntfs_ino)) - mark_ntfs_record_dirty(ctx->ntfs_ino->page, - ctx->ntfs_ino->page_ofs); + if (modified) { + flush_dcache_mft_record_page(ctx->ntfs_ino); + if (!NInoTestSetDirty(ctx->ntfs_ino)) + mark_ntfs_record_dirty(ctx->ntfs_ino->page, + ctx->ntfs_ino->page_ofs); + } ntfs_attr_put_search_ctx(ctx); /* Now the access times are updated, write the base mft record. */ if (NInoDirty(ni)) err = write_mft_record(ni, m, sync); /* Write all attached extent mft records. */ - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (ni->nr_extents > 0) { ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos; int i; @@ -2600,7 +3085,7 @@ int ntfs_write_inode(struct inode *vi, int sync) } } } - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); unmap_mft_record(ni); if (unlikely(err)) goto err_out; @@ -2615,9 +3100,7 @@ err_out: "retries later."); mark_inode_dirty(vi); } else { - ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode " - "as bad. You should run chkdsk.", -err); - make_bad_inode(vi); + ntfs_error(vi->i_sb, "Failed (error %i): Run chkdsk.", -err); NVolSetErrors(ni->vol); } return err; diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 3de5c023196..76b6cfb579d 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -2,7 +2,7 @@ * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of * the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -24,12 +24,13 @@ #ifndef _LINUX_NTFS_INODE_H #define _LINUX_NTFS_INODE_H -#include <linux/mm.h> +#include <linux/atomic.h> + #include <linux/fs.h> -#include <linux/seq_file.h> #include <linux/list.h> -#include <asm/atomic.h> -#include <asm/semaphore.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/seq_file.h> #include "layout.h" #include "volume.h" @@ -81,7 +82,7 @@ struct _ntfs_inode { * The following fields are only valid for real inodes and extent * inodes. */ - struct semaphore mrec_lock; /* Lock for serializing access to the + struct mutex mrec_lock; /* Lock for serializing access to the mft record belonging to this inode. */ struct page *page; /* The page containing the mft record of the inode. This should only be touched by the @@ -100,8 +101,6 @@ struct _ntfs_inode { runlist attr_list_rl; /* Run list for the attribute list value. */ union { struct { /* It is a directory, $MFT, or an index inode. */ - struct inode *bmp_ino; /* Attribute inode for the - index $BITMAP. */ u32 block_size; /* Size of an index block. */ u32 vcn_size; /* Size of a vcn in this index. */ @@ -119,7 +118,7 @@ struct _ntfs_inode { u8 block_clusters; /* Number of clusters per cb. */ } compressed; } itype; - struct semaphore extent_lock; /* Lock for accessing/modifying the + struct mutex extent_lock; /* Lock for accessing/modifying the below . */ s32 nr_extents; /* For a base mft record, the number of attached extent inodes (0 if none), for extent records and for fake @@ -280,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); extern void ntfs_destroy_big_inode(struct inode *inode); -extern void ntfs_clear_big_inode(struct inode *vi); +extern void ntfs_evict_big_inode(struct inode *vi); extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); @@ -299,9 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni); extern int ntfs_read_inode_mount(struct inode *vi); -extern void ntfs_put_inode(struct inode *vi); - -extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); +extern int ntfs_show_options(struct seq_file *sf, struct dentry *root); #ifdef NTFS_RW @@ -310,15 +307,19 @@ extern void ntfs_truncate_vfs(struct inode *vi); extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -extern int ntfs_write_inode(struct inode *vi, int sync); +extern int __ntfs_write_inode(struct inode *vi, int sync); static inline void ntfs_commit_inode(struct inode *vi) { if (!is_bad_inode(vi)) - ntfs_write_inode(vi, 1); + __ntfs_write_inode(vi, 1); return; } +#else + +static inline void ntfs_truncate_vfs(struct inode *vi) {} + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_INODE_H */ diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 5c248d404f0..809c0e6d8e0 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -31,19 +31,8 @@ #include "types.h" -/* - * Constant endianness conversion defines. - */ -#define const_le16_to_cpu(x) __constant_le16_to_cpu(x) -#define const_le32_to_cpu(x) __constant_le32_to_cpu(x) -#define const_le64_to_cpu(x) __constant_le64_to_cpu(x) - -#define const_cpu_to_le16(x) __constant_cpu_to_le16(x) -#define const_cpu_to_le32(x) __constant_cpu_to_le32(x) -#define const_cpu_to_le64(x) __constant_cpu_to_le64(x) - /* The NTFS oem_id "NTFS " */ -#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) +#define magicNTFS cpu_to_le64(0x202020205346544eULL) /* * Location of bootsector on partition: @@ -114,25 +103,25 @@ typedef struct { */ enum { /* Found in $MFT/$DATA. */ - magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ - magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ - magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ + magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */ + magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */ + magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ /* Found in $LogFile/$DATA. */ - magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ - magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ + magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */ + magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */ /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ - magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ + magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ /* Found in all ntfs record containing records. */ - magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector + magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector transfer was detected. */ /* * Found in $LogFile/$DATA when a page is full of 0xff bytes and is * thus not initialized. Page must be initialized before using it. */ - magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ + magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */ }; typedef le32 NTFS_RECORD_TYPE; @@ -142,13 +131,13 @@ typedef le32 NTFS_RECORD_TYPE; * operator! (-8 */ -static inline BOOL __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r) +static inline bool __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r) { return (x == r); } #define ntfs_is_magic(x, m) __ntfs_is_magic(x, magic_##m) -static inline BOOL __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r) +static inline bool __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r) { return (*p == r); } @@ -258,8 +247,8 @@ typedef enum { * information about the mft record in which they are present. */ enum { - MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), - MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), + MFT_RECORD_IN_USE = cpu_to_le16(0x0001), + MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002), } __attribute__ ((__packed__)); typedef le16 MFT_RECORD_FLAGS; @@ -297,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS; * fragmented. Volume free space includes the empty part of the mft zone and * when the volume's free 88% are used up, the mft zone is shrunk by a factor * of 2, thus making more space available for more files/data. This process is - * repeated everytime there is no more free space except for the mft zone until + * repeated every time there is no more free space except for the mft zone until * there really is no more free space. */ @@ -309,7 +298,7 @@ typedef le16 MFT_RECORD_FLAGS; * Note: The _LE versions will return a CPU endian formatted value! */ #define MFT_REF_MASK_CPU 0x0000ffffffffffffULL -#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) +#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU) typedef u64 MFT_REF; typedef le64 leMFT_REF; @@ -323,7 +312,7 @@ typedef le64 leMFT_REF; #define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) #define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) -#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 1 : 0) +#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? true : false) #define ERR_MREF(x) ((u64)((s64)(x))) #define MREF_ERR(x) ((int)((s64)(x))) @@ -477,25 +466,25 @@ typedef struct { * a revealing choice of symbol I do not know what is... (-; */ enum { - AT_UNUSED = const_cpu_to_le32( 0), - AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), - AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), - AT_FILE_NAME = const_cpu_to_le32( 0x30), - AT_OBJECT_ID = const_cpu_to_le32( 0x40), - AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), - AT_VOLUME_NAME = const_cpu_to_le32( 0x60), - AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), - AT_DATA = const_cpu_to_le32( 0x80), - AT_INDEX_ROOT = const_cpu_to_le32( 0x90), - AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), - AT_BITMAP = const_cpu_to_le32( 0xb0), - AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), - AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), - AT_EA = const_cpu_to_le32( 0xe0), - AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), - AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), - AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), - AT_END = const_cpu_to_le32(0xffffffff) + AT_UNUSED = cpu_to_le32( 0), + AT_STANDARD_INFORMATION = cpu_to_le32( 0x10), + AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20), + AT_FILE_NAME = cpu_to_le32( 0x30), + AT_OBJECT_ID = cpu_to_le32( 0x40), + AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50), + AT_VOLUME_NAME = cpu_to_le32( 0x60), + AT_VOLUME_INFORMATION = cpu_to_le32( 0x70), + AT_DATA = cpu_to_le32( 0x80), + AT_INDEX_ROOT = cpu_to_le32( 0x90), + AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0), + AT_BITMAP = cpu_to_le32( 0xb0), + AT_REPARSE_POINT = cpu_to_le32( 0xc0), + AT_EA_INFORMATION = cpu_to_le32( 0xd0), + AT_EA = cpu_to_le32( 0xe0), + AT_PROPERTY_SET = cpu_to_le32( 0xf0), + AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100), + AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000), + AT_END = cpu_to_le32(0xffffffff) }; typedef le32 ATTR_TYPE; @@ -539,13 +528,13 @@ typedef le32 ATTR_TYPE; * equal then the second le32 values would be compared, etc. */ enum { - COLLATION_BINARY = const_cpu_to_le32(0x00), - COLLATION_FILE_NAME = const_cpu_to_le32(0x01), - COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), - COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), - COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), - COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), - COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), + COLLATION_BINARY = cpu_to_le32(0x00), + COLLATION_FILE_NAME = cpu_to_le32(0x01), + COLLATION_UNICODE_STRING = cpu_to_le32(0x02), + COLLATION_NTOFS_ULONG = cpu_to_le32(0x10), + COLLATION_NTOFS_SID = cpu_to_le32(0x11), + COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12), + COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13), }; typedef le32 COLLATION_RULE; @@ -559,25 +548,25 @@ typedef le32 COLLATION_RULE; * NT4. */ enum { - ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be + ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be indexed. */ - ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type + ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type can be present multiple times in the mft records of an inode. */ - ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value + ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value must contain at least one non-zero byte. */ - ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be + ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be indexed and the attribute value must be unique for the attribute type in all of the mft records of an inode. */ - ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be + ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be named and the name must be unique for the attribute type in all of the mft records of an inode. */ - ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be + ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be resident. */ - ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log + ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log modifications to this attribute, regardless of whether it is resident or non-resident. Without this, only log @@ -614,12 +603,12 @@ typedef struct { * Attribute flags (16-bit). */ enum { - ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), - ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method + ATTR_IS_COMPRESSED = cpu_to_le16(0x0001), + ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method mask. Also, first illegal value. */ - ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), - ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), + ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000), + ATTR_IS_SPARSE = cpu_to_le16(0x8000), } __attribute__ ((__packed__)); typedef le16 ATTR_FLAGS; @@ -769,7 +758,7 @@ typedef struct { compressed. (This effectively limits the compression unit size to be a power of two clusters.) WinNT4 only uses a value of 4. - Sparse files also have this set to 4. */ + Sparse files have this set to 0 on XPSP2. */ /* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ /* The sizes below are only used when lowest_vcn is zero, as otherwise it would be difficult to keep them up-to-date.*/ @@ -801,53 +790,56 @@ typedef struct { typedef ATTR_RECORD ATTR_REC; /* - * File attribute flags (32-bit). + * File attribute flags (32-bit) appearing in the file_attributes fields of the + * STANDARD_INFORMATION attribute of MFT_RECORDs and the FILENAME_ATTR + * attributes of MFT_RECORDs and directory index entries. + * + * All of the below flags appear in the directory index entries but only some + * appear in the STANDARD_INFORMATION attribute whilst only some others appear + * in the FILENAME_ATTR attribute of MFT_RECORDs. Unless otherwise stated the + * flags appear in all of the above. */ enum { - /* - * The following flags are only present in the STANDARD_INFORMATION - * attribute (in the field file_attributes). - */ - FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), - FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), - FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), - /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ + FILE_ATTR_READONLY = cpu_to_le32(0x00000001), + FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002), + FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004), + /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */ - FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), + FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010), /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is reserved for the DOS SUBDIRECTORY flag. */ - FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), - FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), - FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), + FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020), + FILE_ATTR_DEVICE = cpu_to_le32(0x00000040), + FILE_ATTR_NORMAL = cpu_to_le32(0x00000080), - FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), - FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), - FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), - FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), + FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100), + FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200), + FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400), + FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800), - FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), - FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), - FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), + FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000), + FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000), + FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000), - FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), + FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7), /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the FILE_ATTR_DEVICE and preserves everything else. This mask is used to obtain all flags that are valid for reading. */ - FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), + FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7), /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask - is used to to obtain all flags that are valid for setting. */ - + is used to obtain all flags that are valid for setting. */ /* - * The following flags are only present in the FILE_NAME attribute (in - * the field file_attributes). + * The flag FILE_ATTR_DUP_FILENAME_INDEX_PRESENT is present in all + * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION + * attribute of an mft record. */ - FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), + FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this is a directory or not, i.e. whether it has an index root attribute or not. */ - FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), + FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000), /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this file has a view index present (eg. object id index, quota index, one of the security indexes or the encrypting @@ -887,7 +879,7 @@ typedef struct { Windows this is only updated when accessed if some time delta has passed since the last update. Also, - last access times updates can be + last access time updates can be disabled altogether for speed. */ /* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ /* 36*/ union { @@ -1021,10 +1013,17 @@ enum { FILE_NAME_POSIX = 0x00, /* This is the largest namespace. It is case sensitive and allows all Unicode characters except for: '\0' and '/'. Beware that in - WinNT/2k files which eg have the same name except for their case - will not be distinguished by the standard utilities and thus a "del - filename" will delete both "filename" and "fileName" without - warning. */ + WinNT/2k/2003 by default files which eg have the same name except + for their case will not be distinguished by the standard utilities + and thus a "del filename" will delete both "filename" and "fileName" + without warning. However if for example Services For Unix (SFU) are + installed and the case sensitive option was enabled at installation + time, then you can create/access/delete such files. + Note that even SFU places restrictions on the filenames beyond the + '\0' and '/' and in particular the following set of characters is + not allowed: '"', '/', '<', '>', '\'. All other characters, + including the ones no allowed in WIN32 namespace are allowed. + Tested with SFU 3.5 (this is now free) running on Windows XP. */ FILE_NAME_WIN32 = 0x01, /* The standard WinNT/2k NTFS long filenames. Case insensitive. All Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\', @@ -1064,11 +1063,22 @@ typedef struct { modified. */ /* 20*/ sle64 last_access_time; /* Time this mft record was last accessed. */ -/* 28*/ sle64 allocated_size; /* Byte size of allocated space for the - data attribute. NOTE: Is a multiple - of the cluster size. */ -/* 30*/ sle64 data_size; /* Byte size of actual data in data - attribute. */ +/* 28*/ sle64 allocated_size; /* Byte size of on-disk allocated space + for the unnamed data attribute. So + for normal $DATA, this is the + allocated_size from the unnamed + $DATA attribute and for compressed + and/or sparse $DATA, this is the + compressed_size from the unnamed + $DATA attribute. For a directory or + other inode without an unnamed $DATA + attribute, this is always 0. NOTE: + This is a multiple of the cluster + size. */ +/* 30*/ sle64 data_size; /* Byte size of actual data in unnamed + data attribute. For a directory or + other inode without an unnamed $DATA + attribute, this is always 0. */ /* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ /* 3c*/ union { /* 3c*/ struct { @@ -1425,42 +1435,42 @@ enum { /* Specific rights for files and directories are as follows: */ /* Right to read data from the file. (FILE) */ - FILE_READ_DATA = const_cpu_to_le32(0x00000001), + FILE_READ_DATA = cpu_to_le32(0x00000001), /* Right to list contents of a directory. (DIRECTORY) */ - FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), + FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001), /* Right to write data to the file. (FILE) */ - FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), + FILE_WRITE_DATA = cpu_to_le32(0x00000002), /* Right to create a file in the directory. (DIRECTORY) */ - FILE_ADD_FILE = const_cpu_to_le32(0x00000002), + FILE_ADD_FILE = cpu_to_le32(0x00000002), /* Right to append data to the file. (FILE) */ - FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), + FILE_APPEND_DATA = cpu_to_le32(0x00000004), /* Right to create a subdirectory. (DIRECTORY) */ - FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), + FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004), /* Right to read extended attributes. (FILE/DIRECTORY) */ - FILE_READ_EA = const_cpu_to_le32(0x00000008), + FILE_READ_EA = cpu_to_le32(0x00000008), /* Right to write extended attributes. (FILE/DIRECTORY) */ - FILE_WRITE_EA = const_cpu_to_le32(0x00000010), + FILE_WRITE_EA = cpu_to_le32(0x00000010), /* Right to execute a file. (FILE) */ - FILE_EXECUTE = const_cpu_to_le32(0x00000020), + FILE_EXECUTE = cpu_to_le32(0x00000020), /* Right to traverse the directory. (DIRECTORY) */ - FILE_TRAVERSE = const_cpu_to_le32(0x00000020), + FILE_TRAVERSE = cpu_to_le32(0x00000020), /* * Right to delete a directory and all the files it contains (its * children), even if the files are read-only. (DIRECTORY) */ - FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), + FILE_DELETE_CHILD = cpu_to_le32(0x00000040), /* Right to read file attributes. (FILE/DIRECTORY) */ - FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), + FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080), /* Right to change file attributes. (FILE/DIRECTORY) */ - FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), + FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100), /* * The standard rights (bits 16 to 23). These are independent of the @@ -1468,27 +1478,27 @@ enum { */ /* Right to delete the object. */ - DELETE = const_cpu_to_le32(0x00010000), + DELETE = cpu_to_le32(0x00010000), /* * Right to read the information in the object's security descriptor, * not including the information in the SACL, i.e. right to read the * security descriptor and owner. */ - READ_CONTROL = const_cpu_to_le32(0x00020000), + READ_CONTROL = cpu_to_le32(0x00020000), /* Right to modify the DACL in the object's security descriptor. */ - WRITE_DAC = const_cpu_to_le32(0x00040000), + WRITE_DAC = cpu_to_le32(0x00040000), /* Right to change the owner in the object's security descriptor. */ - WRITE_OWNER = const_cpu_to_le32(0x00080000), + WRITE_OWNER = cpu_to_le32(0x00080000), /* * Right to use the object for synchronization. Enables a process to * wait until the object is in the signalled state. Some object types * do not support this access right. */ - SYNCHRONIZE = const_cpu_to_le32(0x00100000), + SYNCHRONIZE = cpu_to_le32(0x00100000), /* * The following STANDARD_RIGHTS_* are combinations of the above for @@ -1496,25 +1506,25 @@ enum { */ /* These are currently defined to READ_CONTROL. */ - STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), - STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), - STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000), + STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000), + STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000), /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ - STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), + STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000), /* * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and * SYNCHRONIZE access. */ - STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), + STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000), /* * The access system ACL and maximum allowed access types (bits 24 to * 25, bits 26 to 27 are reserved). */ - ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), - MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), + ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000), + MAXIMUM_ALLOWED = cpu_to_le32(0x02000000), /* * The generic rights (bits 28 to 31). These map onto the standard and @@ -1522,10 +1532,10 @@ enum { */ /* Read, write, and execute access. */ - GENERIC_ALL = const_cpu_to_le32(0x10000000), + GENERIC_ALL = cpu_to_le32(0x10000000), /* Execute access. */ - GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), + GENERIC_EXECUTE = cpu_to_le32(0x20000000), /* * Write access. For files, this maps onto: @@ -1534,7 +1544,7 @@ enum { * For directories, the mapping has the same numerical value. See * above for the descriptions of the rights granted. */ - GENERIC_WRITE = const_cpu_to_le32(0x40000000), + GENERIC_WRITE = cpu_to_le32(0x40000000), /* * Read access. For files, this maps onto: @@ -1543,7 +1553,7 @@ enum { * For directories, the mapping has the same numberical value. See * above for the descriptions of the rights granted. */ - GENERIC_READ = const_cpu_to_le32(0x80000000), + GENERIC_READ = cpu_to_le32(0x80000000), }; typedef le32 ACCESS_MASK; @@ -1583,8 +1593,8 @@ typedef struct { * The object ACE flags (32-bit). */ enum { - ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), - ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), + ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1), + ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2), }; typedef le32 OBJECT_ACE_FLAGS; @@ -1647,13 +1657,13 @@ typedef enum { * pointed to by the Owner field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the SID with - * respect to inheritence of an owner. + * respect to inheritance of an owner. * * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in * the Group field was provided by a defaulting mechanism rather than * explicitly provided by the original provider of the security * descriptor. This may affect the treatment of the SID with respect to - * inheritence of a primary group. + * inheritance of a primary group. * * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security * descriptor contains a discretionary ACL. If this flag is set and the @@ -1664,7 +1674,7 @@ typedef enum { * pointed to by the Dacl field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the ACL with - * respect to inheritence of an ACL. This flag is ignored if the + * respect to inheritance of an ACL. This flag is ignored if the * DaclPresent flag is not set. * * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security @@ -1676,7 +1686,7 @@ typedef enum { * pointed to by the Sacl field was provided by a defaulting mechanism * rather than explicitly provided by the original provider of the * security descriptor. This may affect the treatment of the ACL with - * respect to inheritence of an ACL. This flag is ignored if the + * respect to inheritance of an ACL. This flag is ignored if the * SaclPresent flag is not set. * * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security @@ -1685,23 +1695,23 @@ typedef enum { * expressed as offsets from the beginning of the security descriptor. */ enum { - SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), - SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), - SE_DACL_PRESENT = const_cpu_to_le16(0x0004), - SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), - - SE_SACL_PRESENT = const_cpu_to_le16(0x0010), - SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), - - SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), - SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), - SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), - SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), - - SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), - SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), - SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), - SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) + SE_OWNER_DEFAULTED = cpu_to_le16(0x0001), + SE_GROUP_DEFAULTED = cpu_to_le16(0x0002), + SE_DACL_PRESENT = cpu_to_le16(0x0004), + SE_DACL_DEFAULTED = cpu_to_le16(0x0008), + + SE_SACL_PRESENT = cpu_to_le16(0x0010), + SE_SACL_DEFAULTED = cpu_to_le16(0x0020), + + SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100), + SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200), + SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400), + SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800), + + SE_DACL_PROTECTED = cpu_to_le16(0x1000), + SE_SACL_PROTECTED = cpu_to_le16(0x2000), + SE_RM_CONTROL_VALID = cpu_to_le16(0x4000), + SE_SELF_RELATIVE = cpu_to_le16(0x8000) } __attribute__ ((__packed__)); typedef le16 SECURITY_DESCRIPTOR_CONTROL; @@ -1889,20 +1899,21 @@ typedef struct { * Possible flags for the volume (16-bit). */ enum { - VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), - VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), - VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), - VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), + VOLUME_IS_DIRTY = cpu_to_le16(0x0001), + VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002), + VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004), + VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008), - VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), - VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), + VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010), + VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020), - VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), + VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000), + VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000), - VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), + VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f), /* To make our life easier when checking if we must mount read-only. */ - VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8027), + VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027), } __attribute__ ((__packed__)); typedef le16 VOLUME_FLAGS; @@ -1997,14 +2008,14 @@ typedef struct { * * When a directory is small enough to fit inside the index root then this * is the only attribute describing the directory. When the directory is too - * large to fit in the index root, on the other hand, two aditional attributes + * large to fit in the index root, on the other hand, two additional attributes * are present: an index allocation attribute, containing sub-nodes of the B+ * directory tree (see below), and a bitmap attribute, describing which virtual * cluster numbers (vcns) in the index allocation attribute are in use by an * index block. * * NOTE: The root directory (FILE_root) contains an entry for itself. Other - * dircetories do not contain entries for themselves, though. + * directories do not contain entries for themselves, though. */ typedef struct { ATTR_TYPE type; /* Type of the indexed attribute. Is @@ -2087,26 +2098,26 @@ typedef struct { * The user quota flags. Names explain meaning. */ enum { - QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), - QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), - QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), + QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001), + QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002), + QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004), - QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), + QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007), /* This is a bit mask for the user quota flags. */ /* * These flags are only present in the quota defaults index entry, i.e. * in the entry where owner_id = QUOTA_DEFAULTS_ID. */ - QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), - QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), - QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), - QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), - - QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), - QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), - QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), - QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), + QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010), + QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020), + QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040), + QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080), + + QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100), + QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200), + QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400), + QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800), }; typedef le32 QUOTA_FLAGS; @@ -2150,9 +2161,9 @@ typedef struct { * Predefined owner_id values (32-bit). */ enum { - QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), - QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), - QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), + QUOTA_INVALID_ID = cpu_to_le32(0x00000000), + QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001), + QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100), }; /* @@ -2167,14 +2178,14 @@ typedef enum { * Index entry flags (16-bit). */ enum { - INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a + INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a sub-node, i.e. a reference to an index block in form of a virtual cluster number (see below). */ - INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last + INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last entry in an index block. The index entry does not represent a file but it can point to a sub-node. */ - INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force + INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16-bit. */ } __attribute__ ((__packed__)); @@ -2272,7 +2283,7 @@ typedef struct { // the key_length is zero, then the vcn immediately // follows the INDEX_ENTRY_HEADER. Regardless of // key_length, the address of the 8-byte boundary - // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by + // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN), // where sizeof(VCN) can be hardcoded as 8 if wanted. */ } __attribute__ ((__packed__)) INDEX_ENTRY; @@ -2312,26 +2323,26 @@ typedef struct { * These are the predefined reparse point tags: */ enum { - IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), - IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), - IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), + IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000), + IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000), + IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000), - IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), - IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), - IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000), + IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001), - IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), - IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), - IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), - IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), + IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005), + IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006), + IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007), + IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008), - IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), + IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003), - IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), + IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004), - IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), + IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000), - IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), + IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff), }; /* @@ -2367,7 +2378,9 @@ typedef struct { * Extended attribute flags (8-bit). */ enum { - NEED_EA = 0x80 + NEED_EA = 0x80 /* If set the file to which the EA belongs + cannot be interpreted without understanding + the associates extended attributes. */ } __attribute__ ((__packed__)); typedef u8 EA_FLAGS; @@ -2375,20 +2388,20 @@ typedef u8 EA_FLAGS; /* * Attribute: Extended attribute (EA) (0xe0). * - * NOTE: Always non-resident. (Is this true?) + * NOTE: Can be resident or non-resident. * * Like the attribute list and the index buffer list, the EA attribute value is * a sequence of EA_ATTR variable length records. - * - * FIXME: It appears weird that the EA name is not unicode. Is it true? */ typedef struct { le32 next_entry_offset; /* Offset to the next EA_ATTR. */ EA_FLAGS flags; /* Flags describing the EA. */ - u8 ea_name_length; /* Length of the name of the EA in bytes. */ + u8 ea_name_length; /* Length of the name of the EA in bytes + excluding the '\0' byte terminator. */ le16 ea_value_length; /* Byte size of the EA's value. */ - u8 ea_name[0]; /* Name of the EA. */ - u8 ea_value[0]; /* The value of the EA. Immediately follows + u8 ea_name[0]; /* Name of the EA. Note this is ASCII, not + Unicode and it is zero terminated. */ + u8 ea_value[0]; /* The value of the EA. Immediately follows the name. */ } __attribute__ ((__packed__)) EA_ATTR; diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index 5af3bf0b7ee..1711b710b64 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -76,6 +76,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, * @count: number of clusters to allocate * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none) * @zone: zone from which to allocate the clusters + * @is_extension: if 'true', this is an attribute extension * * Allocate @count clusters preferably starting at cluster @start_lcn or at the * current allocator position if @start_lcn is -1, on the mounted ntfs volume @@ -86,6 +87,13 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, * @start_vcn specifies the vcn of the first allocated cluster. This makes * merging the resulting runlist with the old runlist easier. * + * If @is_extension is 'true', the caller is allocating clusters to extend an + * attribute and if it is 'false', the caller is allocating clusters to fill a + * hole in an attribute. Practically the difference is that if @is_extension + * is 'true' the returned runlist will be terminated with LCN_ENOENT and if + * @is_extension is 'false' the runlist will be terminated with + * LCN_RL_NOT_MAPPED. + * * You need to check the return value with IS_ERR(). If this is false, the * function was successful and the return value is a runlist describing the * allocated cluster(s). If IS_ERR() is true, the function failed and @@ -137,7 +145,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, */ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, const s64 count, const LCN start_lcn, - const NTFS_CLUSTER_ALLOCATION_ZONES zone) + const NTFS_CLUSTER_ALLOCATION_ZONES zone, + const bool is_extension) { LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; @@ -310,7 +319,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, continue; } bit = 1 << (lcn & 7); - ntfs_debug("bit %i.", bit); + ntfs_debug("bit 0x%x.", bit); /* If the bit is already set, go onto the next one. */ if (*byte & bit) { lcn++; @@ -729,7 +738,7 @@ out: /* Add runlist terminator element. */ if (likely(rl)) { rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length; - rl[rlpos].lcn = LCN_RL_NOT_MAPPED; + rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED; rl[rlpos].length = 0; } if (likely(page && !IS_ERR(page))) { @@ -782,6 +791,7 @@ out: * @ni: ntfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters + * @ctx: active attribute search context if present or NULL if not * @is_rollback: true if this is a rollback operation * * Free @count clusters starting at the cluster @start_vcn in the runlist @@ -791,15 +801,39 @@ out: * deallocated. Thus, to completely free all clusters in a runlist, use * @start_vcn = 0 and @count = -1. * - * @is_rollback should always be FALSE, it is for internal use to rollback + * If @ctx is specified, it is an active search context of @ni and its base mft + * record. This is needed when __ntfs_cluster_free() encounters unmapped + * runlist fragments and allows their mapping. If you do not have the mft + * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will + * perform the necessary mapping and unmapping. + * + * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it + * before returning. Thus, @ctx will be left pointing to the same attribute on + * return as on entry. However, the actual pointers in @ctx may point to + * different memory locations on return, so you must remember to reset any + * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(), + * you will probably want to do: + * m = ctx->mrec; + * a = ctx->attr; + * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that + * you cache ctx->mrec in a variable @m of type MFT_RECORD *. + * + * @is_rollback should always be 'false', it is for internal use to rollback * errors. You probably want to use ntfs_cluster_free() instead. * - * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller - * has to deal with it later. + * Note, __ntfs_cluster_free() does not modify the runlist, so you have to + * remove from the runlist or mark sparse the freed runs later. * * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * + * WARNING: If @ctx is supplied, regardless of whether success or failure is + * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx + * is no longer valid, i.e. you need to either call + * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it. + * In that case PTR_ERR(@ctx->mrec) will give you the error code for + * why the mapping of the old inode failed. + * * Locking: - The runlist described by @ni must be locked for writing on entry * and is locked on return. Note the runlist may be modified when * needed runlist fragments need to be mapped. @@ -807,9 +841,13 @@ out: * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. + * - If @ctx is NULL, the base mft record of @ni must not be mapped on + * entry and it will be left unmapped on return. + * - If @ctx is not NULL, the base mft record must be mapped on entry + * and it will be left mapped on return. */ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, - const BOOL is_rollback) + ntfs_attr_search_ctx *ctx, const bool is_rollback) { s64 delta, to_free, total_freed, real_freed; ntfs_volume *vol; @@ -839,7 +877,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, total_freed = real_freed = 0; - rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, TRUE); + rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -893,7 +931,7 @@ s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count, /* Attempt to map runlist. */ vcn = rl->vcn; - rl = ntfs_attr_find_vcn_nolock(ni, vcn, TRUE); + rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) @@ -961,7 +999,7 @@ err_out: * If rollback fails, set the volume errors flag, emit an error * message, and return the error code. */ - delta = __ntfs_cluster_free(ni, start_vcn, total_freed, TRUE); + delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true); if (delta < 0) { ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving " "inconsistent metadata! Unmount and run " diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index a6a8827882e..2adb0431694 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h @@ -27,6 +27,7 @@ #include <linux/fs.h> +#include "attrib.h" #include "types.h" #include "inode.h" #include "runlist.h" @@ -41,16 +42,18 @@ typedef enum { extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn, const s64 count, const LCN start_lcn, - const NTFS_CLUSTER_ALLOCATION_ZONES zone); + const NTFS_CLUSTER_ALLOCATION_ZONES zone, + const bool is_extension); extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, - s64 count, const BOOL is_rollback); + s64 count, ntfs_attr_search_ctx *ctx, const bool is_rollback); /** * ntfs_cluster_free - free clusters on an ntfs volume * @ni: ntfs inode whose runlist describes the clusters to free * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters * @count: number of clusters to free or -1 for all clusters + * @ctx: active attribute search context if present or NULL if not * * Free @count clusters starting at the cluster @start_vcn in the runlist * described by the ntfs inode @ni. @@ -59,12 +62,36 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, * deallocated. Thus, to completely free all clusters in a runlist, use * @start_vcn = 0 and @count = -1. * - * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller - * has to deal with it later. + * If @ctx is specified, it is an active search context of @ni and its base mft + * record. This is needed when ntfs_cluster_free() encounters unmapped runlist + * fragments and allows their mapping. If you do not have the mft record + * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform + * the necessary mapping and unmapping. + * + * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it + * before returning. Thus, @ctx will be left pointing to the same attribute on + * return as on entry. However, the actual pointers in @ctx may point to + * different memory locations on return, so you must remember to reset any + * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(), + * you will probably want to do: + * m = ctx->mrec; + * a = ctx->attr; + * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that + * you cache ctx->mrec in a variable @m of type MFT_RECORD *. + * + * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove + * from the runlist or mark sparse the freed runs later. * * Return the number of deallocated clusters (not counting sparse ones) on * success and -errno on error. * + * WARNING: If @ctx is supplied, regardless of whether success or failure is + * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx + * is no longer valid, i.e. you need to either call + * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it. + * In that case PTR_ERR(@ctx->mrec) will give you the error code for + * why the mapping of the old inode failed. + * * Locking: - The runlist described by @ni must be locked for writing on entry * and is locked on return. Note the runlist may be modified when * needed runlist fragments need to be mapped. @@ -72,11 +99,15 @@ extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, * on return. * - This function takes the volume lcn bitmap lock for writing and * modifies the bitmap contents. + * - If @ctx is NULL, the base mft record of @ni must not be mapped on + * entry and it will be left unmapped on return. + * - If @ctx is not NULL, the base mft record must be mapped on entry + * and it will be left mapped on return. */ static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, - s64 count) + s64 count, ntfs_attr_search_ctx *ctx) { - return __ntfs_cluster_free(ni, start_vcn, count, FALSE); + return __ntfs_cluster_free(ni, start_vcn, count, ctx, false); } extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol, diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 0fd70295cca..c71de292c5a 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -1,7 +1,7 @@ /* * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. * - * Copyright (c) 2002-2005 Anton Altaparmakov + * Copyright (c) 2002-2007 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -26,6 +26,7 @@ #include <linux/highmem.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include <linux/log2.h> #include "attrib.h" #include "aops.h" @@ -41,18 +42,18 @@ * @rp: restart page header to check * @pos: position in @vi at which the restart page header resides * - * Check the restart page header @rp for consistency and return TRUE if it is - * consistent and FALSE otherwise. + * Check the restart page header @rp for consistency and return 'true' if it is + * consistent and 'false' otherwise. * * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not * require the full restart page. */ -static BOOL ntfs_check_restart_page_header(struct inode *vi, +static bool ntfs_check_restart_page_header(struct inode *vi, RESTART_PAGE_HEADER *rp, s64 pos) { u32 logfile_system_page_size, logfile_log_page_size; u16 ra_ofs, usa_count, usa_ofs, usa_end = 0; - BOOL have_usa = TRUE; + bool have_usa = true; ntfs_debug("Entering."); /* @@ -65,9 +66,9 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, logfile_log_page_size < NTFS_BLOCK_SIZE || logfile_system_page_size & (logfile_system_page_size - 1) || - logfile_log_page_size & (logfile_log_page_size - 1)) { + !is_power_of_2(logfile_log_page_size)) { ntfs_error(vi->i_sb, "$LogFile uses unsupported page size."); - return FALSE; + return false; } /* * We must be either at !pos (1st restart page) or at pos = system page @@ -76,7 +77,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, if (pos && pos != logfile_system_page_size) { ntfs_error(vi->i_sb, "Found restart area in incorrect " "position in $LogFile."); - return FALSE; + return false; } /* We only know how to handle version 1.1. */ if (sle16_to_cpu(rp->major_ver) != 1 || @@ -85,14 +86,14 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, "supported. (This driver supports version " "1.1 only.)", (int)sle16_to_cpu(rp->major_ver), (int)sle16_to_cpu(rp->minor_ver)); - return FALSE; + return false; } /* * If chkdsk has been run the restart page may not be protected by an * update sequence array. */ if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) { - have_usa = FALSE; + have_usa = false; goto skip_usa_checks; } /* Verify the size of the update sequence array. */ @@ -100,7 +101,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, if (usa_count != le16_to_cpu(rp->usa_count)) { ntfs_error(vi->i_sb, "$LogFile restart page specifies " "inconsistent update sequence array count."); - return FALSE; + return false; } /* Verify the position of the update sequence array. */ usa_ofs = le16_to_cpu(rp->usa_ofs); @@ -109,7 +110,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi, usa_end > NTFS_BLOCK_SIZE - sizeof(u16)) { ntfs_error(vi->i_sb, "$LogFile restart page specifies " "inconsistent update sequence array offset."); - return FALSE; + return false; } skip_usa_checks: /* @@ -124,7 +125,7 @@ skip_usa_checks: ra_ofs > logfile_system_page_size) { ntfs_error(vi->i_sb, "$LogFile restart page specifies " "inconsistent restart area offset."); - return FALSE; + return false; } /* * Only restart pages modified by chkdsk are allowed to have chkdsk_lsn @@ -133,10 +134,10 @@ skip_usa_checks: if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) { ntfs_error(vi->i_sb, "$LogFile restart page is not modified " "by chkdsk but a chkdsk LSN is specified."); - return FALSE; + return false; } ntfs_debug("Done."); - return TRUE; + return true; } /** @@ -145,7 +146,7 @@ skip_usa_checks: * @rp: restart page whose restart area to check * * Check the restart area of the restart page @rp for consistency and return - * TRUE if it is consistent and FALSE otherwise. + * 'true' if it is consistent and 'false' otherwise. * * This function assumes that the restart page header has already been * consistency checked. @@ -153,7 +154,7 @@ skip_usa_checks: * This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not * require the full restart page. */ -static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) +static bool ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) { u64 file_size; RESTART_AREA *ra; @@ -172,7 +173,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) NTFS_BLOCK_SIZE - sizeof(u16)) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "inconsistent file offset."); - return FALSE; + return false; } /* * Now that we can access ra->client_array_offset, make sure everything @@ -186,7 +187,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) ra_ofs + ca_ofs > NTFS_BLOCK_SIZE - sizeof(u16)) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "inconsistent client array offset."); - return FALSE; + return false; } /* * The restart area must end within the system page size both when @@ -203,7 +204,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) "of the system page size specified by the " "restart page header and/or the specified " "restart area length is inconsistent."); - return FALSE; + return false; } /* * The ra->client_free_list and ra->client_in_use_list must be either @@ -218,7 +219,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) le16_to_cpu(ra->log_clients))) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "overflowing client free and/or in use lists."); - return FALSE; + return false; } /* * Check ra->seq_number_bits against ra->file_size for consistency. @@ -233,24 +234,24 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) if (le32_to_cpu(ra->seq_number_bits) != 67 - fs_bits) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "inconsistent sequence number bits."); - return FALSE; + return false; } /* The log record header length must be a multiple of 8. */ if (((le16_to_cpu(ra->log_record_header_length) + 7) & ~7) != le16_to_cpu(ra->log_record_header_length)) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "inconsistent log record header length."); - return FALSE; + return false; } /* Dito for the log page data offset. */ if (((le16_to_cpu(ra->log_page_data_offset) + 7) & ~7) != le16_to_cpu(ra->log_page_data_offset)) { ntfs_error(vi->i_sb, "$LogFile restart area specifies " "inconsistent log page data offset."); - return FALSE; + return false; } ntfs_debug("Done."); - return TRUE; + return true; } /** @@ -259,7 +260,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) * @rp: restart page whose log client array to check * * Check the log client array of the restart page @rp for consistency and - * return TRUE if it is consistent and FALSE otherwise. + * return 'true' if it is consistent and 'false' otherwise. * * This function assumes that the restart page header and the restart area have * already been consistency checked. @@ -268,13 +269,13 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp) * function needs @rp->system_page_size bytes in @rp, i.e. it requires the full * restart page and the page must be multi sector transfer deprotected. */ -static BOOL ntfs_check_log_client_array(struct inode *vi, +static bool ntfs_check_log_client_array(struct inode *vi, RESTART_PAGE_HEADER *rp) { RESTART_AREA *ra; LOG_CLIENT_RECORD *ca, *cr; u16 nr_clients, idx; - BOOL in_free_list, idx_is_first; + bool in_free_list, idx_is_first; ntfs_debug("Entering."); ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); @@ -290,9 +291,9 @@ static BOOL ntfs_check_log_client_array(struct inode *vi, */ nr_clients = le16_to_cpu(ra->log_clients); idx = le16_to_cpu(ra->client_free_list); - in_free_list = TRUE; + in_free_list = true; check_list: - for (idx_is_first = TRUE; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--, + for (idx_is_first = true; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--, idx = le16_to_cpu(cr->next_client)) { if (!nr_clients || idx >= le16_to_cpu(ra->log_clients)) goto err_out; @@ -302,20 +303,20 @@ check_list: if (idx_is_first) { if (cr->prev_client != LOGFILE_NO_CLIENT) goto err_out; - idx_is_first = FALSE; + idx_is_first = false; } } /* Switch to and check the in use list if we just did the free list. */ if (in_free_list) { - in_free_list = FALSE; + in_free_list = false; idx = le16_to_cpu(ra->client_in_use_list); goto check_list; } ntfs_debug("Done."); - return TRUE; + return true; err_out: ntfs_error(vi->i_sb, "$LogFile log client array is corrupt."); - return FALSE; + return false; } /** @@ -337,7 +338,7 @@ err_out: * copy of the complete multi sector transfer deprotected page. On failure, * *@wrp is undefined. * - * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current + * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current * logfile lsn according to this restart page. On failure, *@lsn is undefined. * * The following error codes are defined: @@ -468,8 +469,8 @@ err_out: * @log_vi: struct inode of loaded journal $LogFile to check * @rp: [OUT] on success this is a copy of the current restart page * - * Check the $LogFile journal for consistency and return TRUE if it is - * consistent and FALSE if not. On success, the current restart page is + * Check the $LogFile journal for consistency and return 'true' if it is + * consistent and 'false' if not. On success, the current restart page is * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it. * * At present we only check the two restart pages and ignore the log record @@ -480,7 +481,7 @@ err_out: * if the $LogFile was created on a system with a different page size to ours * yet and mst deprotection would fail if our page size is smaller. */ -BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) +bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) { s64 size, pos; LSN rstr1_lsn, rstr2_lsn; @@ -491,7 +492,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) RESTART_PAGE_HEADER *rstr1_ph = NULL; RESTART_PAGE_HEADER *rstr2_ph = NULL; int log_page_size, log_page_mask, err; - BOOL logfile_is_empty = TRUE; + bool logfile_is_empty = true; u8 log_page_bits; ntfs_debug("Entering."); @@ -515,10 +516,10 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) log_page_size = PAGE_CACHE_SIZE; log_page_mask = log_page_size - 1; /* - * Use generic_ffs() instead of ffs() to enable the compiler to + * Use ntfs_ffs() instead of ffs() to enable the compiler to * optimize log_page_size and log_page_bits into constants. */ - log_page_bits = generic_ffs(log_page_size) - 1; + log_page_bits = ntfs_ffs(log_page_size) - 1; size &= ~(s64)(log_page_size - 1); /* * Ensure the log file is big enough to store at least the two restart @@ -527,7 +528,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) if (size < log_page_size * 2 || (size - log_page_size * 2) >> log_page_bits < MinLogRecordPages) { ntfs_error(vol->sb, "$LogFile is too small."); - return FALSE; + return false; } /* * Read through the file looking for a restart page. Since the restart @@ -556,7 +557,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) * means we are done. */ if (!ntfs_is_empty_recordp((le32*)kaddr)) - logfile_is_empty = FALSE; + logfile_is_empty = false; else if (!logfile_is_empty) break; /* @@ -615,13 +616,13 @@ BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) NVolSetLogFileEmpty(vol); is_empty: ntfs_debug("Done. ($LogFile is empty.)"); - return TRUE; + return true; } if (!rstr1_ph) { BUG_ON(rstr2_ph); ntfs_error(vol->sb, "Did not find any restart pages in " "$LogFile and it was not empty."); - return FALSE; + return false; } /* If both restart pages were found, use the more recent one. */ if (rstr2_ph) { @@ -648,11 +649,11 @@ is_empty: else ntfs_free(rstr1_ph); ntfs_debug("Done."); - return TRUE; + return true; err_out: if (rstr1_ph) ntfs_free(rstr1_ph); - return FALSE; + return false; } /** @@ -660,22 +661,22 @@ err_out: * @log_vi: struct inode of loaded journal $LogFile to check * @rp: copy of the current restart page * - * Analyze the $LogFile journal and return TRUE if it indicates the volume was - * shutdown cleanly and FALSE if not. + * Analyze the $LogFile journal and return 'true' if it indicates the volume was + * shutdown cleanly and 'false' if not. * * At present we only look at the two restart pages and ignore the log record * pages. This is a little bit crude in that there will be a very small number * of cases where we think that a volume is dirty when in fact it is clean. * This should only affect volumes that have not been shutdown cleanly but did * not have any pending, non-check-pointed i/o, i.e. they were completely idle - * at least for the five seconds preceeding the unclean shutdown. + * at least for the five seconds preceding the unclean shutdown. * * This function assumes that the $LogFile journal has already been consistency * checked by a call to ntfs_check_logfile() and in particular if the $LogFile * is empty this function requires that NVolLogFileEmpty() is true otherwise an * empty volume will be reported as dirty. */ -BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) +bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) { ntfs_volume *vol = NTFS_SB(log_vi->i_sb); RESTART_AREA *ra; @@ -684,7 +685,7 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) /* An empty $LogFile must have been clean before it got emptied. */ if (NVolLogFileEmpty(vol)) { ntfs_debug("Done. ($LogFile is empty.)"); - return TRUE; + return true; } BUG_ON(!rp); if (!ntfs_is_rstr_record(rp->magic) && @@ -693,7 +694,7 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) "probably a bug in that the $LogFile should " "have been consistency checked before calling " "this function."); - return FALSE; + return false; } ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset)); /* @@ -704,44 +705,159 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) if (ra->client_in_use_list != LOGFILE_NO_CLIENT && !(ra->flags & RESTART_VOLUME_IS_CLEAN)) { ntfs_debug("Done. $LogFile indicates a dirty shutdown."); - return FALSE; + return false; } /* $LogFile indicates a clean shutdown. */ ntfs_debug("Done. $LogFile indicates a clean shutdown."); - return TRUE; + return true; } /** * ntfs_empty_logfile - empty the contents of the $LogFile journal * @log_vi: struct inode of loaded journal $LogFile to empty * - * Empty the contents of the $LogFile journal @log_vi and return TRUE on - * success and FALSE on error. + * Empty the contents of the $LogFile journal @log_vi and return 'true' on + * success and 'false' on error. * * This function assumes that the $LogFile journal has already been consistency * checked by a call to ntfs_check_logfile() and that ntfs_is_logfile_clean() * has been used to ensure that the $LogFile is clean. */ -BOOL ntfs_empty_logfile(struct inode *log_vi) +bool ntfs_empty_logfile(struct inode *log_vi) { - ntfs_volume *vol = NTFS_SB(log_vi->i_sb); + VCN vcn, end_vcn; + ntfs_inode *log_ni = NTFS_I(log_vi); + ntfs_volume *vol = log_ni->vol; + struct super_block *sb = vol->sb; + runlist_element *rl; + unsigned long flags; + unsigned block_size, block_size_bits; + int err; + bool should_wait = true; ntfs_debug("Entering."); - if (!NVolLogFileEmpty(vol)) { - int err; - - err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi), - 0xff); - if (unlikely(err)) { - ntfs_error(vol->sb, "Failed to fill $LogFile with " - "0xff bytes (error code %i).", err); - return FALSE; - } - /* Set the flag so we do not have to do it again on remount. */ - NVolSetLogFileEmpty(vol); + if (NVolLogFileEmpty(vol)) { + ntfs_debug("Done."); + return true; } + /* + * We cannot use ntfs_attr_set() because we may be still in the middle + * of a mount operation. Thus we do the emptying by hand by first + * zapping the page cache pages for the $LogFile/$DATA attribute and + * then emptying each of the buffers in each of the clusters specified + * by the runlist by hand. + */ + block_size = sb->s_blocksize; + block_size_bits = sb->s_blocksize_bits; + vcn = 0; + read_lock_irqsave(&log_ni->size_lock, flags); + end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >> + vol->cluster_size_bits; + read_unlock_irqrestore(&log_ni->size_lock, flags); + truncate_inode_pages(log_vi->i_mapping, 0); + down_write(&log_ni->runlist.lock); + rl = log_ni->runlist.rl; + if (unlikely(!rl || vcn < rl->vcn || !rl->length)) { +map_vcn: + err = ntfs_map_runlist_nolock(log_ni, vcn, NULL); + if (err) { + ntfs_error(sb, "Failed to map runlist fragment (error " + "%d).", -err); + goto err; + } + rl = log_ni->runlist.rl; + BUG_ON(!rl || vcn < rl->vcn || !rl->length); + } + /* Seek to the runlist element containing @vcn. */ + while (rl->length && vcn >= rl[1].vcn) + rl++; + do { + LCN lcn; + sector_t block, end_block; + s64 len; + + /* + * If this run is not mapped map it now and start again as the + * runlist will have been updated. + */ + lcn = rl->lcn; + if (unlikely(lcn == LCN_RL_NOT_MAPPED)) { + vcn = rl->vcn; + goto map_vcn; + } + /* If this run is not valid abort with an error. */ + if (unlikely(!rl->length || lcn < LCN_HOLE)) + goto rl_err; + /* Skip holes. */ + if (lcn == LCN_HOLE) + continue; + block = lcn << vol->cluster_size_bits >> block_size_bits; + len = rl->length; + if (rl[1].vcn > end_vcn) + len = end_vcn - rl->vcn; + end_block = (lcn + len) << vol->cluster_size_bits >> + block_size_bits; + /* Iterate over the blocks in the run and empty them. */ + do { + struct buffer_head *bh; + + /* Obtain the buffer, possibly not uptodate. */ + bh = sb_getblk(sb, block); + BUG_ON(!bh); + /* Setup buffer i/o submission. */ + lock_buffer(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + /* Set the entire contents of the buffer to 0xff. */ + memset(bh->b_data, -1, block_size); + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_dirty(bh)) + clear_buffer_dirty(bh); + /* + * Submit the buffer and wait for i/o to complete but + * only for the first buffer so we do not miss really + * serious i/o errors. Once the first buffer has + * completed ignore errors afterwards as we can assume + * that if one buffer worked all of them will work. + */ + submit_bh(WRITE, bh); + if (should_wait) { + should_wait = false; + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + goto io_err; + } + brelse(bh); + } while (++block < end_block); + } while ((++rl)->vcn < end_vcn); + up_write(&log_ni->runlist.lock); + /* + * Zap the pages again just in case any got instantiated whilst we were + * emptying the blocks by hand. FIXME: We may not have completed + * writing to all the buffer heads yet so this may happen too early. + * We really should use a kernel thread to do the emptying + * asynchronously and then we can also set the volume dirty and output + * an error message if emptying should fail. + */ + truncate_inode_pages(log_vi->i_mapping, 0); + /* Set the flag so we do not have to do it again on remount. */ + NVolSetLogFileEmpty(vol); ntfs_debug("Done."); - return TRUE; + return true; +io_err: + ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk."); + goto dirty_err; +rl_err: + ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk."); +dirty_err: + NVolSetErrors(vol); + err = -EIO; +err: + up_write(&log_ni->runlist.lock); + ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).", + -err); + return false; } #endif /* NTFS_RW */ diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index a51f3dd0e9e..aa2b6ac3f0a 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -104,7 +104,7 @@ typedef struct { * in this particular client array. Also inside the client records themselves, * this means that there are no client records preceding or following this one. */ -#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) +#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff) #define LOGFILE_NO_CLIENT_CPU 0xffff /* @@ -112,8 +112,8 @@ typedef struct { * information about the log file in which they are present. */ enum { - RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), - RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ + RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002), + RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ } __attribute__ ((__packed__)); typedef le16 RESTART_AREA_FLAGS; @@ -222,7 +222,7 @@ typedef struct { /* 24*/ sle64 file_size; /* Usable byte size of the log file. If the restart_area_offset + the offset of the file_size are > 510 then corruption has - occured. This is the very first check when + occurred. This is the very first check when starting with the restart_area as if it fails it means that some of the above values will be corrupted by the multi sector @@ -296,13 +296,13 @@ typedef struct { /* sizeof() = 160 (0xa0) bytes */ } __attribute__ ((__packed__)) LOG_CLIENT_RECORD; -extern BOOL ntfs_check_logfile(struct inode *log_vi, +extern bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp); -extern BOOL ntfs_is_logfile_clean(struct inode *log_vi, +extern bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp); -extern BOOL ntfs_empty_logfile(struct inode *log_vi); +extern bool ntfs_empty_logfile(struct inode *log_vi); #endif /* NTFS_RW */ diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 590887b943f..a44b14cbcee 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -39,8 +39,7 @@ * If there was insufficient memory to complete the request, return NULL. * Depending on @gfp_mask the allocation may be guaranteed to succeed. */ -static inline void *__ntfs_malloc(unsigned long size, - gfp_t gfp_mask) +static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) { if (likely(size <= PAGE_SIZE)) { BUG_ON(!size); @@ -48,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); /* return (void *)__get_free_page(gfp_mask); */ } - if (likely(size >> PAGE_SHIFT < num_physpages)) + if (likely((size >> PAGE_SHIFT) < totalram_pages)) return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } @@ -86,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size) static inline void ntfs_free(void *addr) { - if (likely(((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END ))) { + if (!is_vmalloc_addr(addr)) { kfree(addr); /* free_page((unsigned long)addr); */ return; diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index b011369b595..3014a36a255 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -21,6 +21,7 @@ */ #include <linux/buffer_head.h> +#include <linux/slab.h> #include <linux/swap.h> #include "attrib.h" @@ -49,7 +50,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) ntfs_volume *vol = ni->vol; struct inode *mft_vi = vol->mft_ino; struct page *page; - unsigned long index, ofs, end_index; + unsigned long index, end_index; + unsigned ofs; BUG_ON(ni->page); /* @@ -71,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + vol->mft_record_size) { page = ERR_PTR(-ENOENT); - ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " + ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, " "which is beyond the end of the mft. " "This is probably a bug in the ntfs " "driver.", ni->mft_no); @@ -92,6 +94,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) "Run chkdsk.", ni->mft_no); ntfs_unmap_page(page); page = ERR_PTR(-EIO); + NVolSetErrors(vol); } err_out: ni->page = NULL; @@ -103,8 +106,8 @@ err_out: * map_mft_record - map, pin and lock an mft record * @ni: ntfs inode whose MFT record to map * - * First, take the mrec_lock semaphore. We might now be sleeping, while waiting - * for the semaphore if it was already locked by someone else. + * First, take the mrec_lock mutex. We might now be sleeping, while waiting + * for the mutex if it was already locked by someone else. * * The page of the record is mapped using map_mft_record_page() before being * returned to the caller. @@ -134,9 +137,9 @@ err_out: * So that code will end up having to own the mrec_lock of all mft * records/inodes present in the page before I/O can proceed. In that case we * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be - * accessing anything without owning the mrec_lock semaphore. But we do need - * to use them because of the read_cache_page() invocation and the code becomes - * so much simpler this way that it is well worth it. + * accessing anything without owning the mrec_lock mutex. But we do need to + * use them because of the read_cache_page() invocation and the code becomes so + * much simpler this way that it is well worth it. * * The mft record is now ours and we return a pointer to it. You need to check * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return @@ -159,13 +162,13 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni) atomic_inc(&ni->count); /* Serialize access to this mft record. */ - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); m = map_mft_record_page(ni); if (likely(!IS_ERR(m))) return m; - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); return m; @@ -216,7 +219,7 @@ void unmap_mft_record(ntfs_inode *ni) ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); unmap_mft_record_page(ni); - up(&ni->mrec_lock); + mutex_unlock(&ni->mrec_lock); atomic_dec(&ni->count); /* * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to @@ -249,7 +252,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, int i; unsigned long mft_no = MREF(mref); u16 seq_no = MSEQNO(mref); - BOOL destroy_ni = FALSE; + bool destroy_ni = false; ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", mft_no, base_ni->mft_no); @@ -260,7 +263,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, * in which case just return it. If not found, add it to the base * inode before returning it. */ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); if (base_ni->nr_extents > 0) { extent_nis = base_ni->ext.extent_ntfs_inos; for (i = 0; i < base_ni->nr_extents; i++) { @@ -273,7 +276,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, } } if (likely(ni != NULL)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* We found the record; just have to map and return it. */ m = map_mft_record(ni); @@ -300,7 +303,7 @@ map_err_out: /* Record wasn't there. Get a new ntfs inode and initialize it. */ ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); if (unlikely(!ni)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); return ERR_PTR(-ENOMEM); } @@ -311,7 +314,7 @@ map_err_out: /* Now map the record. */ m = map_mft_record(ni); if (IS_ERR(m)) { - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_clear_extent_inode(ni); goto map_err_out; @@ -320,7 +323,7 @@ map_err_out: if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { ntfs_error(base_ni->vol->sb, "Found stale extent mft " "reference! Corrupt filesystem. Run chkdsk."); - destroy_ni = TRUE; + destroy_ni = true; m = ERR_PTR(-EIO); goto unm_err_out; } @@ -329,11 +332,11 @@ map_err_out: ntfs_inode **tmp; int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); - tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS); + tmp = kmalloc(new_size, GFP_NOFS); if (unlikely(!tmp)) { ntfs_error(base_ni->vol->sb, "Failed to allocate " "internal buffer."); - destroy_ni = TRUE; + destroy_ni = true; m = ERR_PTR(-ENOMEM); goto unm_err_out; } @@ -346,14 +349,14 @@ map_err_out: base_ni->ext.extent_ntfs_inos = tmp; } base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); ntfs_debug("Done 2."); *ntfs_ino = ni; return m; unm_err_out: unmap_mft_record(ni); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); atomic_dec(&base_ni->count); /* * If the extent inode was not attached to the base inode we need to @@ -382,13 +385,12 @@ unm_err_out: * it is dirty in the inode meta data rather than the data page cache of the * inode, and thus there are no data pages that need writing out. Therefore, a * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the - * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to - * ensure ->write_inode is called from generic_osync_inode() and this needs to - * happen or the file data would not necessarily hit the device synchronously, - * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC - * simply "feels" better than just I_DIRTY_SYNC, since the file data has not - * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own - * would suggest. + * other hand, is not sufficient, because ->write_inode needs to be called even + * in case of fdatasync. This needs to happen or the file data would not + * necessarily hit the device synchronously, even though the vfs inode has the + * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just + * I_DIRTY_SYNC, since the file data has not actually hit the block device yet, + * which is not what I_DIRTY_SYNC on its own would suggest. */ void __mark_mft_record_dirty(ntfs_inode *ni) { @@ -398,12 +400,12 @@ void __mark_mft_record_dirty(ntfs_inode *ni) BUG_ON(NInoAttr(ni)); mark_ntfs_record_dirty(ni->page, ni->page_ofs); /* Determine the base vfs inode and mark it dirty, too. */ - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (likely(ni->nr_extents >= 0)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); __mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC); } @@ -472,7 +474,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, runlist_element *rl; unsigned int block_start, block_end, m_start, m_end, page_ofs; int i_bhs, nr_bhs, err = 0; - unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits; + unsigned char blocksize_bits = vol->sb->s_blocksize_bits; ntfs_debug("Entering for inode 0x%lx.", mft_no); BUG_ON(!max_bhs); @@ -584,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -649,10 +651,7 @@ err_out: * fs/ntfs/aops.c::mark_ntfs_record_dirty(). * * On success, clean the mft record and return 0. On error, leave the mft - * record dirty and return -errno. The caller should call make_bad_inode() on - * the base inode to ensure no more access happens to this inode. We do not do - * it here as the caller may want to finish writing other extent mft records - * first to minimize on-disk metadata inconsistencies. + * record dirty and return -errno. * * NOTE: We always perform synchronous i/o and ignore the @sync parameter. * However, if the mft record has a counterpart in the mft mirror and @sync is @@ -671,8 +670,8 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) { ntfs_volume *vol = ni->vol; struct page *page = ni->page; - unsigned char blocksize_bits = vol->mft_ino->i_blkbits; - unsigned int blocksize = 1 << blocksize_bits; + unsigned int blocksize = vol->sb->s_blocksize; + unsigned char blocksize_bits = vol->sb->s_blocksize_bits; int max_bhs = vol->mft_record_size / blocksize; struct buffer_head *bhs[max_bhs]; struct buffer_head *bh, *head; @@ -780,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -858,7 +857,7 @@ err_out: * caller is responsible for unlocking the ntfs inode and unpinning the base * vfs inode. * - * Return TRUE if the mft record may be written out and FALSE if not. + * Return 'true' if the mft record may be written out and 'false' if not. * * The caller has locked the page and cleared the uptodate flag on it which * means that we can safely write out any dirty mft records that do not have @@ -869,7 +868,7 @@ err_out: * Here is a description of the tests we perform: * * If the inode is found in icache we know the mft record must be a base mft - * record. If it is dirty, we do not write it and return FALSE as the vfs + * record. If it is dirty, we do not write it and return 'false' as the vfs * inode write paths will result in the access times being updated which would * cause the base mft record to be redirtied and written out again. (We know * the access time update will modify the base mft record because Windows @@ -878,11 +877,11 @@ err_out: * * If the inode is in icache and not dirty, we attempt to lock the mft record * and if we find the lock was already taken, it is not safe to write the mft - * record and we return FALSE. + * record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the mft record, * which also allows us safe writeout of the mft record. We then set - * @locked_ni to the locked ntfs inode and return TRUE. + * @locked_ni to the locked ntfs inode and return 'true'. * * Note we cannot just lock the mft record and sleep while waiting for the lock * because this would deadlock due to lock reversal (normally the mft record is @@ -892,24 +891,24 @@ err_out: * If the inode is not in icache we need to perform further checks. * * If the mft record is not a FILE record or it is a base mft record, we can - * safely write it and return TRUE. + * safely write it and return 'true'. * * We now know the mft record is an extent mft record. We check if the inode * corresponding to its base mft record is in icache and obtain a reference to - * it if it is. If it is not, we can safely write it and return TRUE. + * it if it is. If it is not, we can safely write it and return 'true'. * * We now have the base inode for the extent mft record. We check if it has an * ntfs inode for the extent mft record attached and if not it is safe to write - * the extent mft record and we return TRUE. + * the extent mft record and we return 'true'. * * The ntfs inode for the extent mft record is attached to the base inode so we * attempt to lock the extent mft record and if we find the lock was already - * taken, it is not safe to write the extent mft record and we return FALSE. + * taken, it is not safe to write the extent mft record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the extent mft * record, which also allows us safe writeout of the extent mft record. We * set the ntfs inode of the extent mft record clean and then set @locked_ni to - * the now locked ntfs inode and return TRUE. + * the now locked ntfs inode and return 'true'. * * Note, the reason for actually writing dirty mft records here and not just * relying on the vfs inode dirty code paths is that we can have mft records @@ -923,7 +922,7 @@ err_out: * appear if the mft record is reused for a new inode before it got written * out. */ -BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, +bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, const MFT_RECORD *m, ntfs_inode **locked_ni) { struct super_block *sb = vol->sb; @@ -978,16 +977,16 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, mft_no); atomic_dec(&ni->count); iput(vi); - return FALSE; + return false; } ntfs_debug("Inode 0x%lx is not dirty.", mft_no); /* The inode is not dirty, try to take the mft record lock. */ - if (unlikely(down_trylock(&ni->mrec_lock))) { + if (unlikely(!mutex_trylock(&ni->mrec_lock))) { ntfs_debug("Mft record 0x%lx is already locked, do " "not write it.", mft_no); atomic_dec(&ni->count); iput(vi); - return FALSE; + return false; } ntfs_debug("Managed to lock mft record 0x%lx, write it.", mft_no); @@ -996,7 +995,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * return the locked ntfs inode. */ *locked_ni = ni; - return TRUE; + return true; } ntfs_debug("Inode 0x%lx is not in icache.", mft_no); /* The inode is not in icache. */ @@ -1004,13 +1003,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, if (!ntfs_is_mft_record(m->magic)) { ntfs_debug("Mft record 0x%lx is not a FILE record, write it.", mft_no); - return TRUE; + return true; } /* Write the mft record if it is a base inode. */ if (!m->base_mft_record) { ntfs_debug("Mft record 0x%lx is a base record, write it.", mft_no); - return TRUE; + return true; } /* * This is an extent mft record. Check if the inode corresponding to @@ -1034,7 +1033,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, */ ntfs_debug("Base inode 0x%lx is not in icache, write the " "extent record.", na.mft_no); - return TRUE; + return true; } ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no); /* @@ -1042,17 +1041,17 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * corresponding to this extent mft record attached. */ ni = NTFS_I(vi); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); if (ni->nr_extents <= 0) { /* * The base inode has no attached extent inodes, write this * extent mft record. */ - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Base inode 0x%lx has no attached extent inodes, " "write the extent record.", na.mft_no); - return TRUE; + return true; } /* Iterate over the attached extent inodes. */ extent_nis = ni->ext.extent_ntfs_inos; @@ -1071,28 +1070,28 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * extent mft record. */ if (!eni) { - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); iput(vi); ntfs_debug("Extent inode 0x%lx is not attached to its base " "inode 0x%lx, write the extent record.", mft_no, na.mft_no); - return TRUE; + return true; } ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.", mft_no, na.mft_no); /* Take a reference to the extent ntfs inode. */ atomic_inc(&eni->count); - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); /* * Found the extent inode coresponding to this extent mft record. * Try to take the mft record lock. */ - if (unlikely(down_trylock(&eni->mrec_lock))) { + if (unlikely(!mutex_trylock(&eni->mrec_lock))) { atomic_dec(&eni->count); iput(vi); ntfs_debug("Extent mft record 0x%lx is already locked, do " "not write it.", mft_no); - return FALSE; + return false; } ntfs_debug("Managed to lock extent mft record 0x%lx, write it.", mft_no); @@ -1104,7 +1103,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, * the locked extent ntfs inode. */ *locked_ni = eni; - return TRUE; + return true; } static const char *es = " Leaving inconsistent metadata. Unmount and run " @@ -1192,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, if (size) { page = ntfs_map_page(mftbmp_mapping, ofs >> PAGE_CACHE_SHIFT); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read mft " "bitmap, aborting."); return PTR_ERR(page); @@ -1308,7 +1307,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) ll = mftbmp_ni->allocated_size; read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, - (ll - 1) >> vol->cluster_size_bits, TRUE); + (ll - 1) >> vol->cluster_size_bits, NULL); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " @@ -1354,7 +1353,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) up_write(&vol->lcnbmp_lock); ntfs_unmap_page(page); /* Allocate a cluster from the DATA_ZONE. */ - rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE); + rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE, + true); if (IS_ERR(rl2)) { up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to allocate a cluster for " @@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft " "bitmap."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate " + ntfs_error(vol->sb, "Failed to deallocate " "allocated cluster.%s", es); NVolSetErrors(vol); } @@ -1442,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol) // Note: It will need to be a special mft record and if none of // those are available it gets rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " - "accomodate extended mft bitmap attribute " + "accommodate extended mft bitmap attribute " "extent. Cannot handle this yet."); ret = -EOPNOTSUPP; goto undo_alloc; @@ -1724,7 +1724,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ATTR_RECORD *a = NULL; int ret, mp_size; u32 old_alen = 0; - BOOL mp_rebuilt = FALSE; + bool mp_rebuilt = false; ntfs_debug("Extending mft data allocation."); mft_ni = NTFS_I(vol->mft_ino); @@ -1738,7 +1738,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ll = mft_ni->allocated_size; read_unlock_irqrestore(&mft_ni->size_lock, flags); rl = ntfs_attr_find_vcn_nolock(mft_ni, - (ll - 1) >> vol->cluster_size_bits, TRUE); + (ll - 1) >> vol->cluster_size_bits, NULL); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { up_write(&mft_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " @@ -1779,7 +1779,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) nr > min_nr ? "default" : "minimal", (long long)nr); old_last_vcn = rl[1].vcn; do { - rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE); + rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE, + true); if (likely(!IS_ERR(rl2))) break; if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) { @@ -1804,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) ntfs_error(vol->sb, "Failed to merge runlists for mft data " "attribute."); if (ntfs_cluster_free_from_rl(vol, rl2)) { - ntfs_error(vol->sb, "Failed to dealocate clusters " + ntfs_error(vol->sb, "Failed to deallocate clusters " "from the mft data attribute.%s", es); NVolSetErrors(vol); } @@ -1878,12 +1879,12 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol) // and we would then need to update all references to this mft // record appropriately. This is rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " - "accomodate extended mft data attribute " + "accommodate extended mft data attribute " "extent. Cannot handle this yet."); ret = -EOPNOTSUPP; goto undo_alloc; } - mp_rebuilt = TRUE; + mp_rebuilt = true; /* Generate the mapping pairs array directly into the attr record. */ ret = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(a->data.non_resident.mapping_pairs_offset), @@ -1951,20 +1952,21 @@ restore_undo_alloc: NVolSetErrors(vol); return ret; } - a = ctx->attr; - a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1); + ctx->attr->data.non_resident.highest_vcn = + cpu_to_sle64(old_last_vcn - 1); undo_alloc: - if (ntfs_cluster_free(mft_ni, old_last_vcn, -1) < 0) { + if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) { ntfs_error(vol->sb, "Failed to free clusters from mft data " "attribute.%s", es); NVolSetErrors(vol); } + a = ctx->attr; if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) { ntfs_error(vol->sb, "Failed to truncate mft data attribute " "runlist.%s", es); NVolSetErrors(vol); } - if (mp_rebuilt) { + if (mp_rebuilt && !IS_ERR(ctx->mrec)) { if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( a->data.non_resident.mapping_pairs_offset), old_alen - le16_to_cpu( @@ -1981,6 +1983,10 @@ undo_alloc: } flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); + } else if (IS_ERR(ctx->mrec)) { + ntfs_error(vol->sb, "Failed to restore attribute search " + "context.%s", es); + NVolSetErrors(vol); } if (ctx) ntfs_attr_put_search_ctx(ctx); @@ -2112,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) } /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(mft_vi->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing mft record " "to format 0x%llx.", (long long)mft_no); return PTR_ERR(page); @@ -2249,7 +2255,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, unsigned int ofs; int err; le16 seq_no, usn; - BOOL record_formatted = FALSE; + bool record_formatted = false; if (base_ni) { ntfs_debug("Entering (allocating an extent mft record for " @@ -2351,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode, } #ifdef DEBUG read_lock_irqsave(&mftbmp_ni->size_lock, flags); - ntfs_debug("Status of mftbmp after initialized extention: " + ntfs_debug("Status of mftbmp after initialized extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, @@ -2448,7 +2454,7 @@ have_alloc_rec: mft_ni->initialized_size = new_initialized_size; } write_unlock_irqrestore(&mft_ni->size_lock, flags); - record_formatted = TRUE; + record_formatted = true; /* Update the mft data attribute record to reflect the new sizes. */ m = map_mft_record(mft_ni); if (IS_ERR(m)) { @@ -2513,7 +2519,7 @@ mft_rec_already_initialized: ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(vol->mft_ino->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing allocated " "mft record 0x%llx.", (long long)bit); err = PTR_ERR(page); @@ -2570,6 +2576,8 @@ mft_rec_already_initialized: flush_dcache_page(page); SetPageUptodate(page); if (base_ni) { + MFT_RECORD *m_tmp; + /* * Setup the base mft record in the extent mft record. This * completes initialization of the allocated extent mft record @@ -2582,11 +2590,11 @@ mft_rec_already_initialized: * attach it to the base inode @base_ni and map, pin, and lock * its, i.e. the allocated, mft record. */ - m = map_extent_mft_record(base_ni, bit, &ni); - if (IS_ERR(m)) { + m_tmp = map_extent_mft_record(base_ni, bit, &ni); + if (IS_ERR(m_tmp)) { ntfs_error(vol->sb, "Failed to map allocated extent " "mft record 0x%llx.", (long long)bit); - err = PTR_ERR(m); + err = PTR_ERR(m_tmp); /* Set the mft record itself not in use. */ m->flags &= cpu_to_le16( ~le16_to_cpu(MFT_RECORD_IN_USE)); @@ -2597,6 +2605,7 @@ mft_rec_already_initialized: ntfs_unmap_page(page); goto undo_mftbmp_alloc; } + BUG_ON(m != m_tmp); /* * Make sure the allocated mft record is written out to disk. * No need to set the inode dirty because the caller is going @@ -2632,11 +2641,6 @@ mft_rec_already_initialized: } vi->i_ino = bit; /* - * This is the optimal IO size (for stat), not the fs block - * size. - */ - vi->i_blksize = PAGE_CACHE_SIZE; - /* * This is for checking whether an inode has changed w.r.t. a * file so that the file can be updated if necessary (compare * with f_version). @@ -2664,7 +2668,7 @@ mft_rec_already_initialized: ni->name_len = 4; ni->itype.index.block_size = 4096; - ni->itype.index.block_size_bits = generic_ffs(4096) - 1; + ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1; ni->itype.index.collation_rule = COLLATION_FILE_NAME; if (vol->cluster_size <= ni->itype.index.block_size) { ni->itype.index.vcn_size = vol->cluster_size; @@ -2703,7 +2707,7 @@ mft_rec_already_initialized: * have its page mapped and it is very easy to do. */ atomic_inc(&ni->count); - down(&ni->mrec_lock); + mutex_lock(&ni->mrec_lock); ni->page = page; ni->page_ofs = ofs; /* @@ -2790,22 +2794,22 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) BUG_ON(NInoAttr(ni)); BUG_ON(ni->nr_extents != -1); - down(&ni->extent_lock); + mutex_lock(&ni->extent_lock); base_ni = ni->ext.base_ntfs_ino; - up(&ni->extent_lock); + mutex_unlock(&ni->extent_lock); BUG_ON(base_ni->nr_extents <= 0); ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n", mft_no, base_ni->mft_no); - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); /* Make sure we are holding the only reference to the extent inode. */ if (atomic_read(&ni->count) > 2) { ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, " "not freeing.", base_ni->mft_no); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); return -EBUSY; } @@ -2823,7 +2827,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) break; } - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); if (unlikely(err)) { ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to " @@ -2838,7 +2842,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m) */ /* Mark the mft record as not in use. */ - m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); + m->flags &= ~MFT_RECORD_IN_USE; /* Increment the sequence number, skipping zero, if it is not zero. */ old_seq_no = m->sequence_number; @@ -2882,16 +2886,16 @@ rollback_error: return 0; rollback: /* Rollback what we did... */ - down(&base_ni->extent_lock); + mutex_lock(&base_ni->extent_lock); extent_nis = base_ni->ext.extent_ntfs_inos; if (!(base_ni->nr_extents & 3)) { int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*); - extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS); + extent_nis = kmalloc(new_size, GFP_NOFS); if (unlikely(!extent_nis)) { ntfs_error(vol->sb, "Failed to allocate internal " "buffer during rollback.%s", es); - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); NVolSetErrors(vol); goto rollback_error; } @@ -2906,7 +2910,7 @@ rollback: m->flags |= MFT_RECORD_IN_USE; m->sequence_number = old_seq_no; extent_nis[base_ni->nr_extents++] = ni; - up(&base_ni->extent_lock); + mutex_unlock(&base_ni->extent_lock); mark_mft_record_dirty(ni); return err; } diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h index 407de2cef1d..b52bf87b99d 100644 --- a/fs/ntfs/mft.h +++ b/fs/ntfs/mft.h @@ -97,10 +97,7 @@ extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync); * uptodate. * * On success, clean the mft record and return 0. On error, leave the mft - * record dirty and return -errno. The caller should call make_bad_inode() on - * the base inode to ensure no more access happens to this inode. We do not do - * it here as the caller may want to finish writing other extent mft records - * first to minimize on-disk metadata inconsistencies. + * record dirty and return -errno. */ static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync) { @@ -114,7 +111,7 @@ static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync) return err; } -extern BOOL ntfs_may_write_mft_record(ntfs_volume *vol, +extern bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no, const MFT_RECORD *m, ntfs_inode **locked_ni); diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 351dbc3b6e4..436f36037e0 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -2,7 +2,7 @@ * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS * project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -21,7 +21,9 @@ */ #include <linux/dcache.h> +#include <linux/exportfs.h> #include <linux/security.h> +#include <linux/slab.h> #include "attrib.h" #include "debug.h" @@ -96,10 +98,10 @@ * name. We then convert the name to the current NLS code page, and proceed * searching for a dentry with this name, etc, as in case 2), above. * - * Locking: Caller must hold i_sem on the directory. + * Locking: Caller must hold i_mutex on the directory. */ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, - struct nameidata *nd) + unsigned int flags) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; @@ -115,7 +117,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len, &uname); if (uname_len < 0) { - ntfs_error(vol->sb, "Failed to convert name to Unicode."); + if (uname_len != -ENAMETOOLONG) + ntfs_error(vol->sb, "Failed to convert name to " + "Unicode."); return ERR_PTR(uname_len); } mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len, @@ -157,7 +161,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, /* Return the error code. */ return (struct dentry *)dent_inode; } - /* It is guaranteed that name is no longer allocated at this point. */ + /* It is guaranteed that @name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { ntfs_debug("Entry was not found, adding negative dentry."); /* The dcache will handle negative entries. */ @@ -168,11 +172,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error " "code %i.", -MREF_ERR(mref)); return ERR_PTR(MREF_ERR(mref)); - // TODO: Consider moving this lot to a separate function! (AIA) handle_name: { - struct dentry *real_dent, *new_dent; MFT_RECORD *m; ntfs_attr_search_ctx *ctx; ntfs_inode *ni = NTFS_I(dent_inode); @@ -253,93 +255,9 @@ handle_name: } nls_name.hash = full_name_hash(nls_name.name, nls_name.len); - /* - * Note: No need for dent->d_lock lock as i_sem is held on the - * parent inode. - */ - - /* Does a dentry matching the nls_name exist already? */ - real_dent = d_lookup(dent->d_parent, &nls_name); - /* If not, create it now. */ - if (!real_dent) { - real_dent = d_alloc(dent->d_parent, &nls_name); - kfree(nls_name.name); - if (!real_dent) { - err = -ENOMEM; - goto err_out; - } - new_dent = d_splice_alias(dent_inode, real_dent); - if (new_dent) - dput(real_dent); - else - new_dent = real_dent; - ntfs_debug("Done. (Created new dentry.)"); - return new_dent; - } + dent = d_add_ci(dent, dent_inode, &nls_name); kfree(nls_name.name); - /* Matching dentry exists, check if it is negative. */ - if (real_dent->d_inode) { - if (unlikely(real_dent->d_inode != dent_inode)) { - /* This can happen because bad inodes are unhashed. */ - BUG_ON(!is_bad_inode(dent_inode)); - BUG_ON(!is_bad_inode(real_dent->d_inode)); - } - /* - * Already have the inode and the dentry attached, decrement - * the reference count to balance the ntfs_iget() we did - * earlier on. We found the dentry using d_lookup() so it - * cannot be disconnected and thus we do not need to worry - * about any NFS/disconnectedness issues here. - */ - iput(dent_inode); - ntfs_debug("Done. (Already had inode and dentry.)"); - return real_dent; - } - /* - * Negative dentry: instantiate it unless the inode is a directory and - * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), - * in which case d_move() that in place of the found dentry. - */ - if (!S_ISDIR(dent_inode->i_mode)) { - /* Not a directory; everything is easy. */ - d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative file dentry.)"); - return real_dent; - } - spin_lock(&dcache_lock); - if (list_empty(&dent_inode->i_dentry)) { - /* - * Directory without a 'disconnected' dentry; we need to do - * d_instantiate() by hand because it takes dcache_lock which - * we already hold. - */ - list_add(&real_dent->d_alias, &dent_inode->i_dentry); - real_dent->d_inode = dent_inode; - spin_unlock(&dcache_lock); - security_d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative directory dentry.)"); - return real_dent; - } - /* - * Directory with a 'disconnected' dentry; get a reference to the - * 'disconnected' dentry. - */ - new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, - d_alias); - dget_locked(new_dent); - spin_unlock(&dcache_lock); - /* Do security vodoo. */ - security_d_instantiate(real_dent, dent_inode); - /* Move new_dent in place of real_dent. */ - d_move(new_dent, real_dent); - /* Balance the ntfs_iget() we did above. */ - iput(dent_inode); - /* Throw away real_dent. */ - dput(real_dent); - /* Use new_dent as the actual dentry. */ - ntfs_debug("Done. (Already had negative, disconnected directory " - "dentry.)"); - return new_dent; + return dent; eio_err_out: ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); @@ -358,7 +276,7 @@ err_out: /** * Inode operations for directories. */ -struct inode_operations ntfs_dir_inode_ops = { +const struct inode_operations ntfs_dir_inode_ops = { .lookup = ntfs_lookup, /* VFS: Lookup directory. */ }; @@ -374,7 +292,7 @@ struct inode_operations ntfs_dir_inode_ops = { * The code is based on the ext3 ->get_parent() implementation found in * fs/ext3/namei.c::ext3_get_parent(). * - * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down. + * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down. * * Return the dentry of the parent directory on success or the error code on * error (IS_ERR() is true). @@ -387,8 +305,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent) ntfs_attr_search_ctx *ctx; ATTR_RECORD *attr; FILE_NAME_ATTR *fn; - struct inode *parent_vi; - struct dentry *parent_dent; unsigned long parent_ino; int err; @@ -428,78 +344,44 @@ try_next: /* Release the search context and the mft record of the child. */ ntfs_attr_put_search_ctx(ctx); unmap_mft_record(ni); - /* Get the inode of the parent directory. */ - parent_vi = ntfs_iget(vi->i_sb, parent_ino); - if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) { - if (!IS_ERR(parent_vi)) - iput(parent_vi); - ntfs_error(vi->i_sb, "Failed to get parent directory inode " - "0x%lx of child inode 0x%lx.", parent_ino, - vi->i_ino); - return ERR_PTR(-EACCES); - } - /* Finally get a dentry for the parent directory and return it. */ - parent_dent = d_alloc_anon(parent_vi); - if (unlikely(!parent_dent)) { - iput(parent_vi); - return ERR_PTR(-ENOMEM); - } - ntfs_debug("Done for inode 0x%lx.", vi->i_ino); - return parent_dent; + + return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino)); } -/** - * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment - * @sb: super block identifying the mounted ntfs volume - * @fh: the file handle sub-fragment - * - * Find a dentry for the inode given a file handle sub-fragment. This function - * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is - * called from the default ->decode_fh() which is export_decode_fh() in the - * same file. The code is closely based on the default ->get_dentry() helper - * fs/exportfs/expfs.c::get_object(). - * - * The @fh contains two 32-bit unsigned values, the first one is the inode - * number and the second one is the inode generation. - * - * Return the dentry on success or the error code on error (IS_ERR() is true). - */ -static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) +static struct inode *ntfs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - struct inode *vi; - struct dentry *dent; - unsigned long ino = ((u32 *)fh)[0]; - u32 gen = ((u32 *)fh)[1]; + struct inode *inode; - ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen); - vi = ntfs_iget(sb, ino); - if (IS_ERR(vi)) { - ntfs_error(sb, "Failed to get inode 0x%lx.", ino); - return (struct dentry *)vi; - } - if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) { - /* We didn't find the right inode. */ - ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x " - "0x%x.", vi->i_ino, vi->i_nlink, - atomic_read(&vi->i_count), vi->i_generation, - gen); - iput(vi); - return ERR_PTR(-ESTALE); - } - /* Now find a dentry. If possible, get a well-connected one. */ - dent = d_alloc_anon(vi); - if (unlikely(!dent)) { - iput(vi); - return ERR_PTR(-ENOMEM); + inode = ntfs_iget(sb, ino); + if (!IS_ERR(inode)) { + if (is_bad_inode(inode) || inode->i_generation != generation) { + iput(inode); + inode = ERR_PTR(-ESTALE); + } } - ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); - return dent; + + return inode; +} + +static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ntfs_nfs_get_inode); +} + +static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ntfs_nfs_get_inode); } /** * Export operations allowing NFS exporting of mounted NTFS partitions. * - * We use the default ->decode_fh() and ->encode_fh() for now. Note that they + * We use the default ->encode_fh() for now. Note that they * use 32 bits to store the inode number which is an unsigned long so on 64-bit * architectures is usually 64 bits so it would all fail horribly on huge * volumes. I guess we need to define our own encode and decode fh functions @@ -515,10 +397,9 @@ static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) * allowing the inode number 0 which is used in NTFS for the system file $MFT * and due to using iget() whereas NTFS needs ntfs_iget(). */ -struct export_operations ntfs_export_ops = { +const struct export_operations ntfs_export_ops = { .get_parent = ntfs_get_parent, /* Find the parent of a given directory. */ - .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode - given a file handle - sub-fragment. */ + .fh_to_dentry = ntfs_fh_to_dentry, + .fh_to_parent = ntfs_fh_to_parent, }; diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 446b5014115..d6a340bf80f 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -50,26 +50,26 @@ typedef enum { /* Global variables. */ /* Slab caches (from super.c). */ -extern kmem_cache_t *ntfs_name_cache; -extern kmem_cache_t *ntfs_inode_cache; -extern kmem_cache_t *ntfs_big_inode_cache; -extern kmem_cache_t *ntfs_attr_ctx_cache; -extern kmem_cache_t *ntfs_index_ctx_cache; +extern struct kmem_cache *ntfs_name_cache; +extern struct kmem_cache *ntfs_inode_cache; +extern struct kmem_cache *ntfs_big_inode_cache; +extern struct kmem_cache *ntfs_attr_ctx_cache; +extern struct kmem_cache *ntfs_index_ctx_cache; /* The various operations structs defined throughout the driver files. */ -extern struct address_space_operations ntfs_aops; -extern struct address_space_operations ntfs_mst_aops; +extern const struct address_space_operations ntfs_aops; +extern const struct address_space_operations ntfs_mst_aops; -extern struct file_operations ntfs_file_ops; -extern struct inode_operations ntfs_file_inode_ops; +extern const struct file_operations ntfs_file_ops; +extern const struct inode_operations ntfs_file_inode_ops; -extern struct file_operations ntfs_dir_ops; -extern struct inode_operations ntfs_dir_inode_ops; +extern const struct file_operations ntfs_dir_ops; +extern const struct inode_operations ntfs_dir_inode_ops; -extern struct file_operations ntfs_empty_file_ops; -extern struct inode_operations ntfs_empty_inode_ops; +extern const struct file_operations ntfs_empty_file_ops; +extern const struct inode_operations ntfs_empty_inode_ops; -extern struct export_operations ntfs_export_ops; +extern const struct export_operations ntfs_export_ops; /** * NTFS_SB - return the ntfs volume given a vfs super block @@ -91,7 +91,7 @@ extern void free_compression_buffers(void); /* From fs/ntfs/super.c */ #define default_upcase_len 0x10000 -extern struct semaphore ntfs_lock; +extern struct mutex ntfs_lock; typedef struct { int val; @@ -105,7 +105,7 @@ extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size); extern void post_write_mst_fixup(NTFS_RECORD *b); /* From fs/ntfs/unistr.c */ -extern BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, +extern bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_size); @@ -132,4 +132,33 @@ extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, /* From fs/ntfs/upcase.c */ extern ntfschar *generate_default_upcase(void); +static inline int ntfs_ffs(int x) +{ + int r = 1; + + if (!x) + return 0; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { + x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + #endif /* _LINUX_NTFS_H */ diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c index 833df2a4e9f..d80e3315cab 100644 --- a/fs/ntfs/quota.c +++ b/fs/ntfs/quota.c @@ -31,10 +31,10 @@ * ntfs_mark_quotas_out_of_date - mark the quotas out of date on an ntfs volume * @vol: ntfs volume on which to mark the quotas out of date * - * Mark the quotas out of date on the ntfs volume @vol and return TRUE on - * success and FALSE on error. + * Mark the quotas out of date on the ntfs volume @vol and return 'true' on + * success and 'false' on error. */ -BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) +bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol) { ntfs_index_context *ictx; QUOTA_CONTROL_ENTRY *qce; @@ -46,9 +46,9 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) goto done; if (!vol->quota_ino || !vol->quota_q_ino) { ntfs_error(vol->sb, "Quota inodes are not open."); - return FALSE; + return false; } - down(&vol->quota_q_ino->i_sem); + mutex_lock(&vol->quota_q_ino->i_mutex); ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino)); if (!ictx) { ntfs_error(vol->sb, "Failed to get index context."); @@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) ntfs_index_entry_mark_dirty(ictx); set_done: ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); + mutex_unlock(&vol->quota_q_ino->i_mutex); /* * We set the flag so we do not try to mark the quotas out of date * again on remount. @@ -106,12 +106,12 @@ set_done: NVolSetQuotaOutOfDate(vol); done: ntfs_debug("Done."); - return TRUE; + return true; err_out: if (ictx) ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); - return FALSE; + mutex_unlock(&vol->quota_q_ino->i_mutex); + return false; } #endif /* NTFS_RW */ diff --git a/fs/ntfs/quota.h b/fs/ntfs/quota.h index 40e4763aa22..4cbe5594c0b 100644 --- a/fs/ntfs/quota.h +++ b/fs/ntfs/quota.h @@ -28,7 +28,7 @@ #include "types.h" #include "volume.h" -extern BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol); +extern bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol); #endif /* NTFS_RW */ diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 061b5ff6b73..eac7d6788a1 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -1,7 +1,7 @@ /** * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002-2005 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -149,10 +149,10 @@ static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl, * * It is up to the caller to serialize access to the runlists @dst and @src. * - * Return: TRUE Success, the runlists can be merged. - * FALSE Failure, the runlists cannot be merged. + * Return: true Success, the runlists can be merged. + * false Failure, the runlists cannot be merged. */ -static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst, +static inline bool ntfs_are_rl_mergeable(runlist_element *dst, runlist_element *src) { BUG_ON(!dst); @@ -160,19 +160,19 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst, /* We can merge unmapped regions even if they are misaligned. */ if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED)) - return TRUE; + return true; /* If the runs are misaligned, we cannot merge them. */ if ((dst->vcn + dst->length) != src->vcn) - return FALSE; + return false; /* If both runs are non-sparse and contiguous, we can merge them. */ if ((dst->lcn >= 0) && (src->lcn >= 0) && ((dst->lcn + dst->length) == src->lcn)) - return TRUE; + return true; /* If we are merging two holes, we can merge them. */ if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE)) - return TRUE; + return true; /* Cannot merge. */ - return FALSE; + return false; } /** @@ -218,7 +218,7 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src) static inline runlist_element *ntfs_rl_append(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL right = FALSE; /* Right end of @src needs merging. */ + bool right = false; /* Right end of @src needs merging. */ int marker; /* End of the inserted runs. */ BUG_ON(!dst); @@ -285,8 +285,8 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst, static inline runlist_element *ntfs_rl_insert(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; /* Left end of @src needs merging. */ - BOOL disc = FALSE; /* Discontinuity between @dst and @src. */ + bool left = false; /* Left end of @src needs merging. */ + bool disc = false; /* Discontinuity between @dst and @src. */ int marker; /* End of the inserted runs. */ BUG_ON(!dst); @@ -381,8 +381,9 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, static inline runlist_element *ntfs_rl_replace(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; /* Left end of @src needs merging. */ - BOOL right = FALSE; /* Right end of @src needs merging. */ + signed delta; + bool left = false; /* Left end of @src needs merging. */ + bool right = false; /* Right end of @src needs merging. */ int tail; /* Start of tail of @dst. */ int marker; /* End of the inserted runs. */ @@ -396,11 +397,14 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst, left = ntfs_are_rl_mergeable(dst + loc - 1, src); /* * Allocate some space. We will need less if the left, right, or both - * ends get merged. + * ends get merged. The -1 accounts for the run being replaced. */ - dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); - if (IS_ERR(dst)) - return dst; + delta = ssize - 1 - left - right; + if (delta > 0) { + dst = ntfs_rl_realloc(dst, dsize, dsize + delta); + if (IS_ERR(dst)) + return dst; + } /* * We are guaranteed to succeed from here so can start modifying the * original runlists. @@ -616,8 +620,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, ; { - BOOL start; - BOOL finish; + bool start; + bool finish; int ds = dend + 1; /* Number of elements in drl & srl */ int ss = sfinal - sstart + 1; @@ -631,7 +635,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl, if (finish && !drl[dins].length) ss++; if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) - finish = FALSE; + finish = false; #if 0 ntfs_debug("dfinal = %i, dend = %i", dfinal, dend); ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send); @@ -1130,7 +1134,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, { LCN prev_lcn; int rls; - BOOL the_end = FALSE; + bool the_end = false; BUG_ON(first_vcn < 0); BUG_ON(last_vcn < -1); @@ -1164,7 +1168,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, s64 s1 = last_vcn + 1; if (unlikely(rl[1].vcn > s1)) length = s1 - rl->vcn; - the_end = TRUE; + the_end = true; } delta = first_vcn - rl->vcn; /* Header byte + length. */ @@ -1200,7 +1204,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, s64 s1 = last_vcn + 1; if (unlikely(rl[1].vcn > s1)) length = s1 - rl->vcn; - the_end = TRUE; + the_end = true; } /* Header byte + length. */ rls += 1 + ntfs_get_nr_significant_bytes(length); @@ -1239,7 +1243,7 @@ err_out: * write. * * This is used when building the mapping pairs array of a runlist to compress - * a given logical cluster number (lcn) or a specific run length to the minumum + * a given logical cluster number (lcn) or a specific run length to the minimum * size possible. * * Return the number of bytes written on success. On error, i.e. the @@ -1323,7 +1327,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, LCN prev_lcn; s8 *dst_max, *dst_next; int err = -ENOSPC; - BOOL the_end = FALSE; + bool the_end = false; s8 len_len, lcn_len; BUG_ON(first_vcn < 0); @@ -1366,7 +1370,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, s64 s1 = last_vcn + 1; if (unlikely(rl[1].vcn > s1)) length = s1 - rl->vcn; - the_end = TRUE; + the_end = true; } delta = first_vcn - rl->vcn; /* Write length. */ @@ -1418,7 +1422,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, s64 s1 = last_vcn + 1; if (unlikely(rl[1].vcn > s1)) length = s1 - rl->vcn; - the_end = TRUE; + the_end = true; } /* Write length. */ len_len = ntfs_write_significant_bytes(dst + 1, dst_max, @@ -1537,7 +1541,7 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, */ if (rl->length) { runlist_element *trl; - BOOL is_end; + bool is_end; ntfs_debug("Shrinking runlist."); /* Determine the runlist size. */ @@ -1551,11 +1555,11 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, * If a run was partially truncated, make the following runlist * element a terminator. */ - is_end = FALSE; + is_end = false; if (rl->length) { rl++; if (!rl->length) - is_end = TRUE; + is_end = true; rl->vcn = new_length; rl->length = 0; } @@ -1644,7 +1648,7 @@ int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist, s64 delta; runlist_element *rl, *rl_end, *rl_real_end, *trl; int old_size; - BOOL lcn_fixup = FALSE; + bool lcn_fixup = false; ntfs_debug("Entering for start 0x%llx, length 0x%llx.", (long long)start, (long long)length); @@ -1710,7 +1714,7 @@ extend_hole: sizeof(*rl)); /* Adjust the beginning of the tail if necessary. */ if (end > rl->vcn) { - s64 delta = end - rl->vcn; + delta = end - rl->vcn; rl->vcn = end; rl->length -= delta; /* Only adjust the lcn if it is real. */ @@ -1858,7 +1862,7 @@ split_end: if (rl->lcn >= 0) { rl->lcn -= delta; /* Need this in case the lcn just became negative. */ - lcn_fixup = TRUE; + lcn_fixup = true; } rl->length += delta; goto split_end; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 453d0d51ea4..6c3296e546c 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -19,17 +19,19 @@ * distribution in the file COPYING); if not, write to the Free Software * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/stddef.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/spinlock.h> -#include <linux/blkdev.h> /* For bdev_hardsect_size(). */ +#include <linux/blkdev.h> /* For bdev_logical_block_size(). */ #include <linux/backing-dev.h> #include <linux/buffer_head.h> #include <linux/vfs.h> #include <linux/moduleparam.h> -#include <linux/smp_lock.h> +#include <linux/bitmap.h> #include "sysctl.h" #include "logfile.h" @@ -38,6 +40,7 @@ #include "dir.h" #include "debug.h" #include "index.h" +#include "inode.h" #include "aops.h" #include "layout.h" #include "malloc.h" @@ -47,8 +50,8 @@ static unsigned long ntfs_nr_compression_users; /* A global default upcase table and a corresponding reference count. */ -static ntfschar *default_upcase = NULL; -static unsigned long ntfs_nr_upcase_users = 0; +static ntfschar *default_upcase; +static unsigned long ntfs_nr_upcase_users; /* Error constants/strings used in inode.c::ntfs_show_options(). */ typedef enum { @@ -73,18 +76,18 @@ const option_t on_errors_arr[] = { * * Copied from old ntfs driver (which copied from vfat driver). */ -static int simple_getbool(char *s, BOOL *setval) +static int simple_getbool(char *s, bool *setval) { if (s) { if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) - *setval = TRUE; + *setval = true; else if (!strcmp(s, "0") || !strcmp(s, "no") || !strcmp(s, "false")) - *setval = FALSE; + *setval = false; else return 0; } else - *setval = TRUE; + *setval = true; return 1; } @@ -95,14 +98,14 @@ static int simple_getbool(char *s, BOOL *setval) * * Parse the recognized options in @opt for the ntfs volume described by @vol. */ -static BOOL parse_options(ntfs_volume *vol, char *opt) +static bool parse_options(ntfs_volume *vol, char *opt) { char *p, *v, *ov; static char *utf8 = "utf8"; int errors = 0, sloppy = 0; - uid_t uid = (uid_t)-1; - gid_t gid = (gid_t)-1; - mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; + kuid_t uid = INVALID_UID; + kgid_t gid = INVALID_GID; + umode_t fmask = (umode_t)-1, dmask = (umode_t)-1; int mft_zone_multiplier = -1, on_errors = -1; int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; struct nls_table *nls_map = NULL, *old_nls; @@ -126,6 +129,30 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) if (*v) \ goto needs_val; \ } +#define NTFS_GETOPT_UID(option, variable) \ + if (!strcmp(p, option)) { \ + uid_t uid_value; \ + if (!v || !*v) \ + goto needs_arg; \ + uid_value = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + variable = make_kuid(current_user_ns(), uid_value); \ + if (!uid_valid(variable)) \ + goto needs_val; \ + } +#define NTFS_GETOPT_GID(option, variable) \ + if (!strcmp(p, option)) { \ + gid_t gid_value; \ + if (!v || !*v) \ + goto needs_arg; \ + gid_value = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + variable = make_kgid(current_user_ns(), gid_value); \ + if (!gid_valid(variable)) \ + goto needs_val; \ + } #define NTFS_GETOPT_OCTAL(option, variable) \ if (!strcmp(p, option)) { \ if (!v || !*v) \ @@ -136,7 +163,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) } #define NTFS_GETOPT_BOOL(option, variable) \ if (!strcmp(p, option)) { \ - BOOL val; \ + bool val; \ if (!simple_getbool(v, &val)) \ goto needs_bool; \ variable = val; \ @@ -163,13 +190,13 @@ static BOOL parse_options(ntfs_volume *vol, char *opt) while ((p = strsep(&opt, ","))) { if ((v = strchr(p, '='))) *v++ = 0; - NTFS_GETOPT("uid", uid) - else NTFS_GETOPT("gid", gid) + NTFS_GETOPT_UID("uid", uid) + else NTFS_GETOPT_GID("gid", gid) else NTFS_GETOPT_OCTAL("umask", fmask = dmask) else NTFS_GETOPT_OCTAL("fmask", fmask) else NTFS_GETOPT_OCTAL("dmask", dmask) else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) - else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) + else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, true) else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse) @@ -193,25 +220,24 @@ use_utf8: if (!old_nls) { ntfs_error(vol->sb, "NLS character set " "%s not found.", v); - return FALSE; + return false; } ntfs_error(vol->sb, "NLS character set %s not " "found. Using previous one %s.", v, old_nls->charset); nls_map = old_nls; } else /* nls_map */ { - if (old_nls) - unload_nls(old_nls); + unload_nls(old_nls); } } else if (!strcmp(p, "utf8")) { - BOOL val = FALSE; + bool val = false; ntfs_warning(vol->sb, "Option utf8 is no longer " "supported, using option nls=utf8. Please " "use option nls=utf8 in the future and " "make sure utf8 is compiled either as a " "module or into the kernel."); if (!v || !*v) - val = TRUE; + val = true; else if (!simple_getbool(v, &val)) goto needs_bool; if (val) { @@ -230,7 +256,7 @@ use_utf8: } no_mount_options: if (errors && !sloppy) - return FALSE; + return false; if (sloppy) ntfs_warning(vol->sb, "Sloppy option given. Ignoring " "unrecognized mount option(s) and continuing."); @@ -239,14 +265,14 @@ no_mount_options: if (!on_errors) { ntfs_error(vol->sb, "Invalid errors option argument " "or bug in options parser."); - return FALSE; + return false; } } if (nls_map) { if (vol->nls_map && vol->nls_map != nls_map) { ntfs_error(vol->sb, "Cannot change NLS character set " "on remount."); - return FALSE; + return false; } /* else (!vol->nls_map) */ ntfs_debug("Using NLS character set %s.", nls_map->charset); vol->nls_map = nls_map; @@ -256,7 +282,7 @@ no_mount_options: if (!vol->nls_map) { ntfs_error(vol->sb, "Failed to load default " "NLS character set."); - return FALSE; + return false; } ntfs_debug("Using default NLS character set (%s).", vol->nls_map->charset); @@ -267,7 +293,7 @@ no_mount_options: mft_zone_multiplier) { ntfs_error(vol->sb, "Cannot change mft_zone_multiplier " "on remount."); - return FALSE; + return false; } if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) { ntfs_error(vol->sb, "Invalid mft_zone_multiplier. " @@ -282,13 +308,13 @@ no_mount_options: vol->on_errors = on_errors; if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) vol->on_errors |= ON_ERRORS_CONTINUE; - if (uid != (uid_t)-1) + if (uid_valid(uid)) vol->uid = uid; - if (gid != (gid_t)-1) + if (gid_valid(gid)) vol->gid = gid; - if (fmask != (mode_t)-1) + if (fmask != (umode_t)-1) vol->fmask = fmask; - if (dmask != (mode_t)-1) + if (dmask != (umode_t)-1) vol->dmask = dmask; if (show_sys_files != -1) { if (show_sys_files) @@ -317,16 +343,16 @@ no_mount_options: NVolSetSparseEnabled(vol); } } - return TRUE; + return true; needs_arg: ntfs_error(vol->sb, "The %s option requires an argument.", p); - return FALSE; + return false; needs_bool: ntfs_error(vol->sb, "The %s option requires a boolean argument.", p); - return FALSE; + return false; needs_val: ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov); - return FALSE; + return false; } #ifdef NTFS_RW @@ -442,9 +468,12 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering with remount options string: %s", opt); + + sync_filesystem(sb); + #ifndef NTFS_RW - /* For read-only compiled driver, enforce all read-only flags. */ - *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + /* For read-only compiled driver, enforce read-only flag. */ + *flags |= MS_RDONLY; #else /* NTFS_RW */ /* * For the read-write compiled driver, if we are remounting read-write, @@ -456,7 +485,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * the volume on boot and updates them. * * When remounting read-only, mark the volume clean if no volume errors - * have occured. + * have occurred. */ if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { static const char *es = ". Cannot remount read-write."; @@ -471,9 +500,16 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_error(sb, "Volume is dirty and read-only%s", es); return -EROFS; } + if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { + ntfs_error(sb, "Volume has been modified by chkdsk " + "and is read-only%s", es); + return -EROFS; + } if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { - ntfs_error(sb, "Volume has unsupported flags set and " - "is read-only%s", es); + ntfs_error(sb, "Volume has unsupported flags set " + "(0x%x) and is read-only%s", + (unsigned)le16_to_cpu(vol->vol_flags), + es); return -EROFS; } if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { @@ -527,6 +563,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) if (!parse_options(vol, opt)) return -EINVAL; + ntfs_debug("Done."); return 0; } @@ -535,16 +572,16 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector * @sb: Super block of the device to which @b belongs. * @b: Boot sector of device @sb to check. - * @silent: If TRUE, all output will be silenced. + * @silent: If 'true', all output will be silenced. * * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot - * sector. Returns TRUE if it is valid and FALSE if not. + * sector. Returns 'true' if it is valid and 'false' if not. * * @sb is only needed for warning/error output, i.e. it can be NULL when silent - * is TRUE. + * is 'true'. */ -static BOOL is_boot_sector_ntfs(const struct super_block *sb, - const NTFS_BOOT_SECTOR *b, const BOOL silent) +static bool is_boot_sector_ntfs(const struct super_block *sb, + const NTFS_BOOT_SECTOR *b, const bool silent) { /* * Check that checksum == sum of u32 values from b to the checksum @@ -610,11 +647,11 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb, * many BIOSes will refuse to boot from a bootsector if the magic is * incorrect, so we emit a warning. */ - if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) + if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) ntfs_warning(sb, "Invalid end of sector marker."); - return TRUE; + return true; not_ntfs: - return FALSE; + return false; } /** @@ -641,7 +678,7 @@ static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb, { const char *read_err_str = "Unable to read %s boot sector."; struct buffer_head *bh_primary, *bh_backup; - long nr_blocks = NTFS_SB(sb)->nr_blocks; + sector_t nr_blocks = NTFS_SB(sb)->nr_blocks; /* Try to read primary boot sector. */ if ((bh_primary = sb_bread(sb, 0))) { @@ -688,13 +725,18 @@ hotfix_primary_boot_sector: /* * If we managed to read sector zero and the volume is not * read-only, copy the found, valid backup boot sector to the - * primary boot sector. + * primary boot sector. Note we only copy the actual boot + * sector structure, not the actual whole device sector as that + * may be bigger and would potentially damage the $Boot system + * file (FIXME: Would be nice to know if the backup boot sector + * on a large sector device contains the whole boot loader or + * just the first 512 bytes). */ if (!(sb->s_flags & MS_RDONLY)) { ntfs_warning(sb, "Hot-fix: Recovering invalid primary " "boot sector from backup copy."); memcpy(bh_primary->b_data, bh_backup->b_data, - sb->s_blocksize); + NTFS_BLOCK_SIZE); mark_buffer_dirty(bh_primary); sync_dirty_buffer(bh_primary); if (buffer_uptodate(bh_primary)) { @@ -719,9 +761,9 @@ hotfix_primary_boot_sector: * @b: boot sector to parse * * Parse the ntfs boot sector @b and store all imporant information therein in - * the ntfs super block @vol. Return TRUE on success and FALSE on error. + * the ntfs super block @vol. Return 'true' on success and 'false' on error. */ -static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) +static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) { unsigned int sectors_per_cluster_bits, nr_hidden_sects; int clusters_per_mft_record, clusters_per_index_record; @@ -733,9 +775,13 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) vol->sector_size); ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits, vol->sector_size_bits); - if (vol->sector_size != vol->sb->s_blocksize) - ntfs_warning(vol->sb, "The boot sector indicates a sector size " - "different from the device sector size."); + if (vol->sector_size < vol->sb->s_blocksize) { + ntfs_error(vol->sb, "Sector size (%i) is smaller than the " + "device block size (%lu). This is not " + "supported. Sorry.", vol->sector_size, + vol->sb->s_blocksize); + return false; + } ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster); sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1; ntfs_debug("sectors_per_cluster_bits = 0x%x", @@ -748,17 +794,12 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size, vol->cluster_size); ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask); - ntfs_debug("vol->cluster_size_bits = %i (0x%x)", - vol->cluster_size_bits, vol->cluster_size_bits); - if (vol->sector_size > vol->cluster_size) { - ntfs_error(vol->sb, "Sector sizes above the cluster size are " - "not supported. Sorry."); - return FALSE; - } - if (vol->sb->s_blocksize > vol->cluster_size) { - ntfs_error(vol->sb, "Cluster sizes smaller than the device " - "sector size are not supported. Sorry."); - return FALSE; + ntfs_debug("vol->cluster_size_bits = %i", vol->cluster_size_bits); + if (vol->cluster_size < vol->sector_size) { + ntfs_error(vol->sb, "Cluster size (%i) is smaller than the " + "sector size (%i). This is not supported. " + "Sorry.", vol->cluster_size, vol->sector_size); + return false; } clusters_per_mft_record = b->clusters_per_mft_record; ntfs_debug("clusters_per_mft_record = %i (0x%x)", @@ -786,12 +827,19 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) * we store $MFT/$DATA, the table of mft records in the page cache. */ if (vol->mft_record_size > PAGE_CACHE_SIZE) { - ntfs_error(vol->sb, "Mft record size %i (0x%x) exceeds the " - "page cache size on your system %lu (0x%lx). " + ntfs_error(vol->sb, "Mft record size (%i) exceeds the " + "PAGE_CACHE_SIZE on your system (%lu). " "This is not supported. Sorry.", - vol->mft_record_size, vol->mft_record_size, - PAGE_CACHE_SIZE, PAGE_CACHE_SIZE); - return FALSE; + vol->mft_record_size, PAGE_CACHE_SIZE); + return false; + } + /* We cannot support mft record sizes below the sector size. */ + if (vol->mft_record_size < vol->sector_size) { + ntfs_error(vol->sb, "Mft record size (%i) is smaller than the " + "sector size (%i). This is not supported. " + "Sorry.", vol->mft_record_size, + vol->sector_size); + return false; } clusters_per_index_record = b->clusters_per_index_record; ntfs_debug("clusters_per_index_record = %i (0x%x)", @@ -816,6 +864,14 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) ntfs_debug("vol->index_record_size_bits = %i (0x%x)", vol->index_record_size_bits, vol->index_record_size_bits); + /* We cannot support index record sizes below the sector size. */ + if (vol->index_record_size < vol->sector_size) { + ntfs_error(vol->sb, "Index record size (%i) is smaller than " + "the sector size (%i). This is not " + "supported. Sorry.", vol->index_record_size, + vol->sector_size); + return false; + } /* * Get the size of the volume in clusters and check for 64-bit-ness. * Windows currently only uses 32 bits to save the clusters so we do @@ -824,7 +880,7 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits; if ((u64)ll >= 1ULL << 32) { ntfs_error(vol->sb, "Cannot handle 64-bit clusters. Sorry."); - return FALSE; + return false; } vol->nr_clusters = ll; ntfs_debug("vol->nr_clusters = 0x%llx", (long long)vol->nr_clusters); @@ -840,21 +896,24 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) "Maximum supported is 2TiB. Sorry.", (unsigned long long)ll >> (40 - vol->cluster_size_bits)); - return FALSE; + return false; } } ll = sle64_to_cpu(b->mft_lcn); if (ll >= vol->nr_clusters) { - ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird."); - return FALSE; + ntfs_error(vol->sb, "MFT LCN (%lli, 0x%llx) is beyond end of " + "volume. Weird.", (unsigned long long)ll, + (unsigned long long)ll); + return false; } vol->mft_lcn = ll; ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn); ll = sle64_to_cpu(b->mftmirr_lcn); if (ll >= vol->nr_clusters) { - ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. " - "Weird."); - return FALSE; + ntfs_error(vol->sb, "MFTMirr LCN (%lli, 0x%llx) is beyond end " + "of volume. Weird.", (unsigned long long)ll, + (unsigned long long)ll); + return false; } vol->mftmirr_lcn = ll; ntfs_debug("vol->mftmirr_lcn = 0x%llx", (long long)vol->mftmirr_lcn); @@ -877,7 +936,7 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) vol->serial_no = le64_to_cpu(b->volume_serial_number); ntfs_debug("vol->serial_no = 0x%llx", (unsigned long long)vol->serial_no); - return TRUE; + return true; } /** @@ -970,9 +1029,9 @@ static void ntfs_setup_allocators(ntfs_volume *vol) * load_and_init_mft_mirror - load and setup the mft mirror inode for a volume * @vol: ntfs super block describing device whose mft mirror to load * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. */ -static BOOL load_and_init_mft_mirror(ntfs_volume *vol) +static bool load_and_init_mft_mirror(ntfs_volume *vol) { struct inode *tmp_ino; ntfs_inode *tmp_ni; @@ -984,14 +1043,15 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol) if (!IS_ERR(tmp_ino)) iput(tmp_ino); /* Caller will display error message. */ - return FALSE; + return false; } /* * Re-initialize some specifics about $MFTMirr's inode as * ntfs_read_inode() will have set up the default ones. */ /* Set uid and gid to root. */ - tmp_ino->i_uid = tmp_ino->i_gid = 0; + tmp_ino->i_uid = GLOBAL_ROOT_UID; + tmp_ino->i_gid = GLOBAL_ROOT_GID; /* Regular file. No access for anyone. */ tmp_ino->i_mode = S_IFREG; /* No VFS initiated operations allowed for $MFTMirr. */ @@ -1011,20 +1071,20 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol) tmp_ni->itype.index.block_size_bits = vol->mft_record_size_bits; vol->mftmirr_ino = tmp_ino; ntfs_debug("Done."); - return TRUE; + return true; } /** * check_mft_mirror - compare contents of the mft mirror with the mft * @vol: ntfs super block describing device whose mft mirror to check * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. * * Note, this function also results in the mft mirror runlist being completely * mapped into memory. The mft mirror write code requires this and will BUG() * should it find an unmapped runlist element. */ -static BOOL check_mft_mirror(ntfs_volume *vol) +static bool check_mft_mirror(ntfs_volume *vol) { struct super_block *sb = vol->sb; ntfs_inode *mirr_ni; @@ -1056,7 +1116,7 @@ static BOOL check_mft_mirror(ntfs_volume *vol) index); if (IS_ERR(mft_page)) { ntfs_error(sb, "Failed to read $MFT."); - return FALSE; + return false; } kmft = page_address(mft_page); /* Get the $MFTMirr page. */ @@ -1069,26 +1129,38 @@ static BOOL check_mft_mirror(ntfs_volume *vol) kmirr = page_address(mirr_page); ++index; } - /* Make sure the record is ok. */ - if (ntfs_is_baad_recordp((le32*)kmft)) { - ntfs_error(sb, "Incomplete multi sector transfer " - "detected in mft record %i.", i); + /* Do not check the record if it is not in use. */ + if (((MFT_RECORD*)kmft)->flags & MFT_RECORD_IN_USE) { + /* Make sure the record is ok. */ + if (ntfs_is_baad_recordp((le32*)kmft)) { + ntfs_error(sb, "Incomplete multi sector " + "transfer detected in mft " + "record %i.", i); mm_unmap_out: - ntfs_unmap_page(mirr_page); + ntfs_unmap_page(mirr_page); mft_unmap_out: - ntfs_unmap_page(mft_page); - return FALSE; + ntfs_unmap_page(mft_page); + return false; + } } - if (ntfs_is_baad_recordp((le32*)kmirr)) { - ntfs_error(sb, "Incomplete multi sector transfer " - "detected in mft mirror record %i.", i); - goto mm_unmap_out; + /* Do not check the mirror record if it is not in use. */ + if (((MFT_RECORD*)kmirr)->flags & MFT_RECORD_IN_USE) { + if (ntfs_is_baad_recordp((le32*)kmirr)) { + ntfs_error(sb, "Incomplete multi sector " + "transfer detected in mft " + "mirror record %i.", i); + goto mm_unmap_out; + } } /* Get the amount of data in the current record. */ bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use); - if (!bytes || bytes > vol->mft_record_size) { + if (bytes < sizeof(MFT_RECORD_OLD) || + bytes > vol->mft_record_size || + ntfs_is_baad_recordp((le32*)kmft)) { bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use); - if (!bytes || bytes > vol->mft_record_size) + if (bytes < sizeof(MFT_RECORD_OLD) || + bytes > vol->mft_record_size || + ntfs_is_baad_recordp((le32*)kmirr)) bytes = vol->mft_record_size; } /* Compare the two records. */ @@ -1127,21 +1199,21 @@ mft_unmap_out: ntfs_error(sb, "$MFTMirr location mismatch. " "Run chkdsk."); up_read(&mirr_ni->runlist.lock); - return FALSE; + return false; } } while (rl2[i++].length); up_read(&mirr_ni->runlist.lock); ntfs_debug("Done."); - return TRUE; + return true; } /** * load_and_check_logfile - load and check the logfile inode for a volume * @vol: ntfs super block describing device whose logfile to load * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. */ -static BOOL load_and_check_logfile(ntfs_volume *vol, +static bool load_and_check_logfile(ntfs_volume *vol, RESTART_PAGE_HEADER **rp) { struct inode *tmp_ino; @@ -1152,17 +1224,17 @@ static BOOL load_and_check_logfile(ntfs_volume *vol, if (!IS_ERR(tmp_ino)) iput(tmp_ino); /* Caller will display error message. */ - return FALSE; + return false; } if (!ntfs_check_logfile(tmp_ino, rp)) { iput(tmp_ino); /* ntfs_check_logfile() will have displayed error output. */ - return FALSE; + return false; } NInoSetSparseDisabled(NTFS_I(tmp_ino)); vol->logfile_ino = tmp_ino; ntfs_debug("Done."); - return TRUE; + return true; } #define NTFS_HIBERFIL_HEADER_SIZE 4096 @@ -1195,28 +1267,27 @@ static int check_windows_hibernation_status(ntfs_volume *vol) { MFT_REF mref; struct inode *vi; - ntfs_inode *ni; struct page *page; u32 *kaddr, *kend; ntfs_name *name = NULL; int ret = 1; - static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'), - const_cpu_to_le16('i'), const_cpu_to_le16('b'), - const_cpu_to_le16('e'), const_cpu_to_le16('r'), - const_cpu_to_le16('f'), const_cpu_to_le16('i'), - const_cpu_to_le16('l'), const_cpu_to_le16('.'), - const_cpu_to_le16('s'), const_cpu_to_le16('y'), - const_cpu_to_le16('s'), 0 }; + static const ntfschar hiberfil[13] = { cpu_to_le16('h'), + cpu_to_le16('i'), cpu_to_le16('b'), + cpu_to_le16('e'), cpu_to_le16('r'), + cpu_to_le16('f'), cpu_to_le16('i'), + cpu_to_le16('l'), cpu_to_le16('.'), + cpu_to_le16('s'), cpu_to_le16('y'), + cpu_to_le16('s'), 0 }; ntfs_debug("Entering."); /* * Find the inode number for the hibernation file by looking up the * filename hiberfil.sys in the root directory. */ - down(&vol->root_ino->i_sem); + mutex_lock(&vol->root_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12, &name); - up(&vol->root_ino->i_sem); + mutex_unlock(&vol->root_ino->i_mutex); if (IS_ERR_MREF(mref)) { ret = MREF_ERR(mref); /* If the file does not exist, Windows is not hibernated. */ @@ -1225,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol) "hibernated on the volume."); return 0; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for " "hiberfil.sys."); return ret; @@ -1246,7 +1317,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol) "is not the system volume.", i_size_read(vi)); goto iput_out; } - ni = NTFS_I(vi); page = ntfs_map_page(vi->i_mapping, 0); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read from hiberfil.sys."); @@ -1254,7 +1324,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol) goto iput_out; } kaddr = (u32*)page_address(page); - if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) { + if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) { ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is " "hibernated on the volume. This is the " "system volume."); @@ -1287,30 +1357,30 @@ iput_out: * load_and_init_quota - load and setup the quota file for a volume if present * @vol: ntfs super block describing device whose quota file to load * - * Return TRUE on success or FALSE on error. If $Quota is not present, we + * Return 'true' on success or 'false' on error. If $Quota is not present, we * leave vol->quota_ino as NULL and return success. */ -static BOOL load_and_init_quota(ntfs_volume *vol) +static bool load_and_init_quota(ntfs_volume *vol) { MFT_REF mref; struct inode *tmp_ino; ntfs_name *name = NULL; - static const ntfschar Quota[7] = { const_cpu_to_le16('$'), - const_cpu_to_le16('Q'), const_cpu_to_le16('u'), - const_cpu_to_le16('o'), const_cpu_to_le16('t'), - const_cpu_to_le16('a'), 0 }; - static ntfschar Q[3] = { const_cpu_to_le16('$'), - const_cpu_to_le16('Q'), 0 }; + static const ntfschar Quota[7] = { cpu_to_le16('$'), + cpu_to_le16('Q'), cpu_to_le16('u'), + cpu_to_le16('o'), cpu_to_le16('t'), + cpu_to_le16('a'), 0 }; + static ntfschar Q[3] = { cpu_to_le16('$'), + cpu_to_le16('Q'), 0 }; ntfs_debug("Entering."); /* * Find the inode number for the quota file by looking up the filename * $Quota in the extended system files directory $Extend. */ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, quotas are disabled and have @@ -1324,11 +1394,11 @@ static BOOL load_and_init_quota(ntfs_volume *vol) * not enabled. */ NVolSetQuotaOutOfDate(vol); - return TRUE; + return true; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for $Quota."); - return FALSE; + return false; } /* We do not care for the type of match that was found. */ kfree(name); @@ -1338,25 +1408,25 @@ static BOOL load_and_init_quota(ntfs_volume *vol) if (!IS_ERR(tmp_ino)) iput(tmp_ino); ntfs_error(vol->sb, "Failed to load $Quota."); - return FALSE; + return false; } vol->quota_ino = tmp_ino; /* Get the $Q index allocation attribute. */ tmp_ino = ntfs_index_iget(vol->quota_ino, Q, 2); if (IS_ERR(tmp_ino)) { ntfs_error(vol->sb, "Failed to load $Quota/$Q index."); - return FALSE; + return false; } vol->quota_q_ino = tmp_ino; ntfs_debug("Done."); - return TRUE; + return true; } /** * load_and_init_usnjrnl - load and setup the transaction log if present * @vol: ntfs super block describing device whose usnjrnl file to load * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. * * If $UsnJrnl is not present or in the process of being disabled, we set * NVolUsnJrnlStamped() and return success. @@ -1366,7 +1436,7 @@ static BOOL load_and_init_quota(ntfs_volume *vol) * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped() * and return success. */ -static BOOL load_and_init_usnjrnl(ntfs_volume *vol) +static bool load_and_init_usnjrnl(ntfs_volume *vol) { MFT_REF mref; struct inode *tmp_ino; @@ -1374,26 +1444,26 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol) struct page *page; ntfs_name *name = NULL; USN_HEADER *uh; - static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'), - const_cpu_to_le16('U'), const_cpu_to_le16('s'), - const_cpu_to_le16('n'), const_cpu_to_le16('J'), - const_cpu_to_le16('r'), const_cpu_to_le16('n'), - const_cpu_to_le16('l'), 0 }; - static ntfschar Max[5] = { const_cpu_to_le16('$'), - const_cpu_to_le16('M'), const_cpu_to_le16('a'), - const_cpu_to_le16('x'), 0 }; - static ntfschar J[3] = { const_cpu_to_le16('$'), - const_cpu_to_le16('J'), 0 }; + static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'), + cpu_to_le16('U'), cpu_to_le16('s'), + cpu_to_le16('n'), cpu_to_le16('J'), + cpu_to_le16('r'), cpu_to_le16('n'), + cpu_to_le16('l'), 0 }; + static ntfschar Max[5] = { cpu_to_le16('$'), + cpu_to_le16('M'), cpu_to_le16('a'), + cpu_to_le16('x'), 0 }; + static ntfschar J[3] = { cpu_to_le16('$'), + cpu_to_le16('J'), 0 }; ntfs_debug("Entering."); /* * Find the inode number for the transaction log file by looking up the * filename $UsnJrnl in the extended system files directory $Extend. */ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, transaction logging is disabled, @@ -1408,12 +1478,12 @@ not_enabled: * transaction logging is not enabled. */ NVolSetUsnJrnlStamped(vol); - return TRUE; + return true; } - /* A real error occured. */ + /* A real error occurred. */ ntfs_error(vol->sb, "Failed to find inode number for " "$UsnJrnl."); - return FALSE; + return false; } /* We do not care for the type of match that was found. */ kfree(name); @@ -1423,7 +1493,7 @@ not_enabled: if (!IS_ERR(tmp_ino)) iput(tmp_ino); ntfs_error(vol->sb, "Failed to load $UsnJrnl."); - return FALSE; + return false; } vol->usnjrnl_ino = tmp_ino; /* @@ -1441,22 +1511,22 @@ not_enabled: if (IS_ERR(tmp_ino)) { ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max " "attribute."); - return FALSE; + return false; } vol->usnjrnl_max_ino = tmp_ino; if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) { ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max " "attribute (size is 0x%llx but should be at " - "least 0x%x bytes).", i_size_read(tmp_ino), + "least 0x%zx bytes).", i_size_read(tmp_ino), sizeof(USN_HEADER)); - return FALSE; + return false; } /* Get the $DATA/$J attribute. */ tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2); if (IS_ERR(tmp_ino)) { ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J " "attribute."); - return FALSE; + return false; } vol->usnjrnl_j_ino = tmp_ino; /* Verify $J is non-resident and sparse. */ @@ -1464,14 +1534,14 @@ not_enabled: if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) { ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident " "and/or not sparse."); - return FALSE; + return false; } /* Read the USN_HEADER from $DATA/$Max. */ page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0); if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max " "attribute."); - return FALSE; + return false; } uh = (USN_HEADER*)page_address(page); /* Sanity check the $Max. */ @@ -1482,7 +1552,7 @@ not_enabled: (long long)sle64_to_cpu(uh->allocation_delta), (long long)sle64_to_cpu(uh->maximum_size)); ntfs_unmap_page(page); - return FALSE; + return false; } /* * If the transaction log has been stamped and nothing has been written @@ -1506,20 +1576,20 @@ not_enabled: (long long)sle64_to_cpu(uh->lowest_valid_usn), i_size_read(vol->usnjrnl_j_ino)); ntfs_unmap_page(page); - return FALSE; + return false; } ntfs_unmap_page(page); ntfs_debug("Done."); - return TRUE; + return true; } /** * load_and_init_attrdef - load the attribute definitions table for a volume * @vol: ntfs super block describing device whose attrdef to load * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. */ -static BOOL load_and_init_attrdef(ntfs_volume *vol) +static bool load_and_init_attrdef(ntfs_volume *vol) { loff_t i_size; struct super_block *sb = vol->sb; @@ -1565,7 +1635,7 @@ read_partial_attrdef_page: vol->attrdef_size = i_size; ntfs_debug("Read %llu bytes from $AttrDef.", i_size); iput(ino); - return TRUE; + return true; free_iput_failed: ntfs_free(vol->attrdef); vol->attrdef = NULL; @@ -1573,7 +1643,7 @@ iput_failed: iput(ino); failed: ntfs_error(sb, "Failed to initialize attribute definition table."); - return FALSE; + return false; } #endif /* NTFS_RW */ @@ -1582,9 +1652,9 @@ failed: * load_and_init_upcase - load the upcase table for an ntfs volume * @vol: ntfs super block describing device whose upcase to load * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. */ -static BOOL load_and_init_upcase(ntfs_volume *vol) +static bool load_and_init_upcase(ntfs_volume *vol) { loff_t i_size; struct super_block *sb = vol->sb; @@ -1635,12 +1705,12 @@ read_partial_upcase_page: ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", i_size, 64 * 1024 * sizeof(ntfschar)); iput(ino); - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!default_upcase) { ntfs_debug("Using volume specified $UpCase since default is " "not present."); - up(&ntfs_lock); - return TRUE; + mutex_unlock(&ntfs_lock); + return true; } max = default_upcase_len; if (max > vol->upcase_len) @@ -1653,35 +1723,43 @@ read_partial_upcase_page: vol->upcase = default_upcase; vol->upcase_len = max; ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_debug("Volume specified $UpCase matches default. Using " "default."); - return TRUE; + return true; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_debug("Using volume specified $UpCase since it does not match " "the default."); - return TRUE; + return true; iput_upcase_failed: iput(ino); ntfs_free(vol->upcase); vol->upcase = NULL; upcase_failed: - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (default_upcase) { vol->upcase = default_upcase; vol->upcase_len = default_upcase_len; ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_error(sb, "Failed to load $UpCase from the volume. Using " "default."); - return TRUE; + return true; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); ntfs_error(sb, "Failed to initialize upcase table."); - return FALSE; + return false; } +/* + * The lcn and mft bitmap inodes are NTFS-internal inodes with + * their own special locking rules: + */ +static struct lock_class_key + lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, + mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; + /** * load_system_files - open the system files using normal functions * @vol: ntfs super block describing device whose system files to load @@ -1689,9 +1767,9 @@ upcase_failed: * Open the system files with normal access functions and complete setting up * the ntfs super block @vol. * - * Return TRUE on success or FALSE on error. + * Return 'true' on success or 'false' on error. */ -static BOOL load_system_files(ntfs_volume *vol) +static bool load_system_files(ntfs_volume *vol) { struct super_block *sb = vol->sb; MFT_RECORD *m; @@ -1721,7 +1799,7 @@ static BOOL load_system_files(ntfs_volume *vol) es3); goto iput_mirr_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", !vol->mftmirr_ino ? es1 : es2, es3); } else @@ -1738,6 +1816,10 @@ static BOOL load_system_files(ntfs_volume *vol) ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); goto iput_mirr_err_out; } + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, + &mftbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, + &mftbmp_mrec_lock_key); /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ if (!load_and_init_upcase(vol)) goto iput_mftbmp_err_out; @@ -1760,6 +1842,11 @@ static BOOL load_system_files(ntfs_volume *vol) iput(vol->lcnbmp_ino); goto bitmap_failed; } + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, + &lcnbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, + &lcnbmp_mrec_lock_key); + NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { iput(vol->lcnbmp_ino); @@ -1810,7 +1897,7 @@ get_ctx_vol_failed: vol->minor_ver = vi->minor_ver; ntfs_attr_put_search_ctx(ctx); unmap_mft_record(NTFS_I(vol->vol_ino)); - printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, + pr_info("volume version %i.%i.\n", vol->major_ver, vol->minor_ver); if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " @@ -1822,11 +1909,24 @@ get_ctx_vol_failed: /* Make sure that no unsupported volume flags are set. */ if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { static const char *es1a = "Volume is dirty"; - static const char *es1b = "Volume has unsupported flags set"; - static const char *es2 = ". Run chkdsk and mount in Windows."; - const char *es1; - - es1 = vol->vol_flags & VOLUME_IS_DIRTY ? es1a : es1b; + static const char *es1b = "Volume has been modified by chkdsk"; + static const char *es1c = "Volume has unsupported flags set"; + static const char *es2a = ". Run chkdsk and mount in Windows."; + static const char *es2b = ". Mount in Windows."; + const char *es1, *es2; + + es2 = es2a; + if (vol->vol_flags & VOLUME_IS_DIRTY) + es1 = es1a; + else if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { + es1 = es1b; + es2 = es2b; + } else { + es1 = es1c; + ntfs_warning(sb, "Unsupported volume flags 0x%x " + "encountered.", + (unsigned)le16_to_cpu(vol->vol_flags)); + } /* If a read-write mount, convert it to a read-only mount. */ if (!(sb->s_flags & MS_RDONLY)) { if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | @@ -1837,7 +1937,7 @@ get_ctx_vol_failed: es1, es2); goto iput_vol_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1874,7 +1974,7 @@ get_ctx_vol_failed: } goto iput_logfile_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1919,7 +2019,7 @@ get_ctx_vol_failed: es1, es2); goto iput_root_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -1943,7 +2043,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; /* * Do not set NVolErrors() because ntfs_remount() might manage * to set the dirty flag in which case all would be well. @@ -1970,7 +2070,7 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif @@ -1989,13 +2089,13 @@ get_ctx_vol_failed: goto iput_root_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ /* If on NTFS versions before 3.0, we are done. */ if (unlikely(vol->major_ver < 3)) - return TRUE; + return true; /* NTFS 3.0+ specific initialization. */ /* Get the security descriptors inode. */ vol->secure_ino = ntfs_iget(sb, FILE_Secure); @@ -2030,7 +2130,7 @@ get_ctx_vol_failed: es1, es2); goto iput_quota_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2053,7 +2153,7 @@ get_ctx_vol_failed: goto iput_quota_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } /* @@ -2074,7 +2174,7 @@ get_ctx_vol_failed: es1, es2); goto iput_usnjrnl_err_out; } - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); } else ntfs_warning(sb, "%s. Will not be able to remount " @@ -2097,11 +2197,11 @@ get_ctx_vol_failed: goto iput_usnjrnl_err_out; } ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; NVolSetErrors(vol); } #endif /* NTFS_RW */ - return TRUE; + return true; #ifdef NTFS_RW iput_usnjrnl_err_out: if (vol->usnjrnl_j_ino) @@ -2140,12 +2240,12 @@ iput_attrdef_err_out: iput_upcase_err_out: #endif /* NTFS_RW */ vol->upcase_len = 0; - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; @@ -2157,7 +2257,7 @@ iput_mirr_err_out: if (vol->mftmirr_ino) iput(vol->mftmirr_ino); #endif /* NTFS_RW */ - return FALSE; + return false; } /** @@ -2174,6 +2274,7 @@ static void ntfs_put_super(struct super_block *sb) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering."); + #ifdef NTFS_RW /* * Commit all inodes while they are still open in case some of them @@ -2217,7 +2318,7 @@ static void ntfs_put_super(struct super_block *sb) ntfs_commit_inode(vol->mft_ino); /* - * If a read-write mount and no volume errors have occured, mark the + * If a read-write mount and no volume errors have occurred, mark the * volume clean. Also, re-commit all affected inodes. */ if (!(sb->s_flags & MS_RDONLY)) { @@ -2301,39 +2402,12 @@ static void ntfs_put_super(struct super_block *sb) vol->mftmirr_ino = NULL; } /* - * If any dirty inodes are left, throw away all mft data page cache - * pages to allow a clean umount. This should never happen any more - * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as - * the underlying mft records are written out and cleaned. If it does, - * happen anyway, we want to know... + * We should have no dirty inodes left, due to + * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as + * the underlying mft records are written out and cleaned. */ ntfs_commit_inode(vol->mft_ino); write_inode_now(vol->mft_ino, 1); - if (!list_empty(&sb->s_dirty)) { - const char *s1, *s2; - - down(&vol->mft_ino->i_sem); - truncate_inode_pages(vol->mft_ino->i_mapping, 0); - up(&vol->mft_ino->i_sem); - write_inode_now(vol->mft_ino, 1); - if (!list_empty(&sb->s_dirty)) { - static const char *_s1 = "inodes"; - static const char *_s2 = ""; - s1 = _s1; - s2 = _s2; - } else { - static const char *_s1 = "mft pages"; - static const char *_s2 = "They have been thrown " - "away. "; - s1 = _s1; - s2 = _s2; - } - ntfs_error(sb, "Dirty %s found at umount time. %sYou should " - "run chkdsk. Please email " - "linux-ntfs-dev@lists.sourceforge.net and say " - "that you saw this message. Thank you.", s1, - s2); - } #endif /* NTFS_RW */ iput(vol->mft_ino); @@ -2350,7 +2424,7 @@ static void ntfs_put_super(struct super_block *sb) * Destroy the global default upcase table if necessary. Also decrease * the number of upcase users if we are a user. */ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; @@ -2361,18 +2435,16 @@ static void ntfs_put_super(struct super_block *sb) } if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) free_compression_buffers(); - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; } - if (vol->nls_map) { - unload_nls(vol->nls_map); - vol->nls_map = NULL; - } + + unload_nls(vol->nls_map); + sb->s_fs_info = NULL; kfree(vol); - return; } /** @@ -2397,9 +2469,7 @@ static void ntfs_put_super(struct super_block *sb) static s64 get_nr_free_clusters(ntfs_volume *vol) { s64 nr_free = vol->nr_clusters; - u32 *kaddr; struct address_space *mapping = vol->lcnbmp_ino->i_mapping; - filler_t *readpage = (filler_t*)mapping->a_ops->readpage; struct page *page; pgoff_t index, max_index; @@ -2417,40 +2487,31 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.", max_index, PAGE_CACHE_SIZE / 4); for (index = 0; index < max_index; index++) { - unsigned int i; + unsigned long *kaddr; + /* * Read the page from page cache, getting it from backing store * if necessary, and increment the use count. */ - page = read_cache_page(mapping, index, (filler_t*)readpage, - NULL); + page = read_mapping_page(mapping, index, NULL); /* Ignore pages which errored synchronously. */ if (IS_ERR(page)) { - ntfs_debug("Sync read_cache_page() error. Skipping " - "page (index 0x%lx).", index); - nr_free -= PAGE_CACHE_SIZE * 8; - continue; - } - wait_on_page_locked(page); - /* Ignore pages which errored asynchronously. */ - if (!PageUptodate(page)) { - ntfs_debug("Async read_cache_page() error. Skipping " + ntfs_debug("read_mapping_page() error. Skipping " "page (index 0x%lx).", index); - page_cache_release(page); nr_free -= PAGE_CACHE_SIZE * 8; continue; } - kaddr = (u32*)kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); /* - * For each 4 bytes, subtract the number of set bits. If this + * Subtract the number of set bits. If this * is the last page and it is partial we don't really care as * it just means we do a little extra work but it won't affect * the result as all out of range bytes are set to zero by * ntfs_readpage(). */ - for (i = 0; i < PAGE_CACHE_SIZE / 4; i++) - nr_free -= (s64)hweight32(kaddr[i]); - kunmap_atomic(kaddr, KM_USER0); + nr_free -= bitmap_weight(kaddr, + PAGE_CACHE_SIZE * BITS_PER_BYTE); + kunmap_atomic(kaddr); page_cache_release(page); } ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); @@ -2461,7 +2522,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) if (vol->nr_clusters & 63) nr_free += 64 - (vol->nr_clusters & 63); up_read(&vol->lcnbmp_lock); - /* If errors occured we may well have gone below zero, fix this. */ + /* If errors occurred we may well have gone below zero, fix this. */ if (nr_free < 0) nr_free = 0; ntfs_debug("Exiting."); @@ -2488,9 +2549,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol) static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, s64 nr_free, const pgoff_t max_index) { - u32 *kaddr; struct address_space *mapping = vol->mftbmp_ino->i_mapping; - filler_t *readpage = (filler_t*)mapping->a_ops->readpage; struct page *page; pgoff_t index; @@ -2499,45 +2558,36 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " "0x%lx.", max_index, PAGE_CACHE_SIZE / 4); for (index = 0; index < max_index; index++) { - unsigned int i; + unsigned long *kaddr; + /* * Read the page from page cache, getting it from backing store * if necessary, and increment the use count. */ - page = read_cache_page(mapping, index, (filler_t*)readpage, - NULL); + page = read_mapping_page(mapping, index, NULL); /* Ignore pages which errored synchronously. */ if (IS_ERR(page)) { - ntfs_debug("Sync read_cache_page() error. Skipping " + ntfs_debug("read_mapping_page() error. Skipping " "page (index 0x%lx).", index); nr_free -= PAGE_CACHE_SIZE * 8; continue; } - wait_on_page_locked(page); - /* Ignore pages which errored asynchronously. */ - if (!PageUptodate(page)) { - ntfs_debug("Async read_cache_page() error. Skipping " - "page (index 0x%lx).", index); - page_cache_release(page); - nr_free -= PAGE_CACHE_SIZE * 8; - continue; - } - kaddr = (u32*)kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); /* - * For each 4 bytes, subtract the number of set bits. If this + * Subtract the number of set bits. If this * is the last page and it is partial we don't really care as * it just means we do a little extra work but it won't affect * the result as all out of range bytes are set to zero by * ntfs_readpage(). */ - for (i = 0; i < PAGE_CACHE_SIZE / 4; i++) - nr_free -= (s64)hweight32(kaddr[i]); - kunmap_atomic(kaddr, KM_USER0); + nr_free -= bitmap_weight(kaddr, + PAGE_CACHE_SIZE * BITS_PER_BYTE); + kunmap_atomic(kaddr); page_cache_release(page); } ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", index - 1); - /* If errors occured we may well have gone below zero, fix this. */ + /* If errors occurred we may well have gone below zero, fix this. */ if (nr_free < 0) nr_free = 0; ntfs_debug("Exiting."); @@ -2546,10 +2596,10 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, /** * ntfs_statfs - return information about mounted NTFS volume - * @sb: super block of mounted volume + * @dentry: dentry from mounted volume * @sfs: statfs structure in which to return the information * - * Return information about the mounted NTFS volume @sb in the statfs structure + * Return information about the mounted NTFS volume @dentry in the statfs structure * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is * called). We interpret the values to be correct of the moment in time at * which we are called. Most values are variable otherwise and this isn't just @@ -2562,8 +2612,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, * * Return 0 on success or -errno on error. */ -static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) +static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) { + struct super_block *sb = dentry->d_sb; s64 size; ntfs_volume *vol = NTFS_SB(sb); ntfs_inode *mft_ni = NTFS_I(vol->mft_ino); @@ -2623,41 +2674,28 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) return 0; } +#ifdef NTFS_RW +static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) +{ + return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL); +} +#endif + /** * The complete super operations. */ -static struct super_operations ntfs_sops = { +static const struct super_operations ntfs_sops = { .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ - .put_inode = ntfs_put_inode, /* VFS: Called just before - the inode reference count - is decreased. */ #ifdef NTFS_RW - //.dirty_inode = NULL, /* VFS: Called from - // __mark_inode_dirty(). */ .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to disk. */ - //.drop_inode = NULL, /* VFS: Called just after the - // inode reference count has - // been decreased to zero. - // NOTE: The inode lock is - // held. See fs/inode.c:: - // generic_drop_inode(). */ - //.delete_inode = NULL, /* VFS: Delete inode from disk. - // Called when i_count becomes - // 0 and i_nlink is also 0. */ - //.write_super = NULL, /* Flush dirty super block to - // disk. */ - //.sync_fs = NULL, /* ? */ - //.write_super_lockfs = NULL, /* ? */ - //.unlockfs = NULL, /* ? */ #endif /* NTFS_RW */ .put_super = ntfs_put_super, /* Syscall: umount. */ .statfs = ntfs_statfs, /* Syscall: statfs */ .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ - .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is + .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is removed from memory. */ - //.umount_begin = NULL, /* Forced umount. */ .show_options = ntfs_show_options, /* Show mount options in proc. */ }; @@ -2685,11 +2723,22 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_volume *vol; struct buffer_head *bh; struct inode *tmp_ino; - int result; + int blocksize, result; + /* + * We do a pretty difficult piece of bootstrap by reading the + * MFT (and other metadata) from disk into memory. We'll only + * release this metadata during umount, so the locking patterns + * observed during bootstrap do not count. So turn off the + * observation of locking patterns (strictly for this context + * only) while mounting NTFS. [The validator is still active + * otherwise, even for this context: it will for example record + * lock class registrations.] + */ + lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW - sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; + sb->s_flags |= MS_RDONLY; #endif /* ! NTFS_RW */ /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); @@ -2698,6 +2747,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!silent) ntfs_error(sb, "Allocation of NTFS volume structure " "failed. Aborting mount..."); + lockdep_on(); return -ENOMEM; } /* Initialize ntfs_volume structure. */ @@ -2715,8 +2765,6 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) init_rwsem(&vol->mftbmp_lock); init_rwsem(&vol->lcnbmp_lock); - unlock_kernel(); - /* By default, enable sparse support. */ NVolSetSparseEnabled(vol); @@ -2724,60 +2772,85 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!parse_options(vol, (char*)opt)) goto err_out_now; + /* We support sector sizes up to the PAGE_CACHE_SIZE. */ + if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) { + if (!silent) + ntfs_error(sb, "Device has unsupported sector size " + "(%i). The maximum supported sector " + "size on this architecture is %lu " + "bytes.", + bdev_logical_block_size(sb->s_bdev), + PAGE_CACHE_SIZE); + goto err_out_now; + } /* - * TODO: Fail safety check. In the future we should really be able to - * cope with this being the case, but for now just bail out. + * Setup the device access block size to NTFS_BLOCK_SIZE or the hard + * sector size, whichever is bigger. */ - if (bdev_hardsect_size(sb->s_bdev) > NTFS_BLOCK_SIZE) { + blocksize = sb_min_blocksize(sb, NTFS_BLOCK_SIZE); + if (blocksize < NTFS_BLOCK_SIZE) { if (!silent) - ntfs_error(sb, "Device has unsupported hardsect_size."); + ntfs_error(sb, "Unable to set device block size."); goto err_out_now; } - - /* Setup the device access block size to NTFS_BLOCK_SIZE. */ - if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) { + BUG_ON(blocksize != sb->s_blocksize); + ntfs_debug("Set device block size to %i bytes (block size bits %i).", + blocksize, sb->s_blocksize_bits); + /* Determine the size of the device in units of block_size bytes. */ + if (!i_size_read(sb->s_bdev->bd_inode)) { if (!silent) - ntfs_error(sb, "Unable to set block size."); + ntfs_error(sb, "Unable to determine device size."); goto err_out_now; } - - /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> - NTFS_BLOCK_SIZE_BITS; - + sb->s_blocksize_bits; /* Read the boot sector and return unlocked buffer head to it. */ if (!(bh = read_ntfs_boot_sector(sb, silent))) { if (!silent) ntfs_error(sb, "Not an NTFS volume."); goto err_out_now; } - /* - * Extract the data from the boot sector and setup the ntfs super block + * Extract the data from the boot sector and setup the ntfs volume * using it. */ result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data); - - /* Initialize the cluster and mft allocators. */ - ntfs_setup_allocators(vol); - brelse(bh); - if (!result) { if (!silent) ntfs_error(sb, "Unsupported NTFS filesystem."); goto err_out_now; } - /* - * TODO: When we start coping with sector sizes different from - * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the - * device (probably to NTFS_BLOCK_SIZE). + * If the boot sector indicates a sector size bigger than the current + * device block size, switch the device block size to the sector size. + * TODO: It may be possible to support this case even when the set + * below fails, we would just be breaking up the i/o for each sector + * into multiple blocks for i/o purposes but otherwise it should just + * work. However it is safer to leave disabled until someone hits this + * error message and then we can get them to try it without the setting + * so we know for sure that it works. */ - + if (vol->sector_size > blocksize) { + blocksize = sb_set_blocksize(sb, vol->sector_size); + if (blocksize != vol->sector_size) { + if (!silent) + ntfs_error(sb, "Unable to set device block " + "size to sector size (%i).", + vol->sector_size); + goto err_out_now; + } + BUG_ON(blocksize != sb->s_blocksize); + vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> + sb->s_blocksize_bits; + ntfs_debug("Changed device block size to %i bytes (block size " + "bits %i) to match volume sector size.", + blocksize, sb->s_blocksize_bits); + } + /* Initialize the cluster and mft allocators. */ + ntfs_setup_allocators(vol); /* Setup remaining fields in the super block. */ sb->s_magic = NTFS_SB_MAGIC; - /* * Ntfs allows 63 bits for the file size, i.e. correct would be: * sb->s_maxbytes = ~0ULL >> 1; @@ -2787,9 +2860,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) * without overflowing the index or to 2^63 - 1, whichever is smaller. */ sb->s_maxbytes = MAX_LFS_FILESIZE; - + /* Ntfs measures time in 100ns intervals. */ sb->s_time_gran = 100; - /* * Now load the metadata required for the page cache and our address * space operations to function. We do this by setting up a specialised @@ -2811,7 +2883,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(sb, "Failed to load essential metadata."); goto iput_tmp_ino_err_out_now; } - down(&ntfs_lock); + mutex_lock(&ntfs_lock); /* * The current mount is a compression user if the cluster size is * less than or equal 4kiB. @@ -2822,7 +2894,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(NULL, "Failed to allocate buffers " "for compression engine."); ntfs_nr_compression_users--; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); goto iput_tmp_ino_err_out_now; } } @@ -2834,7 +2906,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!default_upcase) default_upcase = generate_default_upcase(); ntfs_nr_upcase_users++; - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); /* * From now on, ignore @silent parameter. If we fail below this line, * it will be due to a corrupt fs or a system error, so we report it. @@ -2847,19 +2919,20 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) ntfs_error(sb, "Failed to load system files."); goto unl_upcase_iput_tmp_ino_err_out_now; } - if ((sb->s_root = d_alloc_root(vol->root_ino))) { - /* We increment i_count simulating an ntfs_iget(). */ - atomic_inc(&vol->root_ino->i_count); + + /* We grab a reference, simulating an ntfs_iget(). */ + ihold(vol->root_ino); + if ((sb->s_root = d_make_root(vol->root_ino))) { ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. */ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!--ntfs_nr_upcase_users && default_upcase) { ntfs_free(default_upcase); default_upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); sb->s_export_op = &ntfs_export_ops; - lock_kernel(); + lockdep_on(); return 0; } ntfs_error(sb, "Failed to allocate root directory."); @@ -2925,12 +2998,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) vol->attrdef = NULL; } vol->upcase_len = 0; - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (vol->upcase == default_upcase) { ntfs_nr_upcase_users--; vol->upcase = NULL; } - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); if (vol->upcase) { ntfs_free(vol->upcase); vol->upcase = NULL; @@ -2945,40 +3018,25 @@ unl_upcase_iput_tmp_ino_err_out_now: * Decrease the number of upcase users and destroy the global default * upcase table if necessary. */ - down(&ntfs_lock); + mutex_lock(&ntfs_lock); if (!--ntfs_nr_upcase_users && default_upcase) { ntfs_free(default_upcase); default_upcase = NULL; } if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) free_compression_buffers(); - up(&ntfs_lock); + mutex_unlock(&ntfs_lock); iput_tmp_ino_err_out_now: iput(tmp_ino); if (vol->mft_ino && vol->mft_ino != tmp_ino) iput(vol->mft_ino); vol->mft_ino = NULL; - /* - * This is needed to get ntfs_clear_extent_inode() called for each - * inode we have ever called ntfs_iget()/iput() on, otherwise we A) - * leak resources and B) a subsequent mount fails automatically due to - * ntfs_iget() never calling down into our ntfs_read_locked_inode() - * method again... FIXME: Do we need to do this twice now because of - * attribute inodes? I think not, so leave as is for now... (AIA) - */ - if (invalidate_inodes(sb)) { - ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " - "driver bug."); - /* Copied from fs/super.c. I just love this message. (-; */ - printk("NTFS: Busy inodes after umount. Self-destruct in 5 " - "seconds. Have a nice day...\n"); - } /* Errors at this stage are irrelevant. */ err_out_now: - lock_kernel(); sb->s_fs_info = NULL; kfree(vol); ntfs_debug("Failed, returning -EINVAL."); + lockdep_on(); return -EINVAL; } @@ -2987,46 +3045,44 @@ err_out_now: * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN * (255) Unicode characters + a terminating NULL Unicode character. */ -kmem_cache_t *ntfs_name_cache; +struct kmem_cache *ntfs_name_cache; /* Slab caches for efficient allocation/deallocation of inodes. */ -kmem_cache_t *ntfs_inode_cache; -kmem_cache_t *ntfs_big_inode_cache; +struct kmem_cache *ntfs_inode_cache; +struct kmem_cache *ntfs_big_inode_cache; /* Init once constructor for the inode slab cache. */ -static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep, - unsigned long flags) +static void ntfs_big_inode_init_once(void *foo) { ntfs_inode *ni = (ntfs_inode *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - inode_init_once(VFS_I(ni)); + inode_init_once(VFS_I(ni)); } /* * Slab caches to optimize allocations and deallocations of attribute search * contexts and index contexts, respectively. */ -kmem_cache_t *ntfs_attr_ctx_cache; -kmem_cache_t *ntfs_index_ctx_cache; +struct kmem_cache *ntfs_attr_ctx_cache; +struct kmem_cache *ntfs_index_ctx_cache; -/* Driver wide semaphore. */ -DECLARE_MUTEX(ntfs_lock); +/* Driver wide mutex. */ +DEFINE_MUTEX(ntfs_lock); -static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, +static struct dentry *ntfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); } static struct file_system_type ntfs_fs_type = { .owner = THIS_MODULE, .name = "ntfs", - .get_sb = ntfs_get_sb, + .mount = ntfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ntfs"); /* Stable names for the slab caches. */ static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; @@ -3040,7 +3096,7 @@ static int __init init_ntfs_fs(void) int err = 0; /* This may be ugly but it results in pretty output so who cares. (-8 */ - printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" + pr_info("driver " NTFS_VERSION " [Flags: R/" #ifdef NTFS_RW "W" #else @@ -3058,53 +3114,49 @@ static int __init init_ntfs_fs(void) ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name, sizeof(ntfs_index_context), 0 /* offset */, - SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + SLAB_HWCACHE_ALIGN, NULL /* ctor */); if (!ntfs_index_ctx_cache) { - printk(KERN_CRIT "NTFS: Failed to create %s!\n", - ntfs_index_ctx_cache_name); + pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name); goto ictx_err_out; } ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, sizeof(ntfs_attr_search_ctx), 0 /* offset */, - SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + SLAB_HWCACHE_ALIGN, NULL /* ctor */); if (!ntfs_attr_ctx_cache) { - printk(KERN_CRIT "NTFS: Failed to create %s!\n", - ntfs_attr_ctx_cache_name); + pr_crit("NTFS: Failed to create %s!\n", + ntfs_attr_ctx_cache_name); goto actx_err_out; } ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name, (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!ntfs_name_cache) { - printk(KERN_CRIT "NTFS: Failed to create %s!\n", - ntfs_name_cache_name); + pr_crit("Failed to create %s!\n", ntfs_name_cache_name); goto name_err_out; } ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, sizeof(ntfs_inode), 0, - SLAB_RECLAIM_ACCOUNT, NULL, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); if (!ntfs_inode_cache) { - printk(KERN_CRIT "NTFS: Failed to create %s!\n", - ntfs_inode_cache_name); + pr_crit("Failed to create %s!\n", ntfs_inode_cache_name); goto inode_err_out; } ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name, sizeof(big_ntfs_inode), 0, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, - ntfs_big_inode_init_once, NULL); + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + ntfs_big_inode_init_once); if (!ntfs_big_inode_cache) { - printk(KERN_CRIT "NTFS: Failed to create %s!\n", - ntfs_big_inode_cache_name); + pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name); goto big_inode_err_out; } /* Register the ntfs sysctls. */ err = ntfs_sysctl(1); if (err) { - printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); + pr_crit("Failed to register NTFS sysctls!\n"); goto sysctl_err_out; } @@ -3113,8 +3165,10 @@ static int __init init_ntfs_fs(void) ntfs_debug("NTFS driver registered successfully."); return 0; /* Success! */ } - printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); + pr_crit("Failed to register NTFS filesystem driver!\n"); + /* Unregister the ntfs sysctls. */ + ntfs_sysctl(0); sysctl_err_out: kmem_cache_destroy(ntfs_big_inode_cache); big_inode_err_out: @@ -3127,8 +3181,7 @@ actx_err_out: kmem_cache_destroy(ntfs_index_ctx_cache); ictx_err_out: if (!err) { - printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " - "registration...\n"); + pr_crit("Aborting NTFS filesystem driver registration...\n"); err = -ENOMEM; } return err; @@ -3136,42 +3189,30 @@ ictx_err_out: static void __exit exit_ntfs_fs(void) { - int err = 0; - ntfs_debug("Unregistering NTFS driver."); unregister_filesystem(&ntfs_fs_type); - if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1)) - printk(KERN_CRIT "NTFS: Failed to destory %s.\n", - ntfs_big_inode_cache_name); - if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1)) - printk(KERN_CRIT "NTFS: Failed to destory %s.\n", - ntfs_inode_cache_name); - if (kmem_cache_destroy(ntfs_name_cache) && (err = 1)) - printk(KERN_CRIT "NTFS: Failed to destory %s.\n", - ntfs_name_cache_name); - if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1)) - printk(KERN_CRIT "NTFS: Failed to destory %s.\n", - ntfs_attr_ctx_cache_name); - if (kmem_cache_destroy(ntfs_index_ctx_cache) && (err = 1)) - printk(KERN_CRIT "NTFS: Failed to destory %s.\n", - ntfs_index_ctx_cache_name); - if (err) - printk(KERN_CRIT "NTFS: This causes memory to leak! There is " - "probably a BUG in the driver! Please report " - "you saw this message to " - "linux-ntfs-dev@lists.sourceforge.net\n"); + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(ntfs_big_inode_cache); + kmem_cache_destroy(ntfs_inode_cache); + kmem_cache_destroy(ntfs_name_cache); + kmem_cache_destroy(ntfs_attr_ctx_cache); + kmem_cache_destroy(ntfs_index_ctx_cache); /* Unregister the ntfs sysctls. */ ntfs_sysctl(0); } -MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov"); +MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc."); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG -module_param(debug_msgs, bool, 0); +module_param(debug_msgs, bint, 0); MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); #endif diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c index 1c23138d00b..a503156ec15 100644 --- a/fs/ntfs/sysctl.c +++ b/fs/ntfs/sysctl.c @@ -1,7 +1,7 @@ /* * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of * the Linux-NTFS project. Adapted from the old NTFS driver, - * Copyright (C) 1997 Martin von Löwis, Régis Duchesne + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne * * Copyright (c) 2002-2005 Anton Altaparmakov * @@ -33,24 +33,30 @@ #include "sysctl.h" #include "debug.h" -#define FS_NTFS 1 - /* Definition of the ntfs sysctl. */ -static ctl_table ntfs_sysctls[] = { - { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */ - &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */ - 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */ - { 0 } +static struct ctl_table ntfs_sysctls[] = { + { + .procname = "ntfs-debug", + .data = &debug_msgs, /* Data pointer and size. */ + .maxlen = sizeof(debug_msgs), + .mode = 0644, /* Mode, proc handler. */ + .proc_handler = proc_dointvec + }, + {} }; /* Define the parent directory /proc/sys/fs. */ -static ctl_table sysctls_root[] = { - { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls }, - { 0 } +static struct ctl_table sysctls_root[] = { + { + .procname = "fs", + .mode = 0555, + .child = ntfs_sysctls + }, + {} }; /* Storage for the sysctls header. */ -static struct ctl_table_header *sysctls_root_table = NULL; +static struct ctl_table_header *sysctls_root_table; /** * ntfs_sysctl - add or remove the debug sysctl @@ -62,17 +68,9 @@ int ntfs_sysctl(int add) { if (add) { BUG_ON(sysctls_root_table); - sysctls_root_table = register_sysctl_table(sysctls_root, 0); + sysctls_root_table = register_sysctl_table(sysctls_root); if (!sysctls_root_table) return -ENOMEM; -#ifdef CONFIG_PROC_FS - /* - * If the proc filesystem is in use and we are a module, need - * to set the owner of our proc entry to our module. In the - * non-modular case, THIS_MODULE is NULL, so this is ok. - */ - ntfs_sysctls[0].de->owner = THIS_MODULE; -#endif } else { BUG_ON(!sysctls_root_table); unregister_sysctl_table(sysctls_root_table); diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h index c8064cae8f1..d4f8ce920d9 100644 --- a/fs/ntfs/sysctl.h +++ b/fs/ntfs/sysctl.h @@ -1,7 +1,7 @@ /* * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of * the Linux-NTFS project. Adapted from the old NTFS driver, - * Copyright (C) 1997 Martin von Löwis, Régis Duchesne + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne * * Copyright (c) 2002-2004 Anton Altaparmakov * @@ -24,7 +24,6 @@ #ifndef _LINUX_NTFS_SYSCTL_H #define _LINUX_NTFS_SYSCTL_H -#include <linux/config.h> #if defined(DEBUG) && defined(CONFIG_SYSCTL) diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h index 6e4a7e3343f..8c8053b6698 100644 --- a/fs/ntfs/types.h +++ b/fs/ntfs/types.h @@ -62,11 +62,6 @@ typedef s64 USN; typedef sle64 leUSN; typedef enum { - FALSE = 0, - TRUE = 1 -} BOOL; - -typedef enum { CASE_SENSITIVE = 0, IGNORE_CASE = 1, } IGNORE_CASE_BOOL; diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 0ea887fc859..005ca4b0f13 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -1,7 +1,7 @@ /* * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -19,6 +19,8 @@ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/slab.h> + #include "types.h" #include "debug.h" #include "ntfs.h" @@ -59,16 +61,16 @@ static const u8 legal_ansi_char_array[0x40] = { * @upcase: upcase table (only if @ic == IGNORE_CASE) * @upcase_size: length in Unicode characters of @upcase (if present) * - * Compare the names @s1 and @s2 and return TRUE (1) if the names are - * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, + * Compare the names @s1 and @s2 and return 'true' (1) if the names are + * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE, * the @upcase table is used to performa a case insensitive comparison. */ -BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, +bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_size) { if (s1_len != s2_len) - return FALSE; + return false; if (ic == CASE_SENSITIVE) return !ntfs_ucsncmp(s1, s2, s1_len); return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); @@ -242,7 +244,7 @@ int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, * map dictates, into a little endian, 2-byte Unicode string. * * This function allocates the string and the caller is responsible for - * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it. + * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it. * * On success the function returns the number of Unicode characters written to * the output string *@outs (>= 0), not counting the terminating Unicode NULL @@ -262,37 +264,48 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, wchar_t wc; int i, o, wc_len; - /* We don't trust outside sources. */ - if (ins) { - ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); - if (ucs) { + /* We do not trust outside sources. */ + if (likely(ins)) { + ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); + if (likely(ucs)) { for (i = o = 0; i < ins_len; i += wc_len) { wc_len = nls->char2uni(ins + i, ins_len - i, &wc); - if (wc_len >= 0) { - if (wc) { + if (likely(wc_len >= 0 && + o < NTFS_MAX_NAME_LEN)) { + if (likely(wc)) { ucs[o++] = cpu_to_le16(wc); continue; - } /* else (!wc) */ + } /* else if (!wc) */ break; - } /* else (wc_len < 0) */ - goto conversion_err; + } /* else if (wc_len < 0 || + o >= NTFS_MAX_NAME_LEN) */ + goto name_err; } ucs[o] = 0; *outs = ucs; return o; - } /* else (!ucs) */ - ntfs_error(vol->sb, "Failed to allocate name from " - "ntfs_name_cache!"); + } /* else if (!ucs) */ + ntfs_error(vol->sb, "Failed to allocate buffer for converted " + "name from ntfs_name_cache."); return -ENOMEM; - } /* else (!ins) */ - ntfs_error(NULL, "Received NULL pointer."); + } /* else if (!ins) */ + ntfs_error(vol->sb, "Received NULL pointer."); return -EINVAL; -conversion_err: - ntfs_error(vol->sb, "Name using character set %s contains characters " - "that cannot be converted to Unicode.", nls->charset); +name_err: kmem_cache_free(ntfs_name_cache, ucs); - return -EILSEQ; + if (wc_len < 0) { + ntfs_error(vol->sb, "Name using character set %s contains " + "characters that cannot be converted to " + "Unicode.", nls->charset); + i = -EILSEQ; + } else /* if (o >= NTFS_MAX_NAME_LEN) */ { + ntfs_error(vol->sb, "Name is too long (maximum length for a " + "name on NTFS is %d Unicode characters.", + NTFS_MAX_NAME_LEN); + i = -ENAMETOOLONG; + } + return i; } /** @@ -337,7 +350,7 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, } if (!ns) { ns_len = ins_len * NLS_MAX_CHARSET_SIZE; - ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS); + ns = kmalloc(ns_len + 1, GFP_NOFS); if (!ns) goto mem_err_out; } @@ -352,7 +365,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, else if (wc == -ENAMETOOLONG && ns != *outs) { unsigned char *tc; /* Grow in multiples of 64 bytes. */ - tc = (unsigned char*)kmalloc((ns_len + 64) & + tc = kmalloc((ns_len + 64) & ~63, GFP_NOFS); if (tc) { memcpy(tc, ns, ns_len); diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c index 879cdf1d5bd..e2f72ca9803 100644 --- a/fs/ntfs/upcase.c +++ b/fs/ntfs/upcase.c @@ -3,10 +3,7 @@ * Part of the Linux-NTFS project. * * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org> - * Copyright (c) 2001-2004 Anton Altaparmakov - * - * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov. - * Modified for kernel inclusion 10 September 2001 by Anton Altparmakov. + * Copyright (c) 2001-2006 Anton Altaparmakov * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -75,15 +72,15 @@ ntfschar *generate_default_upcase(void) if (!uc) return uc; memset(uc, 0, default_upcase_len * sizeof(ntfschar)); + /* Generate the little endian Unicode upcase table used by ntfs. */ for (i = 0; i < default_upcase_len; i++) uc[i] = cpu_to_le16(i); for (r = 0; uc_run_table[r][0]; r++) for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) - uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) + - uc_run_table[r][2])); + le16_add_cpu(&uc[i], uc_run_table[r][2]); for (r = 0; uc_dup_table[r][0]; r++) for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) - uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); + le16_add_cpu(&uc[i + 1], -1); for (r = 0; uc_word_table[r][0]; r++) uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); return uc; diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c index 77773240d13..b2bc0d55b03 100644 --- a/fs/ntfs/usnjrnl.c +++ b/fs/ntfs/usnjrnl.c @@ -39,12 +39,12 @@ * @vol: ntfs volume on which to stamp the transaction log * * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return - * TRUE on success and FALSE on error. + * 'true' on success and 'false' on error. * * This function assumes that the transaction log has already been loaded and * consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl(). */ -BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol) +bool ntfs_stamp_usnjrnl(ntfs_volume *vol) { ntfs_debug("Entering."); if (likely(!NVolUsnJrnlStamped(vol))) { @@ -56,7 +56,7 @@ BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol) if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read from " "$UsnJrnl/$DATA/$Max attribute."); - return FALSE; + return false; } uh = (USN_HEADER*)page_address(page); stamp = get_current_ntfs_time(); @@ -78,7 +78,7 @@ BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol) NVolSetUsnJrnlStamped(vol); } ntfs_debug("Done."); - return TRUE; + return true; } #endif /* NTFS_RW */ diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index ff988b0deb4..00d8e6bd7c3 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h @@ -113,30 +113,30 @@ typedef struct { * Reason flags (32-bit). Cumulative flags describing the change(s) to the * file since it was last opened. I think the names speak for themselves but * if you disagree check out the descriptions in the Linux NTFS project NTFS - * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html + * documentation: http://www.linux-ntfs.org/ */ enum { - USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), - USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002), - USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004), - USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010), - USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020), - USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040), - USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100), - USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200), - USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400), - USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800), - USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000), - USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000), - USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000), - USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000), - USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000), - USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000), - USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000), - USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000), - USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000), - USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000), - USN_REASON_CLOSE = const_cpu_to_le32(0x80000000), + USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001), + USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002), + USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004), + USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010), + USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020), + USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040), + USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100), + USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200), + USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400), + USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800), + USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000), + USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000), + USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000), + USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000), + USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000), + USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000), + USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000), + USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000), + USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000), + USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000), + USN_REASON_CLOSE = cpu_to_le32(0x80000000), }; typedef le32 USN_REASON_FLAGS; @@ -145,12 +145,12 @@ typedef le32 USN_REASON_FLAGS; * Source info flags (32-bit). Information about the source of the change(s) * to the file. For detailed descriptions of what these mean, see the Linux * NTFS project NTFS documentation: - * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html + * http://www.linux-ntfs.org/ */ enum { - USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), - USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002), - USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004), + USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001), + USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002), + USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004), }; typedef le32 USN_SOURCE_INFO_FLAGS; @@ -198,7 +198,7 @@ typedef struct { /* sizeof() = 60 (0x3c) bytes */ } __attribute__ ((__packed__)) USN_RECORD; -extern BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol); +extern bool ntfs_stamp_usnjrnl(ntfs_volume *vol); #endif /* NTFS_RW */ diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h index 375cd20a9f6..4f579b02bc7 100644 --- a/fs/ntfs/volume.h +++ b/fs/ntfs/volume.h @@ -2,7 +2,7 @@ * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part * of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2006 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -25,6 +25,7 @@ #define _LINUX_NTFS_VOLUME_H #include <linux/rwsem.h> +#include <linux/uidgid.h> #include "types.h" #include "layout.h" @@ -41,17 +42,15 @@ typedef struct { * structure has stabilized... (AIA) */ /* Device specifics. */ - struct super_block *sb; /* Pointer back to the super_block, - so we don't have to get the offset - every time. */ - LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes + struct super_block *sb; /* Pointer back to the super_block. */ + LCN nr_blocks; /* Number of sb->s_blocksize bytes sized blocks on the device. */ /* Configuration provided by user at mount time. */ unsigned long flags; /* Miscellaneous flags, see below. */ - uid_t uid; /* uid that files will be mounted as. */ - gid_t gid; /* gid that files will be mounted as. */ - mode_t fmask; /* The mask for file permissions. */ - mode_t dmask; /* The mask for directory + kuid_t uid; /* uid that files will be mounted as. */ + kgid_t gid; /* gid that files will be mounted as. */ + umode_t fmask; /* The mask for file permissions. */ + umode_t dmask; /* The mask for directory permissions. */ u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ u8 on_errors; /* What to do on filesystem errors. */ @@ -141,8 +140,8 @@ typedef enum { NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */ NV_CaseSensitive, /* 1: Treat file names as case sensitive and create filenames in the POSIX namespace. - Otherwise be case insensitive and create - file names in WIN32 namespace. */ + Otherwise be case insensitive but still + create file names in POSIX namespace. */ NV_LogFileEmpty, /* 1: $LogFile journal is empty. */ NV_QuotaOutOfDate, /* 1: $Quota is out of date. */ NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */ @@ -153,7 +152,7 @@ typedef enum { * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo() * functions. */ -#define NVOL_FNS(flag) \ +#define DEFINE_NVOL_BIT_OPS(flag) \ static inline int NVol##flag(ntfs_volume *vol) \ { \ return test_bit(NV_##flag, &(vol)->flags); \ @@ -168,12 +167,12 @@ static inline void NVolClear##flag(ntfs_volume *vol) \ } /* Emit the ntfs volume bitops functions. */ -NVOL_FNS(Errors) -NVOL_FNS(ShowSystemFiles) -NVOL_FNS(CaseSensitive) -NVOL_FNS(LogFileEmpty) -NVOL_FNS(QuotaOutOfDate) -NVOL_FNS(UsnJrnlStamped) -NVOL_FNS(SparseEnabled) +DEFINE_NVOL_BIT_OPS(Errors) +DEFINE_NVOL_BIT_OPS(ShowSystemFiles) +DEFINE_NVOL_BIT_OPS(CaseSensitive) +DEFINE_NVOL_BIT_OPS(LogFileEmpty) +DEFINE_NVOL_BIT_OPS(QuotaOutOfDate) +DEFINE_NVOL_BIT_OPS(UsnJrnlStamped) +DEFINE_NVOL_BIT_OPS(SparseEnabled) #endif /* _LINUX_NTFS_VOLUME_H */ |
