aboutsummaryrefslogtreecommitdiff
path: root/fs/ntfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ntfs')
-rw-r--r--fs/ntfs/ChangeLog1496
-rw-r--r--fs/ntfs/Kconfig78
-rw-r--r--fs/ntfs/Makefile19
-rw-r--r--fs/ntfs/aops.c1325
-rw-r--r--fs/ntfs/aops.h8
-rw-r--r--fs/ntfs/attrib.c1192
-rw-r--r--fs/ntfs/attrib.h20
-rw-r--r--fs/ntfs/bitmap.c13
-rw-r--r--fs/ntfs/bitmap.h4
-rw-r--r--fs/ntfs/collate.h8
-rw-r--r--fs/ntfs/compress.c38
-rw-r--r--fs/ntfs/debug.c58
-rw-r--r--fs/ntfs/debug.h32
-rw-r--r--fs/ntfs/dir.c166
-rw-r--r--fs/ntfs/file.c2185
-rw-r--r--fs/ntfs/index.c15
-rw-r--r--fs/ntfs/index.h12
-rw-r--r--fs/ntfs/inode.c1097
-rw-r--r--fs/ntfs/inode.h31
-rw-r--r--fs/ntfs/layout.h431
-rw-r--r--fs/ntfs/lcnalloc.c100
-rw-r--r--fs/ntfs/lcnalloc.h69
-rw-r--r--fs/ntfs/logfile.c523
-rw-r--r--fs/ntfs/logfile.h18
-rw-r--r--fs/ntfs/malloc.h54
-rw-r--r--fs/ntfs/mft.c219
-rw-r--r--fs/ntfs/mft.h7
-rw-r--r--fs/ntfs/namei.c207
-rw-r--r--fs/ntfs/ntfs.h61
-rw-r--r--fs/ntfs/quota.c18
-rw-r--r--fs/ntfs/quota.h2
-rw-r--r--fs/ntfs/runlist.c589
-rw-r--r--fs/ntfs/runlist.h3
-rw-r--r--fs/ntfs/super.c933
-rw-r--r--fs/ntfs/sysctl.c40
-rw-r--r--fs/ntfs/sysctl.h3
-rw-r--r--fs/ntfs/types.h5
-rw-r--r--fs/ntfs/unistr.c68
-rw-r--r--fs/ntfs/upcase.c11
-rw-r--r--fs/ntfs/usnjrnl.c8
-rw-r--r--fs/ntfs/usnjrnl.h54
-rw-r--r--fs/ntfs/volume.h37
42 files changed, 6577 insertions, 4680 deletions
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
deleted file mode 100644
index 9eecc9939df..00000000000
--- a/fs/ntfs/ChangeLog
+++ /dev/null
@@ -1,1496 +0,0 @@
-ToDo/Notes:
- - Find and fix bugs.
- - In between ntfs_prepare/commit_write, need exclusion between
- simultaneous file extensions. This is given to us by holding i_sem
- on the inode. The only places in the kernel when a file is resized
- are prepare/commit write and truncate for both of which i_sem is
- held. Just have to be careful in readpage/writepage and all other
- helpers not running under i_sem that we play nice...
- Also need to be careful with initialized_size extension in
- ntfs_prepare_write. Basically, just be _very_ careful in this code...
- UPDATE: The only things that need to be checked are read/writepage
- which do not hold i_sem. Note writepage cannot change i_size but it
- needs to cope with a concurrent i_size change, just like readpage.
- Also both need to cope with concurrent changes to the other sizes,
- i.e. initialized/allocated/compressed size, as well.
- - Implement mft.c::sync_mft_mirror_umount(). We currently will just
- leave the volume dirty on umount if the final iput(vol->mft_ino)
- causes a write of any mirrored mft records due to the mft mirror
- inode having been discarded already. Whether this can actually ever
- happen is unclear however so it is worth waiting until someone hits
- the problem.
- - Enable the code for setting the NT4 compatibility flag when we start
- making NTFS 1.2 specific modifications.
-
-2.1.23 - Implement extension of resident files and make writing safe as well as
- many bug fixes, cleanups, and enhancements...
-
- - Add printk rate limiting for ntfs_warning() and ntfs_error() when
- compiled without debug. This avoids a possible denial of service
- attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
- out.
- - Fix compilation warnings on ia64. (Randy Dunlap)
- - Use i_size_{read,write}() instead of reading i_size by hand and cache
- the value where appropriate.
- - Add size_lock to the ntfs_inode structure. This is an rw spinlock
- and it locks against access to the inode sizes. Note, ->size_lock
- is also accessed from irq context so you must use the _irqsave and
- _irqrestore lock and unlock functions, respectively. Protect all
- accesses to allocated_size, initialized_size, and compressed_size.
- - Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
- - Implement extension of resident files in the regular file write code
- paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()). At present
- this only works until the data attribute becomes too big for the mft
- record after which we abort the write returning -EOPNOTSUPP from
- ntfs_prepare_write().
- - Add disable_sparse mount option together with a per volume sparse
- enable bit which is set appropriately and a per inode sparse disable
- bit which is preset on some system file inodes as appropriate.
- - Enforce that sparse support is disabled on NTFS volumes pre 3.0.
- - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
- the creation of the unmapped runlist element for the base attribute
- extent.
- - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
- helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
- This allows us to map runlist fragments with the runlist lock already
- held without having to drop and reacquire it around the call. Adapt
- all callers.
- - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
- runlist. This allows us to find runlist elements with the runlist
- lock already held without having to drop and reacquire it around the
- call. Adapt all callers.
- - Change time to u64 in time.h::ntfs2utc() as it otherwise generates a
- warning in the do_div() call on sparc32. Thanks to Meelis Roos for
- the report and analysis of the warning.
- - Fix a nasty runlist merge bug when merging two holes.
- - Set the ntfs_inode->allocated_size to the real allocated size in the
- mft record for resident attributes (fs/ntfs/inode.c).
- - Small readability cleanup to use "a" instead of "ctx->attr"
- everywhere (fs/ntfs/inode.c).
- - Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the
- definition of ntfs_export_ops from fs/ntfs/super.c to namei.c. Also,
- declare ntfs_export_ops in fs/ntfs/ntfs.h.
- - Correct sparse file handling. The compressed values need to be
- checked and set in the ntfs inode as done for compressed files and
- the compressed size needs to be used for vfs inode->i_blocks instead
- of the allocated size, again, as done for compressed files.
- - Add AT_EA in addition to AT_DATA to whitelist for being allowed to be
- non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
- - Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new
- write code.
- - Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after
- dropping the read lock and taking the write lock we were not checking
- whether someone else did not already do the work we wanted to do.
- - Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
- ntfs_attr_find_vcn_nolock() and update all callers.
- - Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
- - Fix sign of various error return values to be negative in
- fs/ntfs/lcnalloc.c.
- - Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and
- handle the case where an attribute is converted from resident to
- non-resident by a concurrent file write.
- - Remove checks for NULL before calling kfree() since kfree() does the
- checking itself. (Jesper Juhl)
- - Some utilities modify the boot sector but do not update the checksum.
- Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to
- only emit a warning when the checksum is incorrect rather than
- refusing the mount. Thanks to Bernd Casimir for pointing this
- problem out.
- - Update attribute definition handling.
- - Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants.
- - Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
- - Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
- better code generation and one less sparse warning in fs/ntfs/aops.c.
- - Remove spurious void pointer casts from fs/ntfs/. (Pekka Enberg)
- - Use C99 style structure initialization after memory allocation where
- possible (fs/ntfs/{attrib.c,index.c,super.c}). Thanks to Al Viro and
- Pekka Enberg.
- - Stamp the transaction log ($UsnJrnl), aka user space journal, if it
- is active on the volume and we are mounting read-write or remounting
- from read-only to read-write.
- - Fix a bug in address space operations error recovery code paths where
- if the runlist was not mapped at all and a mapping error occurred we
- would leave the runlist locked on exit to the function so that the
- next access to the same file would try to take the lock and deadlock.
- - Detect the case when Windows has been suspended to disk on the volume
- to be mounted and if this is the case do not allow (re)mounting
- read-write. This is done by parsing hiberfil.sys if present.
- - Fix several occurrences of a bug where we would perform 'var & ~const'
- with a 64-bit variable and an int, i.e. 32-bit, constant. This causes
- the higher order 32-bits of the 64-bit variable to be zeroed. To fix
- this cast the 'const' to the same 64-bit type as 'var'.
- - Change the runlist terminator of the newly allocated cluster(s) to
- LCN_ENOENT in ntfs_attr_make_non_resident(). Otherwise the runlist
- code gets confused.
- - Add an extra parameter @last_vcn to ntfs_get_size_for_mapping_pairs()
- and ntfs_mapping_pairs_build() to allow the runlist encoding to be
- partial which is desirable when filling holes in sparse attributes.
- Update all callers.
- - Change ntfs_map_runlist_nolock() to only decompress the mapping pairs
- if the requested vcn is inside it. Otherwise we get into problems
- when we try to map an out of bounds vcn because we then try to map
- the already mapped runlist fragment which causes
- ntfs_mapping_pairs_decompress() to fail and return error. Update
- ntfs_attr_find_vcn_nolock() accordingly.
- - Fix a nasty deadlock that appeared in recent kernels.
- The situation: VFS inode X on a mounted ntfs volume is dirty. For
- same inode X, the ntfs_inode is dirty and thus corresponding on-disk
- inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
- to the table of inodes, i.e. $MFT, inode 0.
- What happens:
- Process 1: sys_sync()/umount()/whatever... calls
- __sync_single_inode() for $MFT -> do_writepages() -> write_page for
- the dirty page containing the on-disk inode X, the page is now locked
- -> ntfs_write_mst_block() which clears PageUptodate() on the page to
- prevent anyone else getting hold of it whilst it does the write out.
- This is necessary as the on-disk inode needs "fixups" applied before
- the write to disk which are removed again after the write and
- PageUptodate is then set again. It then analyses the page looking
- for dirty on-disk inodes and when it finds one it calls
- ntfs_may_write_mft_record() to see if it is safe to write this
- on-disk inode. This then calls ilookup5() to check if the
- corresponding VFS inode is in icache(). This in turn calls ifind()
- which waits on the inode lock via wait_on_inode whilst holding the
- global inode_lock.
- Process 2: pdflush results in a call to __sync_single_inode for the
- same VFS inode X on the ntfs volume. This locks the inode (I_LOCK)
- then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
- read_cache_page() for the page (in page cache of table of inodes
- $MFT, inode 0) containing the on-disk inode. This page has
- PageUptodate() clear because of Process 1 (see above) so
- read_cache_page() blocks when it tries to take the page lock for the
- page so it can call ntfs_read_page().
- Thus Process 1 is holding the page lock on the page containing the
- on-disk inode X and it is waiting on the inode X to be unlocked in
- ifind() so it can write the page out and then unlock the page.
- And Process 2 is holding the inode lock on inode X and is waiting for
- the page to be unlocked so it can call ntfs_readpage() or discover
- that Process 1 set PageUptodate() again and use the page.
- Thus we have a deadlock due to ifind() waiting on the inode lock.
- The solution: The fix is to use the newly introduced
- ilookup5_nowait() which does not wait on the inode's lock and hence
- avoids the deadlock. This is safe as we do not care about the VFS
- inode and only use the fact that it is in the VFS inode cache and the
- fact that the vfs and ntfs inodes are one struct in memory to find
- the ntfs inode in memory if present. Also, the ntfs inode has its
- own locking so it does not matter if the vfs inode is locked.
- - Fix bug in mft record writing where we forgot to set the device in
- the buffers when mapping them after the VM had discarded them.
- Thanks to Martin MOKREJŠ for the bug report.
-
-2.1.22 - Many bug and race fixes and error handling improvements.
-
- - Improve error handling in fs/ntfs/inode.c::ntfs_truncate().
- - Change fs/ntfs/inode.c::ntfs_truncate() to return an error code
- instead of void and provide a helper ntfs_truncate_vfs() for the
- vfs ->truncate method.
- - Add a new ntfs inode flag NInoTruncateFailed() and modify
- fs/ntfs/inode.c::ntfs_truncate() to set and clear it appropriately.
- - Fix min_size and max_size definitions in ATTR_DEF structure in
- fs/ntfs/layout.h to be signed.
- - Add attribute definition handling helpers to fs/ntfs/attrib.[hc]:
- ntfs_attr_size_bounds_check(), ntfs_attr_can_be_non_resident(), and
- ntfs_attr_can_be_resident(), which in turn use the new private helper
- ntfs_attr_find_in_attrdef().
- - In fs/ntfs/aops.c::mark_ntfs_record_dirty(), take the
- mapping->private_lock around the dirtying of the buffer heads
- analogous to the way it is done in __set_page_dirty_buffers().
- - Ensure the mft record size does not exceed the PAGE_CACHE_SIZE at
- mount time as this cannot work with the current implementation.
- - Check for location of attribute name and improve error handling in
- general in fs/ntfs/inode.c::ntfs_read_locked_inode() and friends.
- - In fs/ntfs/aops.c::ntfs_writepage(), if the page is fully outside
- i_size, i.e. race with truncate, invalidate the buffers on the page
- so that they become freeable and hence the page does not leak.
- - Remove unused function fs/ntfs/runlist.c::ntfs_rl_merge(). (Adrian
- Bunk)
- - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_find() that resulted in
- a NULL pointer dereference in the error code path when a corrupt
- attribute was found. (Thanks to Domen Puncer for the bug report.)
- - Add MODULE_VERSION() to fs/ntfs/super.c.
- - Make several functions and variables static. (Adrian Bunk)
- - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() so it allocates
- buffers for the page if they are not present and then marks the
- buffers belonging to the ntfs record dirty. This causes the buffers
- to become busy and hence they are safe from removal until the page
- has been written out.
- - Fix stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find() in the
- error handling code path that resulted in a BUG() due to trying to
- unmap an extent mft record when the mapping of it had failed and it
- thus was not mapped. (Thanks to Ken MacFerrin for the bug report.)
- - Drop the runlist lock after the vcn has been read in
- fs/ntfs/lcnalloc.c::__ntfs_cluster_free().
- - Rewrite handling of multi sector transfer errors. We now do not set
- PageError() when such errors are detected in the async i/o handler
- fs/ntfs/aops.c::ntfs_end_buffer_async_read(). All users of mst
- protected attributes now check the magic of each ntfs record as they
- use it and act appropriately. This has the effect of making errors
- granular per ntfs record rather than per page which solves the case
- where we cannot access any of the ntfs records in a page when a
- single one of them had an mst error. (Thanks to Ken MacFerrin for
- the bug report.)
- - Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date()
- where we failed to release i_sem on the $Quota/$Q attribute inode.
- - Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup().
- - Add mapping of unmapped buffers to all remaining code paths, i.e.
- fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(),
- and write_mft_record_nolock(). From now on we require that the
- complete runlist for the mft mirror is always mapped into memory.
- - Add creation of buffers to fs/ntfs/mft.c::ntfs_sync_mft_mirror().
- - Improve error handling in fs/ntfs/aops.c::ntfs_{read,write}_block().
- - Cleanup fs/ntfs/aops.c::ntfs_{read,write}page() since we know that a
- resident attribute will be smaller than a page which makes the code
- simpler. Also make the code more tolerant to concurrent ->truncate.
-
-2.1.21 - Fix some races and bugs, rewrite mft write code, add mft allocator.
-
- - Implement extent mft record deallocation
- fs/ntfs/mft.c::ntfs_extent_mft_record_free().
- - Split runlist related functions off from attrib.[hc] to runlist.[hc].
- - Add vol->mft_data_pos and initialize it at mount time.
- - Rename init_runlist() to ntfs_init_runlist(), ntfs_vcn_to_lcn() to
- ntfs_rl_vcn_to_lcn(), decompress_mapping_pairs() to
- ntfs_mapping_pairs_decompress(), ntfs_merge_runlists() to
- ntfs_runlists_merge() and adapt all callers.
- - Add fs/ntfs/runlist.[hc]::ntfs_get_nr_significant_bytes(),
- ntfs_get_size_for_mapping_pairs(), ntfs_write_significant_bytes(),
- and ntfs_mapping_pairs_build(), adapted from libntfs.
- - Make fs/ntfs/lcnalloc.c::ntfs_cluster_free_from_rl_nolock() not
- static and add a declaration for it to lcnalloc.h.
- - Add fs/ntfs/lcnalloc.h::ntfs_cluster_free_from_rl() which is a static
- inline wrapper for ntfs_cluster_free_from_rl_nolock() which takes the
- cluster bitmap lock for the duration of the call.
- - Add fs/ntfs/attrib.[hc]::ntfs_attr_record_resize().
- - Implement the equivalent of memset() for an ntfs attribute in
- fs/ntfs/attrib.[hc]::ntfs_attr_set() and switch
- fs/ntfs/logfile.c::ntfs_empty_logfile() to using it.
- - Remove unnecessary casts from LCN_* constants.
- - Implement fs/ntfs/runlist.c::ntfs_rl_truncate_nolock().
- - Add MFT_RECORD_OLD as a copy of MFT_RECORD in fs/ntfs/layout.h and
- change MFT_RECORD to contain the NTFS 3.1+ specific fields.
- - Add a helper function fs/ntfs/aops.c::mark_ntfs_record_dirty() which
- marks all buffers belonging to an ntfs record dirty, followed by
- marking the page the ntfs record is in dirty and also marking the vfs
- inode containing the ntfs record dirty (I_DIRTY_PAGES).
- - Switch fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to using the
- new helper fs/ntfs/aops.c::mark_ntfs_record_dirty() and remove the no
- longer needed fs/ntfs/index.[hc]::__ntfs_index_entry_mark_dirty().
- - Move ntfs_{un,}map_page() from ntfs.h to aops.h and fix resulting
- include errors.
- - Move the typedefs for runlist_element and runlist from types.h to
- runlist.h and fix resulting include errors.
- - Remove unused {__,}format_mft_record() from fs/ntfs/mft.c.
- - Modify fs/ntfs/mft.c::__mark_mft_record_dirty() to use the helper
- mark_ntfs_record_dirty() which also changes the behaviour in that we
- now set the buffers belonging to the mft record dirty as well as the
- page itself.
- - Update fs/ntfs/mft.c::write_mft_record_nolock() and sync_mft_mirror()
- to cope with the fact that there now are dirty buffers in mft pages.
- - Update fs/ntfs/inode.c::ntfs_write_inode() to also use the helper
- mark_ntfs_record_dirty() and thus to set the buffers belonging to the
- mft record dirty as well as the page itself.
- - Fix compiler warnings on x86-64 in fs/ntfs/dir.c. (Randy Dunlap,
- slightly modified by me)
- - Add fs/ntfs/mft.c::try_map_mft_record() which fails with -EALREADY if
- the mft record is already locked and otherwise behaves the same way
- as fs/ntfs/mft.c::map_mft_record().
- - Modify fs/ntfs/mft.c::write_mft_record_nolock() so that it only
- writes the mft record if the buffers belonging to it are dirty.
- Otherwise we assume that it was written out by other means already.
- - Attempting to write outside initialized size is _not_ a bug so remove
- the bug check from fs/ntfs/aops.c::ntfs_write_mst_block(). It is in
- fact required to write outside initialized size when preparing to
- extend the initialized size.
- - Map the page instead of using page_address() before writing to it in
- fs/ntfs/aops.c::ntfs_mft_writepage().
- - Provide exclusion between opening an inode / mapping an mft record
- and accessing the mft record in fs/ntfs/mft.c::ntfs_mft_writepage()
- by setting the page not uptodate throughout ntfs_mft_writepage().
- - Clear the page uptodate flag in fs/ntfs/aops.c::ntfs_write_mst_block()
- to ensure no one can see the page whilst the mst fixups are applied.
- - Add the helper fs/ntfs/mft.c::ntfs_may_write_mft_record() which
- checks if an mft record may be written out safely obtaining any
- necessary locks in the process. This is used by
- fs/ntfs/aops.c::ntfs_write_mst_block().
- - Modify fs/ntfs/aops.c::ntfs_write_mst_block() to also work for
- writing mft records and improve its error handling in the process.
- Now if any of the records in the page fail to be written out, all
- other records will be written out instead of aborting completely.
- - Remove ntfs_mft_aops and update all users to use ntfs_mst_aops.
- - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to set the
- ntfs_mst_aops for all inodes which are NInoMstProtected() and
- ntfs_aops for all other inodes.
- - Rename fs/ntfs/mft.c::sync_mft_mirror{,_umount}() to
- ntfs_sync_mft_mirror{,_umount}() and change their parameters so they
- no longer require an ntfs inode to be present. Update all callers.
- - Cleanup the error handling in fs/ntfs/mft.c::ntfs_sync_mft_mirror().
- - Clear the page uptodate flag in fs/ntfs/mft.c::ntfs_sync_mft_mirror()
- to ensure no one can see the page whilst the mst fixups are applied.
- - Remove the no longer needed fs/ntfs/mft.c::ntfs_mft_writepage() and
- fs/ntfs/mft.c::try_map_mft_record().
- - Fix callers of fs/ntfs/aops.c::mark_ntfs_record_dirty() to call it
- with the ntfs inode which contains the page rather than the ntfs
- inode the mft record of which is in the page.
- - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by moving the
- index inode bitmap inode release code from there to
- fs/ntfs/inode.c::ntfs_clear_big_inode(). (Thanks to Christoph
- Hellwig for spotting this.)
- - Fix race condition in fs/ntfs/inode.c::ntfs_put_inode() by taking the
- inode semaphore around the code that sets ni->itype.index.bmp_ino to
- NULL and reorganize the code to optimize it a bit. (Thanks to
- Christoph Hellwig for spotting this.)
- - Modify fs/ntfs/aops.c::mark_ntfs_record_dirty() to no longer take the
- ntfs inode as a parameter as this is confusing and misleading and the
- needed ntfs inode is available via NTFS_I(page->mapping->host).
- Adapt all callers to this change.
- - Modify fs/ntfs/mft.c::write_mft_record_nolock() and
- fs/ntfs/aops.c::ntfs_write_mst_block() to only check the dirty state
- of the first buffer in a record and to take this as the ntfs record
- dirty state. We cannot look at the dirty state for subsequent
- buffers because we might be racing with
- fs/ntfs/aops.c::mark_ntfs_record_dirty().
- - Move the static inline ntfs_init_big_inode() from fs/ntfs/inode.c to
- inode.h and make fs/ntfs/inode.c::__ntfs_init_inode() non-static and
- add a declaration for it to inode.h. Fix some compilation issues
- that resulted due to #includes and header file interdependencies.
- - Simplify setup of i_mode in fs/ntfs/inode.c::ntfs_read_locked_inode().
- - Add helpers fs/ntfs/layout.h::MK_MREF() and MK_LE_MREF().
- - Modify fs/ntfs/mft.c::map_extent_mft_record() to only verify the mft
- record sequence number if it is specified (i.e. not zero).
- - Add fs/ntfs/mft.[hc]::ntfs_mft_record_alloc() and various helper
- functions used by it.
- - Update Documentation/filesystems/ntfs.txt with instructions on how to
- use the Device-Mapper driver with NTFS ftdisk/LDM raid. This removes
- the linear raid problem with the Software RAID / MD driver when one
- or more of the devices has an odd number of sectors.
-
-2.1.20 - Fix two stupid bugs introduced in 2.1.18 release.
-
- - Fix stupid bug in fs/ntfs/attrib.c::ntfs_attr_reinit_search_ctx()
- where we did not clear ctx->al_entry but it was still set due to
- changes in ntfs_attr_lookup() and ntfs_external_attr_find() in
- particular.
- - Fix another stupid bug in fs/ntfs/attrib.c::ntfs_external_attr_find()
- where we forgot to unmap the extent mft record when we had finished
- enumerating an attribute which caused a bug check to trigger when the
- VFS calls ->clear_inode.
-
-2.1.19 - Many cleanups, improvements, and a minor bug fix.
-
- - Update ->setattr (fs/ntfs/inode.c::ntfs_setattr()) to refuse to
- change the uid, gid, and mode of an inode as we do not support NTFS
- ACLs yet.
- - Remove BKL use from ntfs_setattr() syncing up with the rest of the
- kernel.
- - Get rid of the ugly transparent union in fs/ntfs/dir.c::ntfs_readdir()
- and ntfs_filldir() as per suggestion from Al Viro.
- - Change '\0' and L'\0' to simply 0 as per advice from Linus Torvalds.
- - Update ->truncate (fs/ntfs/inode.c::ntfs_truncate()) to check if the
- inode size has changed and to only output an error if so.
- - Rename fs/ntfs/attrib.h::attribute_value_length() to ntfs_attr_size().
- - Add le{16,32,64} as well as sle{16,32,64} data types to
- fs/ntfs/types.h.
- - Change ntfschar to be le16 instead of u16 in fs/ntfs/types.h.
- - Add le versions of VCN, LCN, and LSN called leVCN, leLCN, and leLSN,
- respectively, to fs/ntfs/types.h.
- - Update endianness conversion macros in fs/ntfs/endian.h to use the
- new types as appropriate.
- - Do proper type casting when using sle64_to_cpup() in fs/ntfs/dir.c
- and index.c.
- - Add leMFT_REF data type to fs/ntfs/layout.h.
- - Update all NTFS header files with the new little endian data types.
- Affected files are fs/ntfs/layout.h, logfile.h, and time.h.
- - Do proper type casting when using ntfs_is_*_recordp() in
- fs/ntfs/logfile.c, mft.c, and super.c.
- - Fix all the sparse bitwise warnings. Had to change all the typedef
- enums storing little endian values to simple enums plus a typedef for
- the datatype to make sparse happy.
- - Fix a bug found by the new sparse bitwise warnings where the default
- upcase table was defined as a pointer to wchar_t rather than ntfschar
- in fs/ntfs/ntfs.h and super.c.
- - Change {const_,}cpu_to_le{16,32}(0) to just 0 as suggested by Al Viro.
-
-2.1.18 - Fix scheduling latencies at mount time as well as an endianness bug.
-
- - Remove vol->nr_mft_records as it was pretty meaningless and optimize
- the calculation of total/free inodes as used by statfs().
- - Fix scheduling latencies in ntfs_fill_super() by dropping the BKL
- because the code itself is using the ntfs_lock semaphore which
- provides safe locking. (Ingo Molnar)
- - Fix a potential bug in fs/ntfs/mft.c::map_extent_mft_record() that
- could occur in the future for when we start closing/freeing extent
- inodes if we don't set base_ni->ext.extent_ntfs_inos to NULL after
- we free it.
- - Rename {find,lookup}_attr() to ntfs_attr_{find,lookup}() as well as
- find_external_attr() to ntfs_external_attr_find() to cleanup the
- namespace a bit and to be more consistent with libntfs.
- - Rename {{re,}init,get,put}_attr_search_ctx() to
- ntfs_attr_{{re,}init,get,put}_search_ctx() as well as the type
- attr_search_context to ntfs_attr_search_ctx.
- - Force use of ntfs_attr_find() in ntfs_attr_lookup() when searching
- for the attribute list attribute itself.
- - Fix endianness bug in ntfs_external_attr_find().
- - Change ntfs_{external_,}attr_find() to return 0 on success, -ENOENT
- if the attribute is not found, and -EIO on real error. In the case
- of -ENOENT, the search context is updated to describe the attribute
- before which the attribute being searched for would need to be
- inserted if such an action were to be desired and in the case of
- ntfs_external_attr_find() the search context is also updated to
- indicate the attribute list entry before which the attribute list
- entry of the attribute being searched for would need to be inserted
- if such an action were to be desired. Also make ntfs_find_attr()
- static and remove its prototype from attrib.h as it is not used
- anywhere other than attrib.c. Update ntfs_attr_lookup() and all
- callers of ntfs_{external,}attr_{find,lookup}() for the new return
- values.
- - Minor cleanup of fs/ntfs/inode.c::ntfs_init_locked_inode().
-
-2.1.17 - Fix bugs in mount time error code paths and other updates.
-
- - Implement bitmap modification code (fs/ntfs/bitmap.[hc]). This
- includes functions to set/clear a single bit or a run of bits.
- - Add fs/ntfs/attrib.[hc]::ntfs_find_vcn() which returns the locked
- runlist element containing a particular vcn. It also takes care of
- mapping any needed runlist fragments.
- - Implement cluster (de-)allocation code (fs/ntfs/lcnalloc.[hc]).
- - Load attribute definition table from $AttrDef at mount time.
- - Fix bugs in mount time error code paths involving (de)allocation of
- the default and volume upcase tables.
- - Remove ntfs_nr_mounts as it is no longer used.
-
-2.1.16 - Implement access time updates, file sync, async io, and read/writev.
-
- - Add support for readv/writev and aio_read/aio_write (fs/ntfs/file.c).
- This is done by setting the appropriate file operations pointers to
- the generic helper functions provided by mm/filemap.c.
- - Implement fsync, fdatasync, and msync both for files (fs/ntfs/file.c)
- and directories (fs/ntfs/dir.c).
- - Add support for {a,m,c}time updates to inode.c::ntfs_write_inode().
- Note, except for the root directory and any other system files opened
- by the user, the system files will not have their access times
- updated as they are only accessed at the inode level and hence the
- file level functions which cause the times to be updated are never
- invoked.
-
-2.1.15 - Invalidate quotas when (re)mounting read-write.
-
- - Add new element itype.index.collation_rule to the ntfs inode
- structure and set it appropriately in ntfs_read_locked_inode().
- - Implement a new inode type "index" to allow efficient access to the
- indices found in various system files and adapt inode handling
- accordingly (fs/ntfs/inode.[hc]). An index inode is essentially an
- attribute inode (NInoAttr() is true) with an attribute type of
- AT_INDEX_ALLOCATION. As such, it is no longer allowed to call
- ntfs_attr_iget() with an attribute type of AT_INDEX_ALLOCATION as
- there would be no way to distinguish between normal attribute inodes
- and index inodes. The function to obtain an index inode is
- ntfs_index_iget() and it uses the helper function
- ntfs_read_locked_index_inode(). Note, we do not overload
- ntfs_attr_iget() as indices consist of multiple attributes so using
- ntfs_attr_iget() to obtain an index inode would be confusing.
- - Ensure that there is no overflow when doing page->index <<
- PAGE_CACHE_SHIFT by casting page->index to s64 in fs/ntfs/aops.c.
- - Use atomic kmap instead of kmap() in fs/ntfs/aops.c::ntfs_read_page()
- and ntfs_read_block().
- - Use case sensitive attribute lookups instead of case insensitive ones.
- - Lock all page cache pages belonging to mst protected attributes while
- accessing them to ensure we never see corrupt data while the page is
- under writeout.
- - Add framework for generic ntfs collation (fs/ntfs/collation.[hc]).
- We have ntfs_is_collation_rule_supported() to check if the collation
- rule you want to use is supported and ntfs_collation() which actually
- collates two data items. We currently only support COLLATION_BINARY
- and COLLATION_NTOFS_ULONG but support for other collation rules will
- be added as the need arises.
- - Add a new type, ntfs_index_context, to allow retrieval of an index
- entry using the corresponding index key. To get an index context,
- use ntfs_index_ctx_get() and to release it, use ntfs_index_ctx_put().
- This also adds a new slab cache for the index contexts. To lookup a
- key in an index inode, use ntfs_index_lookup(). After modifying an
- index entry, call ntfs_index_entry_flush_dcache_page() followed by
- ntfs_index_entry_mark_dirty() to ensure the changes are written out
- to disk. For details see fs/ntfs/index.[hc]. Note, at present, if
- an index entry is in the index allocation attribute rather than the
- index root attribute it will not be written out (you will get a
- warning message about discarded changes instead).
- - Load the quota file ($Quota) and check if quota tracking is enabled
- and if so, mark the quotas out of date. This causes Windows to
- rescan the volume on boot and update all quota entries.
- - Add a set_page_dirty address space operation for ntfs_m[fs]t_aops.
- It is simply set to __set_page_dirty_nobuffers() to make sure that
- running set_page_dirty() on a page containing mft/ntfs records will
- not affect the dirty state of the page buffers.
- - Add fs/ntfs/index.c::__ntfs_index_entry_mark_dirty() which sets all
- buffers that are inside the ntfs record in the page dirty after which
- it sets the page dirty. This allows ->writepage to only write the
- dirty index records rather than having to write all the records in
- the page. Modify fs/ntfs/index.h::ntfs_index_entry_mark_dirty() to
- use this rather than __set_page_dirty_nobuffers().
- - Implement fs/ntfs/aops.c::ntfs_write_mst_block() which enables the
- writing of page cache pages belonging to mst protected attributes
- like the index allocation attribute in directory indices and other
- indices like $Quota/$Q, etc. This means that the quota is now marked
- out of date on all volumes rather than only on ones where the quota
- defaults entry is in the index root attribute of the $Quota/$Q index.
-
-2.1.14 - Fix an NFSd caused deadlock reported by several users.
-
- - Modify fs/ntfs/ntfs_readdir() to copy the index root attribute value
- to a buffer so that we can put the search context and unmap the mft
- record before calling the filldir() callback. We need to do this
- because of NFSd which calls ->lookup() from its filldir() callback
- and this causes NTFS to deadlock as ntfs_lookup() maps the mft record
- of the directory and since ntfs_readdir() has got it mapped already
- ntfs_lookup() deadlocks.
-
-2.1.13 - Enable overwriting of resident files and housekeeping of system files.
-
- - Implement writing of mft records (fs/ntfs/mft.[hc]), which includes
- keeping the mft mirror in sync with the mft when mirrored mft records
- are written. The functions are write_mft_record{,_nolock}(). The
- implementation is quite rudimentary for now with lots of things not
- implemented yet but I am not sure any of them can actually occur so
- I will wait for people to hit each one and only then implement it.
- - Commit open system inodes at umount time. This should make it
- virtually impossible for sync_mft_mirror_umount() to ever be needed.
- - Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the
- ntfs super operations. This gives us inode writing via the VFS inode
- dirty code paths. Note: Access time updates are not implemented yet.
- - Implement fs/ntfs/mft.[hc]::{,__}mark_mft_record_dirty() and make
- fs/ntfs/aops.c::ntfs_writepage() and ntfs_commit_write() use it, thus
- finally enabling resident file overwrite! (-8 This also includes a
- placeholder for ->writepage (ntfs_mft_writepage()), which for now
- just redirties the page and returns. Also, at umount time, we for
- now throw away all mft data page cache pages after the last call to
- ntfs_commit_inode() in the hope that all inodes will have been
- written out by then and hence no dirty (meta)data will be lost. We
- also check for this case and emit an error message telling the user
- to run chkdsk.
- - Use set_page_writeback() and end_page_writeback() in the resident
- attribute code path of fs/ntfs/aops.c::ntfs_writepage() otherwise
- the radix-tree tag PAGECACHE_TAG_DIRTY remains set even though the
- page is clean.
- - Implement ntfs_mft_writepage() so it now checks if any of the mft
- records in the page are dirty and if so redirties the page and
- returns. Otherwise it just returns (after doing set_page_writeback(),
- unlock_page(), end_page_writeback() or the radix-tree tag
- PAGECACHE_TAG_DIRTY remains set even though the page is clean), thus
- allowing the VM to do with the page as it pleases. Also, at umount
- time, now only throw away dirty mft (meta)data pages if dirty inodes
- are present and ask the user to email us if they see this happening.
- - Add functions ntfs_{clear,set}_volume_flags(), to modify the volume
- information flags (fs/ntfs/super.c).
- - Mark the volume dirty when (re)mounting read-write and mark it clean
- when unmounting or remounting read-only. If any volume errors are
- found, the volume is left marked dirty to force chkdsk to run.
- - Add code to set the NT4 compatibility flag when (re)mounting
- read-write for newer NTFS versions but leave it commented out for now
- since we do not make any modifications that are NTFS 1.2 specific yet
- and since setting this flag breaks Captive-NTFS which is not nice.
- This code must be enabled once we start writing NTFS 1.2 specific
- changes otherwise Windows NTFS driver might crash / cause corruption.
-
-2.1.12 - Fix the second fix to the decompression engine and some cleanups.
-
- - Add a new address space operations struct, ntfs_mst_aops, for mst
- protected attributes. This is because the default ntfs_aops do not
- make sense with mst protected data and were they to write anything to
- such an attribute they would cause data corruption so we provide
- ntfs_mst_aops which does not have any write related operations set.
- - Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also
- includes an adapted ntfs_commit_inode() and an implementation of
- ntfs_write_inode() which for now just cleans dirty inodes without
- writing them (it does emit a warning that this is happening).
- - Undo the second decompression engine fix (see 2.1.9 release ChangeLog
- entry) as it was only fixing a theoretical bug but at the same time
- it badly broke the handling of sparse and uncompressed compression
- blocks.
-
-2.1.11 - Driver internal cleanups.
-
- - Only build logfile.o if building the driver with read-write support.
- - Really final white space cleanups.
- - Use generic_ffs() instead of ffs() in logfile.c which allows the
- log_page_size variable to be optimized by gcc into a constant.
- - Rename uchar_t to ntfschar everywhere as uchar_t is unsigned 1-byte
- char as defined by POSIX and as found on some systems.
-
-2.1.10 - Force read-only (re)mounting of volumes with unsupported volume flags.
-
- - Finish off the white space cleanups (remove trailing spaces, etc).
- - Clean up ntfs_fill_super() and ntfs_read_inode_mount() by removing
- the kludges around the first iget(). Instead of (re)setting ->s_op
- we have the $MFT inode set up by explicit new_inode() / set ->i_ino /
- insert_inode_hash() / call ntfs_read_inode_mount() directly. This
- kills the need for second super_operations and allows to return error
- from ntfs_read_inode_mount() without resorting to ugly "poisoning"
- tricks. (Al Viro)
- - Force read-only (re)mounting if any of the following bits are set in
- the volume information flags:
- VOLUME_IS_DIRTY, VOLUME_RESIZE_LOG_FILE,
- VOLUME_UPGRADE_ON_MOUNT, VOLUME_DELETE_USN_UNDERWAY,
- VOLUME_REPAIR_OBJECT_ID, VOLUME_MODIFIED_BY_CHKDSK
- To make this easier we define VOLUME_MUST_MOUNT_RO_MASK with all the
- above bits set so the test is made easy.
-
-2.1.9 - Fix two bugs in decompression engine.
-
- - Fix a bug where we would not always detect that we have reached the
- end of a compression block because we were ending at minus one byte
- which is effectively the same as being at the end. The fix is to
- check whether the uncompressed buffer has been fully filled and if so
- we assume we have reached the end of the compression block. A big
- thank you to Marcin Gibuła for the bug report, the assistance in
- tracking down the bug and testing the fix.
- - Fix a possible bug where when a compressed read is truncated to the
- end of the file, the offset inside the last page was not truncated.
-
-2.1.8 - Handle $MFT mirror and $LogFile, improve time handling, and cleanups.
-
- - Use get_bh() instead of manual atomic_inc() in fs/ntfs/compress.c.
- - Modify fs/ntfs/time.c::ntfs2utc(), get_current_ntfs_time(), and
- utc2ntfs() to work with struct timespec instead of time_t on the
- Linux UTC time side thus preserving the full precision of the NTFS
- time and only losing up to 99 nano-seconds in the Linux UTC time.
- - Move fs/ntfs/time.c to fs/ntfs/time.h and make the time functions
- static inline.
- - Remove unused ntfs_dirty_inode().
- - Cleanup super operations declaration in fs/ntfs/super.c.
- - Wrap flush_dcache_mft_record_page() in #ifdef NTFS_RW.
- - Add NInoTestSetFoo() and NInoTestClearFoo() macro magic to
- fs/ntfs/inode.h and use it to declare NInoTest{Set,Clear}Dirty.
- - Move typedefs for ntfs_attr and test_t from fs/ntfs/inode.c to
- fs/ntfs/inode.h so they can be used elsewhere.
- - Determine the mft mirror size as the number of mirrored mft records
- and store it in ntfs_volume->mftmirr_size (fs/ntfs/super.c).
- - Load the mft mirror at mount time and compare the mft records stored
- in it to the ones in the mft. Force a read-only mount if the two do
- not match (fs/ntfs/super.c).
- - Fix type casting related warnings on 64-bit architectures. Thanks
- to Meelis Roos for reporting them.
- - Move %L to %ll as %L is floating point and %ll is integer which is
- what we want.
- - Read the journal ($LogFile) and determine if the volume has been
- shutdown cleanly and force a read-only mount if not (fs/ntfs/super.c
- and fs/ntfs/logfile.c). This is a little bit of a crude check in
- that we only look at the restart areas and not at the actual log
- records so that there will be a very small number of cases where we
- think that a volume is dirty when in fact it is clean. This should
- only affect volumes that have not been shutdown cleanly and did not
- have any pending, non-check-pointed i/o.
- - If the $LogFile indicates a clean shutdown and a read-write (re)mount
- is requested, empty $LogFile by overwriting it with 0xff bytes to
- ensure that Windows cannot cause data corruption by replaying a stale
- journal after Linux has written to the volume.
-
-2.1.7 - Enable NFS exporting of mounted NTFS volumes.
-
- - Set i_generation in the VFS inode from the seq_no of the NTFS inode.
- - Make ntfs_lookup() NFS export safe, i.e. use d_splice_alias(), etc.
- - Implement ->get_dentry() in fs/ntfs/namei.c::ntfs_get_dentry() as the
- default doesn't allow inode number 0 which is a valid inode on NTFS
- and even if it did allow that it uses iget() instead of ntfs_iget()
- which makes it useless for us.
- - Implement ->get_parent() in fs/ntfs/namei.c::ntfs_get_parent() as the
- default just returns -EACCES which is not very useful.
- - Define export operations (->s_export_op) for NTFS (ntfs_export_ops)
- and set them up in the super block at mount time (super.c) this
- allows mounted NTFS volumes to be exported via NFS.
- - Add missing return -EOPNOTSUPP; in
- fs/ntfs/aops.c::ntfs_commit_nonresident_write().
- - Enforce no atime and no dir atime updates at mount/remount time as
- they are not implemented yet anyway.
- - Move a few assignments in fs/ntfs/attrib.c::load_attribute_list() to
- after a NULL check. Thanks to Dave Jones for pointing this out.
-
-2.1.6 - Fix minor bug in handling of compressed directories.
-
- - Fix bug in handling of compressed directories. A compressed
- directory is not really compressed so when we set the ->i_blocks
- field of a compressed directory inode we were setting it from the
- non-existing field ni->itype.compressed.size which gave random
- results... For directories we now always use ni->allocated_size.
-
-2.1.5 - Fix minor bug in attribute list attribute handling.
-
- - Fix bug in attribute list handling. Actually it is not as much a bug
- as too much protection in that we were not allowing attribute lists
- which waste space on disk while Windows XP clearly allows it and in
- fact creates such attribute lists so our driver was failing.
- - Update NTFS documentation ready for 2.6 kernel release.
-
-2.1.4 - Reduce compiler requirements.
-
- - Remove all uses of unnamed structs and unions in the driver to make
- old and newer gcc versions happy. Makes it a bit uglier IMO but at
- least people will stop hassling me about it.
-
-2.1.3 - Important bug fixes in corner cases.
-
- - super.c::parse_ntfs_boot_sector(): Correct the check for 64-bit
- clusters. (Philipp Thomas)
- - attrib.c::load_attribute_list(): Fix bug when initialized_size is a
- multiple of the block_size but not the cluster size. (Szabolcs
- Szakacsits <szaka@sienet.hu>)
-
-2.1.2 - Important bug fixes alleviating the hangs in statfs.
-
- - Fix buggy free cluster and free inode determination logic.
-
-2.1.1 - Minor updates.
-
- - Add handling for initialized_size != data_size in compressed files.
- - Reduce function local stack usage from 0x3d4 bytes to just noise in
- fs/ntfs/upcase.c. (Randy Dunlap <rddunlap@osdl.org>)
- - Remove compiler warnings for newer gcc.
- - Pages are no longer kmapped by mm/filemap.c::generic_file_write()
- around calls to ->{prepare,commit}_write. Adapt NTFS appropriately
- in fs/ntfs/aops.c::ntfs_prepare_nonresident_write() by using
- kmap_atomic(KM_USER0).
-
-2.1.0 - First steps towards write support: implement file overwrite.
-
- - Add configuration option for developmental write support with an
- appropriately scary configuration help text.
- - Initial implementation of fs/ntfs/aops.c::ntfs_writepage() and its
- helper fs/ntfs/aops.c::ntfs_write_block(). This enables mmap(2) based
- overwriting of existing files on ntfs. Note: Resident files are
- only written into memory, and not written out to disk at present, so
- avoid writing to files smaller than about 1kiB.
- - Initial implementation of fs/ntfs/aops.c::ntfs_prepare_write(), its
- helper fs/ntfs/aops.c::ntfs_prepare_nonresident_write() and their
- counterparts, fs/ntfs/aops.c::ntfs_commit_write(), and
- fs/ntfs/aops.c::ntfs_commit_nonresident_write(), respectively. Also,
- add generic_file_write() to the ntfs file operations (fs/ntfs/file.c).
- This enables write(2) based overwriting of existing files on ntfs.
- Note: As with mmap(2) based overwriting, resident files are only
- written into memory, and not written out to disk at present, so avoid
- writing to files smaller than about 1kiB.
- - Implement ->truncate (fs/ntfs/inode.c::ntfs_truncate()) and
- ->setattr() (fs/ntfs/inode.c::ntfs_setattr()) inode operations for
- files with the purpose of intercepting and aborting all i_size
- changes which we do not support yet. ntfs_truncate() actually only
- emits a warning message but AFAICS our interception of i_size changes
- elsewhere means ntfs_truncate() never gets called for i_size changes.
- It is only called from generic_file_write() when we fail in
- ntfs_prepare_{,nonresident_}write() in order to discard any
- instantiated buffers beyond i_size. Thus i_size is not actually
- changed so our warning message is enough. Unfortunately it is not
- possible to easily determine if i_size is being changed or not hence
- we just emit an appropriately worded error message.
-
-2.0.25 - Small bug fixes and cleanups.
-
- - Unlock the page in an out of memory error code path in
- fs/ntfs/aops.c::ntfs_read_block().
- - If fs/ntfs/aops.c::ntfs_read_page() is called on an uptodate page,
- just unlock the page and return. (This can happen due to ->writepage
- clearing PageUptodate() during write out of MstProtected()
- attributes.)
- - Remove leaked write code again.
-
-2.0.24 - Cleanups.
-
- - Treat BUG_ON() as ASSERT() not VERIFY(), i.e. do not use side effects
- inside BUG_ON(). (Adam J. Richter)
- - Split logical OR expressions inside BUG_ON() into individual BUG_ON()
- calls for improved debugging. (Adam J. Richter)
- - Add errors flag to the ntfs volume state, accessed via
- NVol{,Set,Clear}Errors(vol).
- - Do not allow read-write remounts of read-only volumes with errors.
- - Clarify comment for ntfs file operation sendfile which was added by
- Christoph Hellwig a while ago (just using generic_file_sendfile())
- to say that ntfs ->sendfile is only used for the case where the
- source data is on the ntfs partition and the destination is
- somewhere else, i.e. nothing we need to concern ourselves with.
- - Add generic_file_write() as our ntfs file write operation.
-
-2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures).
-
- - Massive internal locking changes to mft record locking. Fixes lock
- recursion and replaces the mrec_lock read/write semaphore with a
- mutex. Also removes the now superfluous mft_count. This fixes several
- race conditions and deadlocks, especially in the future write code.
- - Fix ntfs over loopback for compressed files by adding an
- optimization barrier. (gcc was screwing up otherwise ?)
- - Miscellaneous cleanups all over the code and a fix or two in error
- handling code paths.
- Thanks go to Christoph Hellwig for pointing out the following two:
- - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
- - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too.
-
-2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers.
-
- - Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once
- at entry/exit respectively.
- - Use C99 initializers for structures.
- - Remove unused variable blocks from fs/ntfs/aops.c::ntfs_read_block().
-
-2.0.21 - Check for, and refuse to work with too large files/directories/volumes.
-
- - Limit volume size at mount time to 2TiB on architectures where
- unsigned long is 32-bits (fs/ntfs/super.c::parse_ntfs_boot_sector()).
- This is the most we can do without overflowing the 32-bit limit of
- the block device size imposed on us by sb_bread() and sb_getblk()
- for the time being.
- - Limit file/directory size at open() time to 16TiB on architectures
- where unsigned long is 32-bits (fs/ntfs/file.c::ntfs_file_open() and
- fs/ntfs/dir.c::ntfs_dir_open()). This is the most we can do without
- overflowing the page cache page index.
-
-2.0.20 - Support non-resident directory index bitmaps, fix page leak in readdir.
-
- - Move the directory index bitmap to use an attribute inode instead of
- having special fields for it inside the ntfs inode structure. This
- means that the index bitmaps now use the page cache for i/o, too,
- and also as a side effect we get support for non-resident index
- bitmaps for free.
- - Simplify/cleanup error handling in fs/ntfs/dir.c::ntfs_readdir() and
- fix a page leak that manifested itself in some cases.
- - Add fs/ntfs/inode.c::ntfs_put_inode(), which we need to release the
- index bitmap inode on the final iput().
-
-2.0.19 - Fix race condition, improvements, and optimizations in i/o interface.
-
- - Apply block optimization added to fs/ntfs/aops.c::ntfs_read_block()
- to fs/ntfs/compress.c::ntfs_file_read_compressed_block() as well.
- - Drop the "file" from ntfs_file_read_compressed_block().
- - Rename fs/ntfs/aops.c::ntfs_end_buffer_read_async() to
- ntfs_end_buffer_async_read() (more like the fs/buffer.c counterpart).
- - Update ntfs_end_buffer_async_read() with the improved logic from
- its updated counterpart fs/buffer.c::end_buffer_async_read(). Apply
- further logic improvements to better determine when we set PageError.
- - Update submission of buffers in fs/ntfs/aops.c::ntfs_read_block() to
- check for the buffers being uptodate first in line with the updated
- fs/buffer.c::block_read_full_page(). This plugs a small race
- condition.
-
-2.0.18 - Fix race condition in reading of compressed files.
-
- - There was a narrow window between checking a buffer head for being
- uptodate and locking it in ntfs_file_read_compressed_block(). We now
- lock the buffer and then check whether it is uptodate or not.
-
-2.0.17 - Cleanups and optimizations - shrinking the ToDo list.
-
- - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to return an error
- code and update callers, i.e. ntfs_iget(), to pass that error code
- up instead of just using -EIO.
- - Modifications to super.c to ensure that both mount and remount
- cannot set any write related options when the driver is compiled
- read-only.
- - Optimize block resolution in fs/ntfs/aops.c::ntfs_read_block() to
- cache the current runlist element. This should improve performance
- when reading very large and/or very fragmented data.
-
-2.0.16 - Convert access to $MFT/$BITMAP to attribute inode API.
-
- - Fix a stupid bug introduced in 2.0.15 where we were unmapping the
- wrong inode in fs/ntfs/inode.c::ntfs_attr_iget().
- - Fix debugging check in fs/ntfs/aops.c::ntfs_read_block().
- - Convert $MFT/$BITMAP access to attribute inode API and remove all
- remnants of the ugly mftbmp address space and operations hack. This
- means we finally have only one readpage function as well as only one
- async io completion handler. Yey! The mft bitmap is now just an
- attribute inode and is accessed from vol->mftbmp_ino just as if it
- were a normal file. Fake inodes rule. (-:
-
-2.0.15 - Fake inodes based attribute i/o via the pagecache, fixes and cleanups.
-
- - Fix silly bug in fs/ntfs/super.c::parse_options() which was causing
- remounts to fail when the partition had an entry in /etc/fstab and
- the entry specified the nls= option.
- - Apply same macro magic used in fs/ntfs/inode.h to fs/ntfs/volume.h to
- expand all the helper functions NVolFoo(), NVolSetFoo(), and
- NVolClearFoo().
- - Move copyright statement from driver initialisation message to
- module description (fs/super.c). This makes the initialisation
- message fit on one line and fits in better with rest of kernel.
- - Update fs/ntfs/attrib.c::map_run_list() to work on both real and
- attribute inodes, and both for files and directories.
- - Implement fake attribute inodes allowing all attribute i/o to go via
- the page cache and to use all the normal vfs/mm functionality:
- - Add ntfs_attr_iget() and its helper ntfs_read_locked_attr_inode()
- to fs/ntfs/inode.c.
- - Add needed cleanup code to ntfs_clear_big_inode().
- - Merge address space operations for files and directories (aops.c),
- now just have ntfs_aops:
- - Rename:
- end_buffer_read_attr_async() -> ntfs_end_buffer_read_async(),
- ntfs_attr_read_block() -> ntfs_read_block(),
- ntfs_file_read_page() -> ntfs_readpage().
- - Rewrite fs/ntfs/aops.c::ntfs_readpage() to work on both real and
- attribute inodes, and both for files and directories.
- - Remove obsolete fs/ntfs/aops.c::ntfs_mst_readpage().
-
-2.0.14 - Run list merging code cleanup, minor locking changes, typo fixes.
-
- - Change fs/ntfs/super.c::ntfs_statfs() to not rely on BKL by moving
- the locking out of super.c::get_nr_free_mft_records() and taking and
- dropping the mftbmp_lock rw_semaphore in ntfs_statfs() itself.
- - Bring attribute runlist merging code (fs/ntfs/attrib.c) in sync with
- current userspace ntfs library code. This means that if a merge
- fails the original runlists are always left unmodified instead of
- being silently corrupted.
- - Misc typo fixes.
-
-2.0.13 - Use iget5_locked() in preparation for fake inodes and small cleanups.
-
- - Remove nr_mft_bits and the now superfluous union with nr_mft_records
- from ntfs_volume structure.
- - Remove nr_lcn_bits and the now superfluous union with nr_clusters
- from ntfs_volume structure.
- - Use iget5_locked() and friends instead of conventional iget(). Wrap
- the call in fs/ntfs/inode.c::ntfs_iget() and update callers of iget()
- to use ntfs_iget(). Leave only one iget() call at mount time so we
- don't need an ntfs_iget_mount().
- - Change fs/ntfs/inode.c::ntfs_new_extent_inode() to take mft_no as an
- additional argument.
-
-2.0.12 - Initial cleanup of address space operations following 2.0.11 changes.
-
- - Merge fs/ntfs/aops.c::end_buffer_read_mst_async() and
- fs/ntfs/aops.c::end_buffer_read_file_async() into one function
- fs/ntfs/aops.c::end_buffer_read_attr_async() using NInoMstProtected()
- to determine whether to apply mst fixups or not.
- - Above change allows merging fs/ntfs/aops.c::ntfs_file_read_block()
- and fs/ntfs/aops.c::ntfs_mst_readpage() into one function
- fs/ntfs/aops.c::ntfs_attr_read_block(). Also, create a tiny wrapper
- fs/ntfs/aops.c::ntfs_mst_readpage() to transform the parameters from
- the VFS readpage function prototype to the ntfs_attr_read_block()
- function prototype.
-
-2.0.11 - Initial preparations for fake inode based attribute i/o.
-
- - Move definition of ntfs_inode_state_bits to fs/ntfs/inode.h and
- do some macro magic (adapted from include/linux/buffer_head.h) to
- expand all the helper functions NInoFoo(), NInoSetFoo(), and
- NInoClearFoo().
- - Add new flag to ntfs_inode_state_bits: NI_Sparse.
- - Add new fields to ntfs_inode structure to allow use of fake inodes
- for attribute i/o: type, name, name_len. Also add new state bits:
- NI_Attr, which, if set, indicates the inode is a fake inode, and
- NI_MstProtected, which, if set, indicates the attribute uses multi
- sector transfer protection, i.e. fixups need to be applied after
- reads and before/after writes.
- - Rename fs/ntfs/inode.c::ntfs_{new,clear,destroy}_inode() to
- ntfs_{new,clear,destroy}_extent_inode() and update callers.
- - Use ntfs_clear_extent_inode() in fs/ntfs/inode.c::__ntfs_clear_inode()
- instead of ntfs_destroy_extent_inode().
- - Cleanup memory deallocations in {__,}ntfs_clear_{,big_}inode().
- - Make all operations on ntfs inode state bits use the NIno* functions.
- - Set up the new ntfs inode fields and state bits in
- fs/ntfs/inode.c::ntfs_read_inode() and add appropriate cleanup of
- allocated memory to __ntfs_clear_inode().
- - Cleanup ntfs_inode structure a bit for better ordering of elements
- w.r.t. their size to allow better packing of the structure in memory.
-
-2.0.10 - There can only be 2^32 - 1 inodes on an NTFS volume.
-
- - Add check at mount time to verify that the number of inodes on the
- volume does not exceed 2^32 - 1, which is the maximum allowed for
- NTFS according to Microsoft.
- - Change mft_no member of ntfs_inode structure to be unsigned long.
- Update all users. This makes ntfs_inode->mft_no just a copy of struct
- inode->i_ino. But we can't just always use struct inode->i_ino and
- remove mft_no because extent inodes do not have an attached struct
- inode.
-
-2.0.9 - Decompression engine now uses a single buffer and other cleanups.
-
- - Change decompression engine to use a single buffer protected by a
- spin lock instead of per-CPU buffers. (Rusty Russell)
- - Do not update cb_pos when handling a partial final page during
- decompression of a sparse compression block, as the value is later
- reset without being read/used. (Rusty Russell)
- - Switch to using the new KM_BIO_SRC_IRQ for atomic kmap()s. (Andrew
- Morton)
- - Change buffer size in ntfs_readdir()/ntfs_filldir() to use
- NLS_MAX_CHARSET_SIZE which makes the buffers almost 1kiB each but
- it also makes everything safer so it is a good thing.
- - Miscellaneous minor cleanups to comments.
-
-2.0.8 - Major updates for handling of case sensitivity and dcache aliasing.
-
- Big thanks go to Al Viro and other inhabitants of #kernel for investing
- their time to discuss the case sensitivity and dcache aliasing issues.
-
- - Remove unused source file fs/ntfs/attraops.c.
- - Remove show_inodes mount option(s), thus dropping support for
- displaying of short file names.
- - Remove deprecated mount option posix.
- - Restore show_sys_files mount option.
- - Add new mount option case_sensitive, to determine if the driver
- treats file names as case sensitive or not. If case sensitive, create
- file names in the POSIX namespace. Otherwise create file names in the
- LONG/WIN32 namespace. Note, files remain accessible via their short
- file name, if it exists.
- - Remove really dumb logic bug in boot sector recovery code.
- - Fix dcache aliasing issues wrt short/long file names via changes
- to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and
- fs/ntfs/namei.c::ntfs_lookup():
- - Add additional argument to ntfs_lookup_inode_by_name() in which we
- return information about the matching file name if the case is not
- matching or the match is a short file name. See comments above the
- function definition for details.
- - Change ntfs_lookup() to only create dcache entries for the correctly
- cased file name and only for the WIN32 namespace counterpart of DOS
- namespace file names. This ensures we have only one dentry per
- directory and also removes all dcache aliasing issues between short
- and long file names once we add write support. See comments above
- function for details.
- - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls().
-
-2.0.7 - Minor cleanups and updates for changes in core kernel code.
-
- - Remove much of the NULL struct element initializers.
- - Various updates to make compatible with recent kernels.
- - Remove defines of MAX_BUF_PER_PAGE and include linux/buffer_head.h
- in fs/ntfs/ntfs.h instead.
- - Remove no longer needed KERNEL_VERSION checks. We are now in the
- kernel proper so they are no longer needed.
-
-2.0.6 - Major bugfix to make compatible with other kernel changes.
-
- - Initialize the mftbmp address space properly now that there are more
- fields in the struct address_space. This was leading to hangs and
- oopses on umount since 2.5.12 because of changes to other parts of
- the kernel. We probably want a kernel generic init_address_space()
- function...
- - Drop BKL from ntfs_readdir() after consultation with Al Viro. The
- only caller of ->readdir() is vfs_readdir() which holds i_sem during
- the call, and i_sem is sufficient protection against changes in the
- directory inode (including ->i_size).
- - Use generic_file_llseek() for directories (as opposed to
- default_llseek()) as this downs i_sem instead of the BKL which is
- what we now need for exclusion against ->f_pos changes considering we
- no longer take the BKL in ntfs_readdir().
-
-2.0.5 - Major bugfix. Buffer overflow in extent inode handling.
-
- - No need to set old blocksize in super.c::ntfs_fill_super() as the
- VFS does so via invocation of deactivate_super() calling
- fs->fill_super() calling block_kill_super() which does it.
- - BKL moved from VFS into dir.c::ntfs_readdir(). (Linus Torvalds)
- -> Do we really need it? I don't think so as we have exclusion on
- the directory ntfs_inode rw_semaphore mrec_lock. We might have to
- move the ->f_pos accesses under the mrec_lock though. Check this...
- - Fix really, really, really stupid buffer overflow in extent inode
- handling in mft.c::map_extent_mft_record().
-
-2.0.4 - Cleanups and updates for kernel 2.5.11.
-
- - Add documentation on how to use the MD driver to be able to use NTFS
- stripe and volume sets in Linux and generally cleanup documentation
- a bit.
- Remove all uses of kdev_t in favour of struct block_device *:
- - Change compress.c::ntfs_file_read_compressed_block() to use
- sb_getblk() instead of getblk().
- - Change super.c::ntfs_fill_super() to use bdev_hardsect_size() instead
- of get_hardsect_size().
- - No need to get old blocksize in super.c::ntfs_fill_super() as
- fs/super.c::get_sb_bdev() already does this.
- - Set bh->b_bdev instead of bh->b_dev throughout aops.c.
-
-2.0.3 - Small bug fixes, cleanups, and performance improvements.
-
- - Remove some dead code from mft.c.
- - Optimize readpage and read_block functions throughout aops.c so that
- only initialized blocks are read. Non-initialized ones have their
- buffer head mapped, zeroed, and set up to date, without scheduling
- any i/o. Thanks to Al Viro for advice on how to avoid the device i/o.
- Thanks go to Andrew Morton for spotting the below:
- - Fix buglet in allocate_compression_buffers() error code path.
- - Call flush_dcache_page() after modifying page cache page contents in
- ntfs_file_readpage().
- - Check for existence of page buffers throughout aops.c before calling
- create_empty_buffers(). This happens when an I/O error occurs and the
- read is retried. (It also happens once writing is implemented so that
- needed doing anyway but I had left it for later...)
- - Don't BUG_ON() uptodate and/or mapped buffers throughout aops.c in
- readpage and read_block functions. Reasoning same as above (i.e. I/O
- error retries and future write code paths.)
-
-2.0.2 - Minor updates and cleanups.
-
- - Cleanup: rename mst.c::__post_read_mst_fixup to post_write_mst_fixup
- and cleanup the code a bit, removing the unused size parameter.
- - Change default fmask to 0177 and update documentation.
- - Change attrib.c::get_attr_search_ctx() to return the search context
- directly instead of taking the address of a pointer. A return value
- of NULL means the allocation failed. Updated all callers
- appropriately.
- - Update to 2.5.9 kernel (preserving backwards compatibility) by
- replacing all occurrences of page->buffers with page_buffers(page).
- - Fix minor bugs in runlist merging, also minor cleanup.
- - Updates to bootsector layout and mft mirror contents descriptions.
- - Small bug fix in error detection in unistr.c and some cleanups.
- - Grow name buffer allocations in unistr.c in aligned multiples of 64
- bytes.
-
-2.0.1 - Minor updates.
-
- - Make default umask correspond to documentation.
- - Improve documentation.
- - Set default mode to include execute bit. The {u,f,d}mask can be used
- to take it away if desired. This allows binaries to be executed from
- a mounted ntfs partition.
-
-2.0.0 - New version number. Remove TNG from the name. Now in the kernel.
-
- - Add kill_super, just keeping up with the vfs changes in the kernel.
- - Repeat some changes from tng-0.0.8 that somehow got lost on the way
- from the CVS import into BitKeeper.
- - Begin to implement proper handling of allocated_size vs
- initialized_size vs data_size (i.e. i_size). Done are
- mft.c::ntfs_mft_readpage(), aops.c::end_buffer_read_index_async(),
- and attrib.c::load_attribute_list().
- - Lock the runlist in attrib.c::load_attribute_list() while using it.
- - Fix memory leak in ntfs_file_read_compressed_block() and generally
- clean up compress.c a little, removing some uncommented/unused debug
- code.
- - Tidy up dir.c a little bit.
- - Don't bother getting the runlist in inode.c::ntfs_read_inode().
- - Merge mft.c::ntfs_mft_readpage() and aops.c::ntfs_index_readpage()
- creating aops.c::ntfs_mst_readpage(), improving the handling of
- holes and overflow in the process and implementing the correct
- equivalent of ntfs_file_get_block() in ntfs_mst_readpage() itself.
- I am aiming for correctness at the moment. Modularisation can come
- later.
- - Rename aops.c::end_buffer_read_index_async() to
- end_buffer_read_mst_async() and optimize the overflow checking and
- handling.
- - Use the host of the mftbmp address space mapping to hold the ntfs
- volume. This is needed so the async i/o completion handler can
- retrieve a pointer to the volume. Hopefully this will not cause
- problems elsewhere in the kernel... Otherwise will need to use a
- fake inode.
- - Complete implementation of proper handling of allocated_size vs
- initialized_size vs data_size (i.e. i_size) in whole driver.
- Basically aops.c is now completely rewritten.
- - Change NTFS driver name to just NTFS and set version number to 2.0.0
- to make a clear distinction from the old driver which is still on
- version 1.1.22.
-
-tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/
-
- - Replace bdevname(sb->s_dev) with sb->s_id.
- - Remove now superfluous new-line characters in all callers of
- ntfs_debug().
- - Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for
- directories. Without this the "find" utility gets very upset which is
- fair enough as Linux/Unix do not support directory hard links.
- - Further runlist merging work. (Richard Russon)
- - Backwards compatibility for gcc-2.95. (Richard Russon)
- - Update to kernel 2.5.5-pre1 and rediff the now tiny patch.
- - Convert to new filesystem declaration using ->ntfs_get_sb() and
- replacing ntfs_read_super() with ntfs_fill_super().
- - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index
- overflow on 32-bit architectures.
- - Cleanup upcase loading code to use ntfs_(un)map_page().
- - Disable/reenable preemption in critical sections of compression engine.
- - Replace device size determination in ntfs_fill_super() with
- sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous
- function super.c::get_nr_blocks().
- - Implement a mount time option (show_inodes) allowing choice of which
- types of inode names readdir() returns and modify ntfs_filldir()
- accordingly. There are several parameters to show_inodes:
- system: system files
- win32: long file names (including POSIX file names) [DEFAULT]
- long: same as win32
- dos: short file names only (excluding POSIX file names)
- short: same as dos
- posix: same as both win32 and dos
- all: all file names
- Note that the options are additive, i.e. specifying:
- -o show_inodes=system,show_inodes=win32,show_inodes=dos
- is the same as specifying:
- -o show_inodes=all
- Note that the "posix" and "all" options will show all directory
- names, BUT the link count on each directory inode entry is set to 1,
- due to Linux not supporting directory hard links. This may well
- confuse some userspace applications, since the directory names will
- have the same inode numbers. Thus it is NOT advisable to use the
- "posix" or "all" options. We provide them only for completeness sake.
- - Add copies of allocated_size, initialized_size, and compressed_size to
- the ntfs inode structure and set them up in
- inode.c::ntfs_read_inode(). These reflect the unnamed data attribute
- for files and the index allocation attribute for directories.
- - Add copies of allocated_size and initialized_size to ntfs inode for
- $BITMAP attribute of large directories and set them up in
- inode.c::ntfs_read_inode().
- - Add copies of allocated_size and initialized_size to ntfs volume for
- $BITMAP attribute of $MFT and set them up in
- super.c::load_system_files().
- - Parse deprecated ntfs driver options (iocharset, show_sys_files,
- posix, and utf8) and tell user what the new options to use are. Note
- we still do support them but they will be removed with kernel 2.7.x.
- - Change all occurrences of integer long long printf formatting to hex
- as printk() will not support long long integer format if/when the
- div64 patch goes into the kernel.
- - Make slab caches have stable names and change the names to what they
- were intended to be. These changes are required/made possible by the
- new slab cache name handling which removes the length limitation by
- requiring the caller of kmem_cache_create() to supply a stable name
- which is then referenced but not copied.
- - Rename run_list structure to run_list_element and create a new
- run_list structure containing a pointer to a run_list_element
- structure and a read/write semaphore. Adapt all users of runlists
- to new scheme and take and release the lock as needed. This fixes a
- nasty race as the run_list changes even when inodes are locked for
- reading and even when the inode isn't locked at all, so we really
- needed the serialization. We use a semaphore rather than a spinlock
- as memory allocations can sleep and doing everything GFP_ATOMIC
- would be silly.
- - Cleanup read_inode() removing all code checking for lowest_vcn != 0.
- This can never happen due to the nature of lookup_attr() and how we
- support attribute lists. If it did happen it would imply the inode
- being corrupt.
- - Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as
- bad if found.
- - Update to 2.5.6-pre2 changes in struct address_space.
- - Use parent_ino() when accessing d_parent inode number in dir.c.
- - Import Sourceforge CVS repository into BitKeeper repository:
- http://linux-ntfs.bkbits.net/ntfs-tng-2.5
- - Update fs/Makefile, fs/Config.help, fs/Config.in, and
- Documentation/filesystems/ntfs.txt for NTFS TNG.
- - Create kernel configuration option controlling whether debugging
- is enabled or not.
- - Add the required export of end_buffer_io_sync() from the patches
- directory to the kernel code.
- - Update inode.c::ntfs_show_options() with show_inodes mount option.
- - Update errors mount option.
-
-tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only!
-
- - Cleanup mft.c and its debug/error output in particular. Fix a minor
- bug in mapping of extent inodes. Update all the comments to fit all
- the recent code changes.
- - Modify vcn_to_lcn() to cope with entirely unmapped runlists.
- - Cleanups in compress.c, mostly comments and folding help.
- - Implement attrib.c::map_run_list() as a generic helper.
- - Make compress.c::ntfs_file_read_compressed_block() use map_run_list()
- thus making code shorter and enabling attribute list support.
- - Cleanup incorrect use of [su]64 with %L printf format specifier in
- all source files. Type casts to [unsigned] long long added to correct
- the mismatches (important for architectures which have long long not
- being 64 bits).
- - Merge async io completion handlers for directory indexes and $MFT
- data into one by setting the index_block_size{_bits} of the ntfs
- inode for $MFT to the mft_record_size{_bits} of the ntfs_volume.
- - Cleanup aops.c, update comments.
- - Make ntfs_file_get_block() use map_run_list() so all files now
- support attribute lists.
- - Make ntfs_dir_readpage() almost verbatim copy of
- block_read_full_page() by using ntfs_file_get_block() with only real
- difference being the use of our own async io completion handler
- rather than the default one, thus reducing the amount of code and
- automatically enabling attribute list support for directory indices.
- - Fix bug in load_attribute_list() - forgot to call brelse in error
- code path.
- - Change parameters to find_attr() and lookup_attr(). We no longer
- pass in the upcase table and its length. These can be gotten from
- ctx->ntfs_ino->vol->upcase{_len}. Update all callers.
- - Cleanups in attrib.c.
- - Implement merging of runlists, attrib.c::merge_run_lists() and its
- helpers. (Richard Russon)
- - Attribute lists part 2, attribute extents and multi part runlists:
- enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of
- further runlist parts via attrib.c::map_run_list().
- - Tiny endianness bug fix in decompress_mapping_pairs().
-
-tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements.
-
- - Enable encrypted directories. (Their index root is marked encrypted
- to indicate that new files in that directory should be created
- encrypted.)
- - Fix bug in NInoBmpNonResident() macro. (Cut and paste error.)
- - Enable $Extend system directory. Most (if not all) extended system
- files do not have unnamed data attributes so ntfs_read_inode() had to
- special case them but that is ok, as the special casing recovery
- happens inside an error code path so there is zero slow down in the
- normal fast path. The special casing is done by introducing a new
- function inode.c::ntfs_is_extended_system_file() which checks if any
- of the hard links in the inode point to $Extend as being their parent
- directory and if they do we assume this is an extended system file.
- - Create a sysctl/proc interface to allow {dis,en}abling of debug output
- when compiled with -DDEBUG. Default is debug messages to be disabled.
- To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug
- (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl
- interface is enabled). Inspired by old ntfs driver.
- - Add debug_msgs insmod/kernel boot parameter to set whether debug
- messages are {dis,en}abled. This is useful to enable debug messages
- during ntfs initialization and is the only way to activate debugging
- when the sysctl interface is not enabled.
- - Cleanup debug output in various places.
- - Remove all dollar signs ($) from the source (except comments) to
- enable compilation on architectures whose gcc compiler does not
- support dollar signs in the names of variables/constants. Attribute
- types now start with AT_ instead of $ and $I30 is now just I30.
- - Cleanup ntfs_lookup() and add consistency check of sequence numbers.
- - Load complete runlist for $MFT/$BITMAP during mount and cleanup
- access functions. This means we now cope with $MFT/$BITMAP being
- spread across several mft records.
- - Disable modification of mft_zone_multiplier on remount. We can always
- reenable this later on if we really want to, but we will need to make
- sure we readjust the mft_zone size / layout accordingly.
-
-tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments.
-
- - Use sb_set_blocksize() instead of set_blocksize() and verify the
- return value.
- - Use sb_bread() instead of bread() throughout.
- - Add index_vcn_size{_bits} to ntfs_inode structure to store the size
- of a directory index block vcn. Apply resulting simplifications in
- dir.c everywhere.
- - Fix a small bug somewhere (but forgot what it was).
- - Change ntfs_{debug,error,warning} to enable gcc to do type checking
- on the printf-format parameter list and fix bugs reported by gcc
- as a result. (Richard Russon)
- - Move inode allocation strategy to Al's new stuff but maintain the
- divorce of ntfs_inode from struct inode. To achieve this we have two
- separate slab caches, one for big ntfs inodes containing a struct
- inode and pure ntfs inodes and at the same time fix some faulty
- error code paths in ntfs_read_inode().
- - Show mount options in proc (inode.c::ntfs_show_options()).
-
-tng-0.0.4 - Big changes, getting in line with Al Viro's comments.
-
- - Modified (un)map_mft_record functions to be common for read and write
- case. To specify which is which, added extra parameter at front of
- parameter list. Pass either READ or WRITE to this, each has the
- obvious meaning.
- - General cleanups to allow for easier folding in vi.
- - attrib.c::decompress_mapping_pairs() now accepts the old runlist
- argument, and invokes attrib.c::merge_run_lists() to merge the old
- and the new runlists.
- - Removed attrib.c::find_first_attr().
- - Implemented loading of attribute list and complete runlist for $MFT.
- This means we now cope with $MFT being spread across several mft
- records.
- - Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax.
- - Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels.
- - Make ntfs_volume be allocated via kmalloc() instead of using a slab
- cache. There are too little ntfs_volume structures at any one time
- to justify a private slab cache.
- - Fix bogus kmap() use in async io completion. Now use kmap_atomic().
- Use KM_BIO_IRQ on advice from IRC/kernel...
- - Use ntfs_map_page() in map_mft_record() and create ->readpage method
- for reading $MFT (ntfs_mft_readpage). In the process create dedicated
- address space operations (ntfs_mft_aops) for $MFT inode mapping. Also
- removed the now superfluous exports from the kernel core patch.
- - Fix a bug where kfree() was used instead of ntfs_free().
- - Change map_mft_record() to take ntfs_inode as argument instead of
- vfs inode. Ditto for unmap_mft_record(). Adapt all callers.
- - Add pointer to ntfs_volume to ntfs_inode.
- - Add mft record number and sequence number to ntfs_inode. Stop using
- i_ino and i_generation for in-driver purposes.
- - Implement attrib.c::merge_run_lists(). (Richard Russon)
- - Remove use of proper inodes by extent inodes. Move i_ino and
- i_generation to ntfs_inode to do this. Apply simplifications that
- result and remove iget_no_wait(), etc.
- - Pass ntfs_inode everywhere in the driver (used to be struct inode).
- - Add reference counting in ntfs_inode for the ntfs inode itself and
- for the mapped mft record.
- - Extend mft record mapping so we can (un)map extent mft records (new
- functions (un)map_extent_mft_record), and so mappings are reference
- counted and don't have to happen twice if already mapped - just ref
- count increases.
- - Add -o iocharset as alias to -o nls for backwards compatibility.
- - The latest core patch is now tiny. In fact just a single additional
- export is necessary over the base kernel.
-
-tng-0.0.3 - Cleanups, enhancements, bug fixes.
-
- - Work on attrib.c::decompress_mapping_pairs() to detect base extents
- and setup the runlist appropriately using knowledge provided by the
- sizes in the base attribute record.
- - Balance the get_/put_attr_search_ctx() calls so we don't leak memory
- any more.
- - Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single
- page or use vmalloc depending on the amount of memory requested.
- - Cleanup error output. The __FUNCTION__ "(): " is now added
- automatically. Introduced a new header file debug.h to support this
- and also moved ntfs_debug() function into it.
- - Make reading of compressed files more intelligent and especially get
- rid of the vmalloc_nofs() from readpage(). This now uses per CPU
- buffers (allocated at first mount with cluster size <= 4kiB and
- deallocated on last umount with cluster size <= 4kiB), and
- asynchronous io for the compressed data using a list of buffer heads.
- Er, we use synchronous io as async io only works on whole pages
- covered by buffers and not on individual buffer heads...
- - Bug fix for reading compressed files with sparse compression blocks.
-
-tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs.
-
- - Fixed handling of directories when cluster size exceeds index block
- size.
- - Hide DOS only name space directory entries from readdir() but allow
- them in lookup(). This should fix the problem that Linux doesn't
- support directory hard links, while still allowing access to entries
- via their short file name. This also has the benefit of mimicking
- what Windows users are used to, so it is the ideal solution.
- - Implemented sync_page everywhere so no more hangs in D state when
- waiting for a page.
- - Stop using bforget() in favour of brelse().
- - Stop locking buffers unnecessarily.
- - Implemented compressed files (inode->mapping contains uncompressed
- data, raw compressed data is currently bread() into a vmalloc()ed
- memory buffer).
- - Enable compressed directories. (Their index root is marked compressed
- to indicate that new files in that directory should be created
- compressed.)
- - Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning
- functions. (Thanks to Will Dyson for pointing this out.)
- - Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and
- ntfs_sb_info) out of the common inode and super_block structures and
- started using the generic_ip and generic_sbp pointers instead. This
- makes ntfs entirely private with respect to the kernel tree.
- - Detect compiler version and abort with error message if gcc less than
- 2.96 is used.
- - Fix bug in name comparison function in unistr.c.
- - Implement attribute lists part 1, the infrastructure: search contexts
- and operations, find_external_attr(), lookup_attr()) and make the
- code use the infrastructure.
- - Fix stupid buffer overflow bug that became apparent on larger run
- list containing attributes.
- - Fix bugs in readdir() that became apparent on larger directories.
-
- The driver is now really useful and survives the test
- find . -type f -exec md5sum "{}" \;
- without any error messages on a over 1GiB sized partition with >16k
- files on it, including compressed files and directories and many files
- and directories with attribute lists.
-
-tng-0.0.1 - The first useful version.
-
- - Added ntfs_lookup().
- - Added default upcase generation and handling.
- - Added compile options to be shown on module init.
- - Many bug fixes that were "hidden" before.
- - Update to latest kernel.
- - Added ntfs_readdir().
- - Added file operations for mmap(), read(), open() and llseek(). We just
- use the generic ones. The whole point of going through implementing
- readpage() methods and where possible get_block() call backs is that
- this allows us to make use of the generic high level methods provided
- by the kernel.
-
- The driver is now actually useful! Yey. (-: It undoubtedly has got bugs
- though and it doesn't implement accessing compressed files yet. Also,
- accessing files with attribute list attributes is not implemented yet
- either. But for small or simple filesystems it should work and allow
- you to list directories, use stat on directory entries and the file
- system, open, read, mmap and llseek around in files. A big mile stone
- has been reached!
-
-tng-0.0.0 - Initial version tag.
-
- Initial driver implementation. The driver can mount and umount simple
- NTFS filesystems (i.e. ones without attribute lists in the system
- files). If the mount fails there might be problems in the error handling
- code paths, so be warned. Otherwise it seems to be loading the system
- files nicely and the mft record read mapping/unmapping seems to be
- working nicely, too. Proof of inode metadata in the page cache and non-
- resident file unnamed stream data in the page cache concepts is thus
- complete.
diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig
new file mode 100644
index 00000000000..f5a868cc915
--- /dev/null
+++ b/fs/ntfs/Kconfig
@@ -0,0 +1,78 @@
+config NTFS_FS
+ tristate "NTFS file system support"
+ select NLS
+ help
+ NTFS is the file system of Microsoft Windows NT, 2000, XP and 2003.
+
+ Saying Y or M here enables read support. There is partial, but
+ safe, write support available. For write support you must also
+ say Y to "NTFS write support" below.
+
+ There are also a number of user-space tools available, called
+ ntfsprogs. These include ntfsundelete and ntfsresize, that work
+ without NTFS support enabled in the kernel.
+
+ This is a rewrite from scratch of Linux NTFS support and replaced
+ the old NTFS code starting with Linux 2.5.11. A backport to
+ the Linux 2.4 kernel series is separately available as a patch
+ from the project web site.
+
+ For more information see <file:Documentation/filesystems/ntfs.txt>
+ and <http://www.linux-ntfs.org/>.
+
+ To compile this file system support as a module, choose M here: the
+ module will be called ntfs.
+
+ If you are not using Windows NT, 2000, XP or 2003 in addition to
+ Linux on your computer it is safe to say N.
+
+config NTFS_DEBUG
+ bool "NTFS debugging support"
+ depends on NTFS_FS
+ help
+ If you are experiencing any problems with the NTFS file system, say
+ Y here. This will result in additional consistency checks to be
+ performed by the driver as well as additional debugging messages to
+ be written to the system log. Note that debugging messages are
+ disabled by default. To enable them, supply the option debug_msgs=1
+ at the kernel command line when booting the kernel or as an option
+ to insmod when loading the ntfs module. Once the driver is active,
+ you can enable debugging messages by doing (as root):
+ echo 1 > /proc/sys/fs/ntfs-debug
+ Replacing the "1" with "0" would disable debug messages.
+
+ If you leave debugging messages disabled, this results in little
+ overhead, but enabling debug messages results in very significant
+ slowdown of the system.
+
+ When reporting bugs, please try to have available a full dump of
+ debugging messages while the misbehaviour was occurring.
+
+config NTFS_RW
+ bool "NTFS write support"
+ depends on NTFS_FS
+ help
+ This enables the partial, but safe, write support in the NTFS driver.
+
+ The only supported operation is overwriting existing files, without
+ changing the file length. No file or directory creation, deletion or
+ renaming is possible. Note only non-resident files can be written to
+ so you may find that some very small files (<500 bytes or so) cannot
+ be written to.
+
+ While we cannot guarantee that it will not damage any data, we have
+ so far not received a single report where the driver would have
+ damaged someone's data so we assume it is perfectly safe to use.
+
+ Note: While write support is safe in this version (a rewrite from
+ scratch of the NTFS support), it should be noted that the old NTFS
+ write support, included in Linux 2.5.10 and before (since 1997),
+ is not safe.
+
+ This is currently useful with TopologiLinux. TopologiLinux is run
+ on top of any DOS/Microsoft Windows system without partitioning your
+ hard disk. Unlike other Linux distributions TopologiLinux does not
+ need its own partition. For more information see
+ <http://topologi-linux.sourceforge.net/>
+
+ It is perfectly safe to say N here.
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f083f27d8b6..30206b23843 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -2,18 +2,13 @@
obj-$(CONFIG_NTFS_FS) += ntfs.o
-ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
- index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
- unistr.o upcase.o
+ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
+ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
+ unistr.o upcase.o
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
+ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
-ifeq ($(CONFIG_NTFS_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-y := -DNTFS_VERSION=\"2.1.30\"
+ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
+ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
-ifeq ($(CONFIG_NTFS_RW),y)
-EXTRA_CFLAGS += -DNTFS_RW
-
-ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
-endif
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 78adad7a988..d267ea6aa1a 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
* aops.c - NTFS kernel address space operations and page cache handling.
* Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -22,11 +22,14 @@
*/
#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
#include "aops.h"
#include "attrib.h"
@@ -55,45 +58,58 @@
*/
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
- static DEFINE_SPINLOCK(page_uptodate_lock);
unsigned long flags;
- struct buffer_head *tmp;
+ struct buffer_head *first, *tmp;
struct page *page;
+ struct inode *vi;
ntfs_inode *ni;
int page_uptodate = 1;
page = bh->b_page;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
if (likely(uptodate)) {
- s64 file_ofs, initialized_size;
+ loff_t i_size;
+ s64 file_ofs, init_size;
set_buffer_uptodate(bh);
file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
bh_offset(bh);
read_lock_irqsave(&ni->size_lock, flags);
- initialized_size = ni->initialized_size;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
/* Check for the current buffer head overflowing. */
- if (file_ofs + bh->b_size > initialized_size) {
- char *addr;
- int ofs = 0;
-
- if (file_ofs < initialized_size)
- ofs = initialized_size - file_ofs;
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
- memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+ if (unlikely(file_ofs + bh->b_size > init_size)) {
+ int ofs;
+ void *kaddr;
+
+ ofs = 0;
+ if (file_ofs < init_size)
+ ofs = init_size - file_ofs;
+ local_irq_save(flags);
+ kaddr = kmap_atomic(page);
+ memset(kaddr + bh_offset(bh) + ofs, 0,
+ bh->b_size - ofs);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
}
} else {
clear_buffer_uptodate(bh);
- ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
- (unsigned long long)bh->b_blocknr);
SetPageError(page);
+ ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
+ "0x%llx.", (unsigned long long)bh->b_blocknr);
}
- spin_lock_irqsave(&page_uptodate_lock, flags);
+ first = page_buffers(page);
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -108,7 +124,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
/*
* If none of the buffers had errors then we can set the page uptodate,
* but we first have to perform the post read mst fixups, if the
@@ -121,7 +138,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
} else {
- char *addr;
+ u8 *kaddr;
unsigned int i, recs;
u32 rec_size;
@@ -129,19 +146,22 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
- addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+ local_irq_save(flags);
+ kaddr = kmap_atomic(page);
for (i = 0; i < recs; i++)
- post_read_mst_fixup((NTFS_RECORD*)(addr +
+ post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size);
+ kunmap_atomic(kaddr);
+ local_irq_restore(flags);
flush_dcache_page(page);
- kunmap_atomic(addr, KM_BIO_SRC_IRQ);
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
}
unlock_page(page);
return;
still_busy:
- spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
return;
}
@@ -164,8 +184,11 @@ still_busy:
*/
static int ntfs_read_block(struct page *page)
{
+ loff_t i_size;
VCN vcn;
LCN lcn;
+ s64 init_size;
+ struct inode *vi;
ntfs_inode *ni;
ntfs_volume *vol;
runlist_element *rl;
@@ -176,34 +199,54 @@ static int ntfs_read_block(struct page *page)
int i, nr;
unsigned char blocksize_bits;
- ni = NTFS_I(page->mapping->host);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
vol = ni->vol;
/* $MFT/$DATA must have its complete runlist in memory at all times. */
BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
- blocksize_bits = VFS_I(ni)->i_blkbits;
- blocksize = 1 << blocksize_bits;
+ blocksize = vol->sb->s_blocksize;
+ blocksize_bits = vol->sb->s_blocksize_bits;
- if (!page_has_buffers(page))
+ if (!page_has_buffers(page)) {
create_empty_buffers(page, blocksize, 0);
- bh = head = page_buffers(page);
- if (unlikely(!bh)) {
- unlock_page(page);
- return -ENOMEM;
+ if (unlikely(!page_has_buffers(page))) {
+ unlock_page(page);
+ return -ENOMEM;
+ }
}
+ bh = head = page_buffers(page);
+ BUG_ON(!bh);
+ /*
+ * We may be racing with truncate. To avoid some of the problems we
+ * now take a snapshot of the various sizes and use those for the whole
+ * of the function. In case of an extending truncate it just means we
+ * may leave some buffers unmapped which are now allocated. This is
+ * not a problem since these buffers will just get mapped when a write
+ * occurs. In case of a shrinking truncate, we will detect this later
+ * on due to the runlist being incomplete and if the page is being
+ * fully truncated, truncate will throw it away as soon as we unlock
+ * it so no need to worry what we do with it.
+ */
iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
- zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+ init_size = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
+ if (unlikely(init_size > i_size)) {
+ /* Race with shrinking truncate. */
+ init_size = i_size;
+ }
+ zblock = (init_size + blocksize - 1) >> blocksize_bits;
/* Loop through all the buffers in the page. */
rl = NULL;
nr = i = 0;
do {
- u8 *kaddr;
+ int err = 0;
if (unlikely(buffer_uptodate(bh)))
continue;
@@ -214,7 +257,7 @@ static int ntfs_read_block(struct page *page)
bh->b_bdev = vol->sb->s_bdev;
/* Is the block within the allowed limits? */
if (iblock < lblock) {
- BOOL is_retry = FALSE;
+ bool is_retry = false;
/* Convert iblock into corresponding vcn and offset. */
vcn = (VCN)iblock << blocksize_bits >>
@@ -252,8 +295,7 @@ lock_retry_remap:
goto handle_hole;
/* If first try and runlist unmapped, map and retry. */
if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
- int err;
- is_retry = TRUE;
+ is_retry = true;
/*
* Attempt to map runlist, dropping lock for
* the duration.
@@ -263,20 +305,30 @@ lock_retry_remap:
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
- lcn = err;
} else if (!rl)
up_read(&ni->runlist.lock);
+ /*
+ * If buffer is outside the runlist, treat it as a
+ * hole. This can happen due to concurrent truncate
+ * for example.
+ */
+ if (err == -ENOENT || lcn == LCN_ENOENT) {
+ err = 0;
+ goto handle_hole;
+ }
/* Hard error, zero out region. */
+ if (!err)
+ err = -EIO;
bh->b_blocknr = -1;
SetPageError(page);
ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, "
"offset 0x%x because its location on "
"disk could not be determined%s "
- "(error code %lli).", ni->mft_no,
+ "(error code %i).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
- "retrying" : "", (long long)lcn);
+ "retrying" : "", err);
}
/*
* Either iblock was outside lblock limits or
@@ -287,11 +339,9 @@ handle_hole:
bh->b_blocknr = -1UL;
clear_buffer_mapped(bh);
handle_zblock:
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + i * blocksize, 0, blocksize);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- set_buffer_uptodate(bh);
+ zero_user(page, i * blocksize, blocksize);
+ if (likely(!err))
+ set_buffer_uptodate(bh);
} while (i++, iblock++, (bh = bh->b_this_page) != head);
/* Release the lock if we took it. */
@@ -348,8 +398,10 @@ handle_zblock:
*/
static int ntfs_readpage(struct file *file, struct page *page)
{
+ loff_t i_size;
+ struct inode *vi;
ntfs_inode *ni, *base_ni;
- u8 *kaddr;
+ u8 *addr;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
unsigned long flags;
@@ -358,6 +410,15 @@ static int ntfs_readpage(struct file *file, struct page *page)
retry_readpage:
BUG_ON(!PageLocked(page));
+ vi = page->mapping->host;
+ i_size = i_size_read(vi);
+ /* Is the page fully outside i_size? (truncate in progress) */
+ if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT)) {
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+ ntfs_debug("Read outside i_size - truncated?");
+ goto done;
+ }
/*
* This can potentially happen because we clear PageUptodate() during
* ntfs_writepage() of MstProtected() attributes.
@@ -366,38 +427,44 @@ retry_readpage:
unlock_page(page);
return 0;
}
- ni = NTFS_I(page->mapping->host);
-
+ ni = NTFS_I(vi);
+ /*
+ * Only $DATA attributes can be encrypted and only unnamed $DATA
+ * attributes can be compressed. Index root can have the flags set but
+ * this means to create compressed/encrypted files, not that the
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
+ */
+ if (ni->type != AT_INDEX_ALLOCATION) {
+ /* If attribute is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ err = -EACCES;
+ goto err_out;
+ }
+ /* Compressed data streams are handled in compress.c. */
+ if (NInoNonResident(ni) && NInoCompressed(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ BUG_ON(ni->name_len);
+ return ntfs_read_compressed_block(page);
+ }
+ }
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
- /*
- * Only unnamed $DATA attributes can be compressed or
- * encrypted.
- */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- err = -EACCES;
- goto err_out;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni))
- return ntfs_read_compressed_block(page);
- }
- /* Normal data stream. */
+ /* Normal, non-resident data stream. */
return ntfs_read_block(page);
}
/*
* Attribute is resident, implying it is not compressed or encrypted.
* This also means the attribute is smaller than an mft record and
* hence smaller than a page, so can simply zero out any pages with
- * index above 0.
+ * index above 0. Note the attribute can actually be marked compressed
+ * but if it is resident the actual data is not compressed so we are
+ * ok to ignore the compressed flag here.
*/
if (unlikely(page->index > 0)) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
goto done;
}
if (!NInoAttr(ni))
@@ -431,16 +498,21 @@ retry_readpage:
read_lock_irqsave(&ni->size_lock, flags);
if (unlikely(attr_len > ni->initialized_size))
attr_len = ni->initialized_size;
+ i_size = i_size_read(vi);
read_unlock_irqrestore(&ni->size_lock, flags);
- kaddr = kmap_atomic(page, KM_USER0);
+ if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate. */
+ attr_len = i_size;
+ }
+ addr = kmap_atomic(page);
/* Copy the data to the page. */
- memcpy(kaddr, (u8*)ctx->attr +
+ memcpy(addr, (u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
attr_len);
/* Zero the remainder of the page. */
- memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(addr);
put_unm_err_out:
ntfs_attr_put_search_ctx(ctx);
unm_err_out:
@@ -491,7 +563,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
unsigned long flags;
unsigned int blocksize, vcn_ofs;
int err;
- BOOL need_end_writeback;
+ bool need_end_writeback;
unsigned char blocksize_bits;
vi = page->mapping->host;
@@ -503,27 +575,27 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
BUG_ON(!NInoNonResident(ni));
BUG_ON(NInoMstProtected(ni));
-
- blocksize_bits = vi->i_blkbits;
- blocksize = 1 << blocksize_bits;
-
+ blocksize = vol->sb->s_blocksize;
+ blocksize_bits = vol->sb->s_blocksize_bits;
if (!page_has_buffers(page)) {
BUG_ON(!PageUptodate(page));
create_empty_buffers(page, blocksize,
(1 << BH_Uptodate) | (1 << BH_Dirty));
+ if (unlikely(!page_has_buffers(page))) {
+ ntfs_warning(vol->sb, "Error allocating page "
+ "buffers. Redirtying page so we try "
+ "again later.");
+ /*
+ * Put the page back on mapping->dirty_pages, but leave
+ * its buffers' dirty state as-is.
+ */
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
}
bh = head = page_buffers(page);
- if (unlikely(!bh)) {
- ntfs_warning(vol->sb, "Error allocating page buffers. "
- "Redirtying page so we try again later.");
- /*
- * Put the page back on mapping->dirty_pages, but leave its
- * buffer's dirty state as-is.
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ BUG_ON(!bh);
/* NOTE: Different naming scheme to ntfs_read_block()! */
@@ -559,7 +631,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
rl = NULL;
err = 0;
do {
- BOOL is_retry = FALSE;
+ bool is_retry = false;
if (unlikely(block >= dblock)) {
/*
@@ -670,6 +742,27 @@ lock_retry_remap:
}
/* It is a hole, need to instantiate it. */
if (lcn == LCN_HOLE) {
+ u8 *kaddr;
+ unsigned long *bpos, *bend;
+
+ /* Check if the buffer is zero. */
+ kaddr = kmap_atomic(page);
+ bpos = (unsigned long *)(kaddr + bh_offset(bh));
+ bend = (unsigned long *)((u8*)bpos + blocksize);
+ do {
+ if (unlikely(*bpos))
+ break;
+ } while (likely(++bpos < bend));
+ kunmap_atomic(kaddr);
+ if (bpos == bend) {
+ /*
+ * Buffer is zero and sparse, no need to write
+ * it.
+ */
+ bh->b_blocknr = -1;
+ clear_buffer_dirty(bh);
+ continue;
+ }
// TODO: Instantiate the hole.
// clear_buffer_new(bh);
// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -680,7 +773,7 @@ lock_retry_remap:
}
/* If first try and runlist unmapped, map and retry. */
if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
- is_retry = TRUE;
+ is_retry = true;
/*
* Attempt to map runlist, dropping lock for
* the duration.
@@ -690,20 +783,32 @@ lock_retry_remap:
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
- lcn = err;
} else if (!rl)
up_read(&ni->runlist.lock);
+ /*
+ * If buffer is outside the runlist, truncate has cut it out
+ * of the runlist. Just clean and clear the buffer and set it
+ * uptodate so it can get discarded by the VM.
+ */
+ if (err == -ENOENT || lcn == LCN_ENOENT) {
+ bh->b_blocknr = -1;
+ clear_buffer_dirty(bh);
+ zero_user(page, bh_offset(bh), blocksize);
+ set_buffer_uptodate(bh);
+ err = 0;
+ continue;
+ }
/* Failed to map the buffer, even after retrying. */
+ if (!err)
+ err = -EIO;
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
"because its location on disk could not be "
- "determined%s (error code %lli).", ni->mft_no,
+ "determined%s (error code %i).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
- "retrying" : "", (long long)lcn);
- if (!err)
- err = -EIO;
+ "retrying" : "", err);
break;
} while (block++, (bh = bh->b_this_page) != head);
@@ -714,7 +819,7 @@ lock_retry_remap:
/* For the error case, need to reset bh to the beginning. */
bh = head;
- /* Just an optimization, so ->readpage() isn't called later. */
+ /* Just an optimization, so ->readpage() is not called later. */
if (unlikely(!PageUptodate(page))) {
int uptodate = 1;
do {
@@ -730,7 +835,6 @@ lock_retry_remap:
/* Setup all mapped, dirty buffers for async write i/o. */
do {
- get_bh(bh);
if (buffer_mapped(bh) && buffer_dirty(bh)) {
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
@@ -768,24 +872,18 @@ lock_retry_remap:
BUG_ON(PageWriteback(page));
set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
- unlock_page(page);
- /*
- * Submit the prepared buffers for i/o. Note the page is unlocked,
- * and the async write i/o completion handler can end_page_writeback()
- * at any time after the *first* submit_bh(). So the buffers can then
- * disappear...
- */
- need_end_writeback = TRUE;
+ /* Submit the prepared buffers for i/o. */
+ need_end_writeback = true;
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
submit_bh(WRITE, bh);
- need_end_writeback = FALSE;
+ need_end_writeback = false;
}
- put_bh(bh);
bh = next;
} while (bh != head);
+ unlock_page(page);
/* If no i/o was started, need to end_page_writeback(). */
if (unlikely(need_end_writeback))
@@ -834,7 +932,7 @@ static int ntfs_write_mst_block(struct page *page,
runlist_element *rl;
int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
unsigned bh_size, rec_size_bits;
- BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
+ bool sync, is_mft, page_is_dirty, rec_is_dirty;
unsigned char bh_size_bits;
ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
@@ -850,8 +948,8 @@ static int ntfs_write_mst_block(struct page *page,
*/
BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
- bh_size_bits = vi->i_blkbits;
- bh_size = 1 << bh_size_bits;
+ bh_size = vol->sb->s_blocksize;
+ bh_size_bits = vol->sb->s_blocksize_bits;
max_bhs = PAGE_CACHE_SIZE / bh_size;
BUG_ON(!max_bhs);
BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
@@ -860,7 +958,6 @@ static int ntfs_write_mst_block(struct page *page,
sync = (wbc->sync_mode == WB_SYNC_ALL);
/* Make sure we have mapped buffers. */
- BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
@@ -878,10 +975,10 @@ static int ntfs_write_mst_block(struct page *page,
rl = NULL;
err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
- page_is_dirty = rec_is_dirty = FALSE;
+ page_is_dirty = rec_is_dirty = false;
rec_start_bh = NULL;
do {
- BOOL is_retry = FALSE;
+ bool is_retry = false;
if (likely(block < rec_block)) {
if (unlikely(block >= dblock)) {
@@ -912,10 +1009,10 @@ static int ntfs_write_mst_block(struct page *page,
}
if (!buffer_dirty(bh)) {
/* Clean records are not written out. */
- rec_is_dirty = FALSE;
+ rec_is_dirty = false;
continue;
}
- rec_is_dirty = TRUE;
+ rec_is_dirty = true;
rec_start_bh = bh;
}
/* Need to map the buffer if it is not mapped already. */
@@ -956,7 +1053,7 @@ lock_retry_remap:
*/
if (!is_mft && !is_retry &&
lcn == LCN_RL_NOT_MAPPED) {
- is_retry = TRUE;
+ is_retry = true;
/*
* Attempt to map runlist, dropping
* lock for the duration.
@@ -966,7 +1063,7 @@ lock_retry_remap:
if (likely(!err2))
goto lock_retry_remap;
if (err2 == -ENOMEM)
- page_is_dirty = TRUE;
+ page_is_dirty = true;
lcn = err2;
} else {
err2 = -EIO;
@@ -1048,7 +1145,7 @@ lock_retry_remap:
* means we need to redirty the page before
* returning.
*/
- page_is_dirty = TRUE;
+ page_is_dirty = true;
/*
* Remove the buffers in this mft record from
* the list of buffers to write.
@@ -1098,7 +1195,7 @@ lock_retry_remap:
tbh = bhs[i];
if (!tbh)
continue;
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
/* The buffer dirty state is now irrelevant, just clean it. */
clear_buffer_dirty(tbh);
@@ -1181,18 +1278,18 @@ unm_done:
tni = locked_nis[nr_locked_nis];
/* Get the base inode. */
- down(&tni->extent_lock);
+ mutex_lock(&tni->extent_lock);
if (tni->nr_extents >= 0)
base_tni = tni;
else {
base_tni = tni->ext.base_ntfs_ino;
BUG_ON(!base_tni);
}
- up(&tni->extent_lock);
+ mutex_unlock(&tni->extent_lock);
ntfs_debug("Unlocking %s inode 0x%lx.",
tni == base_tni ? "base" : "extent",
tni->mft_no);
- up(&tni->mrec_lock);
+ mutex_unlock(&tni->mrec_lock);
atomic_dec(&tni->count);
iput(VFS_I(base_tni));
}
@@ -1259,7 +1356,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
loff_t i_size;
struct inode *vi = page->mapping->host;
ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
- char *kaddr;
+ char *addr;
ntfs_attr_search_ctx *ctx = NULL;
MFT_RECORD *m = NULL;
u32 attr_len;
@@ -1275,63 +1372,67 @@ retry_writepage:
* The page may have dirty, unmapped buffers. Make them
* freeable here, so the page does not leak.
*/
- block_invalidatepage(page, 0);
+ block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
unlock_page(page);
ntfs_debug("Write outside i_size - truncated?");
return 0;
}
+ /*
+ * Only $DATA attributes can be encrypted and only unnamed $DATA
+ * attributes can be compressed. Index root can have the flags set but
+ * this means to create compressed/encrypted files, not that the
+ * attribute is compressed/encrypted. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
+ */
+ if (ni->type != AT_INDEX_ALLOCATION) {
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ unlock_page(page);
+ BUG_ON(ni->type != AT_DATA);
+ ntfs_debug("Denying write access to encrypted file.");
+ return -EACCES;
+ }
+ /* Compressed data streams are handled in compress.c. */
+ if (NInoNonResident(ni) && NInoCompressed(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ BUG_ON(ni->name_len);
+ // TODO: Implement and replace this with
+ // return ntfs_write_compressed_block(page);
+ unlock_page(page);
+ ntfs_error(vi->i_sb, "Writing to compressed files is "
+ "not supported yet. Sorry.");
+ return -EOPNOTSUPP;
+ }
+ // TODO: Implement and remove this check.
+ if (NInoNonResident(ni) && NInoSparse(ni)) {
+ unlock_page(page);
+ ntfs_error(vi->i_sb, "Writing to sparse files is not "
+ "supported yet. Sorry.");
+ return -EOPNOTSUPP;
+ }
+ }
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
- /*
- * Only unnamed $DATA attributes can be compressed, encrypted,
- * and/or sparse.
- */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- unlock_page(page);
- ntfs_debug("Denying write access to encrypted "
- "file.");
- return -EACCES;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni)) {
- // TODO: Implement and replace this check with
- // return ntfs_write_compressed_block(page);
- unlock_page(page);
- ntfs_error(vi->i_sb, "Writing to compressed "
- "files is not supported yet. "
- "Sorry.");
- return -EOPNOTSUPP;
- }
- // TODO: Implement and remove this check.
- if (NInoSparse(ni)) {
- unlock_page(page);
- ntfs_error(vi->i_sb, "Writing to sparse files "
- "is not supported yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
/* We have to zero every time due to mmap-at-end-of-file. */
if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
/* The page straddles i_size. */
unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
return ntfs_write_mst_block(page, wbc);
- /* Normal data stream. */
+ /* Normal, non-resident data stream. */
return ntfs_write_block(page, wbc);
}
/*
- * Attribute is resident, implying it is not compressed, encrypted,
- * sparse, or mst protected. This also means the attribute is smaller
- * than an mft record and hence smaller than a page, so can simply
- * return error on any pages with index above 0.
+ * Attribute is resident, implying it is not compressed, encrypted, or
+ * mst protected. This also means the attribute is smaller than an mft
+ * record and hence smaller than a page, so can simply return error on
+ * any pages with index above 0. Note the attribute can actually be
+ * marked compressed but if it is resident the actual data is not
+ * compressed so we are ok to ignore the compressed flag here.
*/
BUG_ON(page_has_buffers(page));
BUG_ON(!PageUptodate(page));
@@ -1380,50 +1481,33 @@ retry_writepage:
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
-
- /*
- * Here, we don't need to zero the out of bounds area everytime because
- * the below memcpy() already takes care of the mmap-at-end-of-file
- * requirements. If the file is converted to a non-resident one, then
- * the code path use is switched to the non-resident one where the
- * zeroing happens on each ntfs_writepage() invocation.
- *
- * The above also applies nicely when i_size is decreased.
- *
- * When i_size is increased, the memory between the old and new i_size
- * _must_ be zeroed (or overwritten with new data). Otherwise we will
- * expose data to userspace/disk which should never have been exposed.
- *
- * FIXME: Ensure that i_size increases do the zeroing/overwriting and
- * if we cannot guarantee that, then enable the zeroing below. If the
- * zeroing below is enabled, we MUST move the unlock_page() from above
- * to after the kunmap_atomic(), i.e. just before the
- * end_page_writeback().
- * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
- * increases for resident attributes so those are ok.
- * TODO: ntfs_truncate(), others?
- */
-
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) {
+ /* Race with shrinking truncate or a failed truncate. */
attr_len = i_size;
- ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+ /*
+ * If the truncate failed, fix it up now. If a concurrent
+ * truncate, we do its job, so it does not have to do anything.
+ */
+ err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+ attr_len);
+ /* Shrinking cannot fail. */
+ BUG_ON(err);
}
- kaddr = kmap_atomic(page, KM_USER0);
+ addr = kmap_atomic(page);
/* Copy the data from the page to the mft record. */
memcpy((u8*)ctx->attr +
le16_to_cpu(ctx->attr->data.resident.value_offset),
- kaddr, attr_len);
- flush_dcache_mft_record_page(ctx->ntfs_ino);
+ addr, attr_len);
/* Zero out of bounds area in the page cache page. */
- memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ kunmap_atomic(addr);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
-
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ /* We are done with the page. */
end_page_writeback(page);
-
- /* Mark the mft record dirty, so it gets written back. */
+ /* Finally, mark the mft record dirty, so it gets written back. */
mark_mft_record_dirty(ctx->ntfs_ino);
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(base_ni);
@@ -1443,7 +1527,6 @@ err_out:
"error %i.", err);
SetPageError(page);
NVolSetErrors(ni->vol);
- make_bad_inode(vi);
}
unlock_page(page);
if (ctx)
@@ -1453,863 +1536,38 @@ err_out:
return err;
}
-/**
- * ntfs_prepare_nonresident_write -
- *
- */
-static int ntfs_prepare_nonresident_write(struct page *page,
- unsigned from, unsigned to)
-{
- VCN vcn;
- LCN lcn;
- s64 initialized_size;
- loff_t i_size;
- sector_t block, ablock, iblock;
- struct inode *vi;
- ntfs_inode *ni;
- ntfs_volume *vol;
- runlist_element *rl;
- struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
- unsigned long flags;
- unsigned int vcn_ofs, block_start, block_end, blocksize;
- int err;
- BOOL is_retry;
- unsigned char blocksize_bits;
-
- vi = page->mapping->host;
- ni = NTFS_I(vi);
- vol = ni->vol;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
- page->index, from, to);
-
- BUG_ON(!NInoNonResident(ni));
-
- blocksize_bits = vi->i_blkbits;
- blocksize = 1 << blocksize_bits;
-
- /*
- * create_empty_buffers() will create uptodate/dirty buffers if the
- * page is uptodate/dirty.
- */
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
- bh = head = page_buffers(page);
- if (unlikely(!bh))
- return -ENOMEM;
-
- /* The first block in the page. */
- block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
-
- read_lock_irqsave(&ni->size_lock, flags);
- /*
- * The first out of bounds block for the allocated size. No need to
- * round up as allocated_size is in multiples of cluster size and the
- * minimum cluster size is 512 bytes, which is equal to the smallest
- * blocksize.
- */
- ablock = ni->allocated_size >> blocksize_bits;
- i_size = i_size_read(vi);
- initialized_size = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
-
- /* The last (fully or partially) initialized block. */
- iblock = initialized_size >> blocksize_bits;
-
- /* Loop through all the buffers in the page. */
- block_start = 0;
- rl = NULL;
- err = 0;
- do {
- block_end = block_start + blocksize;
- /*
- * If buffer @bh is outside the write, just mark it uptodate
- * if the page is uptodate and continue with the next buffer.
- */
- if (block_end <= from || block_start >= to) {
- if (PageUptodate(page)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- }
- continue;
- }
- /*
- * @bh is at least partially being written to.
- * Make sure it is not marked as new.
- */
- //if (buffer_new(bh))
- // clear_buffer_new(bh);
-
- if (block >= ablock) {
- // TODO: block is above allocated_size, need to
- // allocate it. Best done in one go to accommodate not
- // only block but all above blocks up to and including:
- // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
- // - 1) >> blobksize_bits. Obviously will need to round
- // up to next cluster boundary, too. This should be
- // done with a helper function, so it can be reused.
- ntfs_error(vol->sb, "Writing beyond allocated size "
- "is not supported yet. Sorry.");
- err = -EOPNOTSUPP;
- goto err_out;
- // Need to update ablock.
- // Need to set_buffer_new() on all block bhs that are
- // newly allocated.
- }
- /*
- * Now we have enough allocated size to fulfill the whole
- * request, i.e. block < ablock is true.
- */
- if (unlikely((block >= iblock) &&
- (initialized_size < i_size))) {
- /*
- * If this page is fully outside initialized size, zero
- * out all pages between the current initialized size
- * and the current page. Just use ntfs_readpage() to do
- * the zeroing transparently.
- */
- if (block > iblock) {
- // TODO:
- // For each page do:
- // - read_cache_page()
- // Again for each page do:
- // - wait_on_page_locked()
- // - Check (PageUptodate(page) &&
- // !PageError(page))
- // Update initialized size in the attribute and
- // in the inode.
- // Again, for each page do:
- // __set_page_dirty_buffers();
- // page_cache_release()
- // We don't need to wait on the writes.
- // Update iblock.
- }
- /*
- * The current page straddles initialized size. Zero
- * all non-uptodate buffers and set them uptodate (and
- * dirty?). Note, there aren't any non-uptodate buffers
- * if the page is uptodate.
- * FIXME: For an uptodate page, the buffers may need to
- * be written out because they were not initialized on
- * disk before.
- */
- if (!PageUptodate(page)) {
- // TODO:
- // Zero any non-uptodate buffers up to i_size.
- // Set them uptodate and dirty.
- }
- // TODO:
- // Update initialized size in the attribute and in the
- // inode (up to i_size).
- // Update iblock.
- // FIXME: This is inefficient. Try to batch the two
- // size changes to happen in one go.
- ntfs_error(vol->sb, "Writing beyond initialized size "
- "is not supported yet. Sorry.");
- err = -EOPNOTSUPP;
- goto err_out;
- // Do NOT set_buffer_new() BUT DO clear buffer range
- // outside write request range.
- // set_buffer_uptodate() on complete buffers as well as
- // set_buffer_dirty().
- }
-
- /* Need to map unmapped buffers. */
- if (!buffer_mapped(bh)) {
- /* Unmapped buffer. Need to map it. */
- bh->b_bdev = vol->sb->s_bdev;
-
- /* Convert block into corresponding vcn and offset. */
- vcn = (VCN)block << blocksize_bits >>
- vol->cluster_size_bits;
- vcn_ofs = ((VCN)block << blocksize_bits) &
- vol->cluster_size_mask;
-
- is_retry = FALSE;
- if (!rl) {
-lock_retry_remap:
- down_read(&ni->runlist.lock);
- rl = ni->runlist.rl;
- }
- if (likely(rl != NULL)) {
- /* Seek to element containing target vcn. */
- while (rl->length && rl[1].vcn <= vcn)
- rl++;
- lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
- } else
- lcn = LCN_RL_NOT_MAPPED;
- if (unlikely(lcn < 0)) {
- /*
- * We extended the attribute allocation above.
- * If we hit an ENOENT here it means that the
- * allocation was insufficient which is a bug.
- */
- BUG_ON(lcn == LCN_ENOENT);
-
- /* It is a hole, need to instantiate it. */
- if (lcn == LCN_HOLE) {
- // TODO: Instantiate the hole.
- // clear_buffer_new(bh);
- // unmap_underlying_metadata(bh->b_bdev,
- // bh->b_blocknr);
- // For non-uptodate buffers, need to
- // zero out the region outside the
- // request in this bh or all bhs,
- // depending on what we implemented
- // above.
- // Need to flush_dcache_page().
- // Or could use set_buffer_new()
- // instead?
- ntfs_error(vol->sb, "Writing into "
- "sparse regions is "
- "not supported yet. "
- "Sorry.");
- err = -EOPNOTSUPP;
- if (!rl)
- up_read(&ni->runlist.lock);
- goto err_out;
- } else if (!is_retry &&
- lcn == LCN_RL_NOT_MAPPED) {
- is_retry = TRUE;
- /*
- * Attempt to map runlist, dropping
- * lock for the duration.
- */
- up_read(&ni->runlist.lock);
- err = ntfs_map_runlist(ni, vcn);
- if (likely(!err))
- goto lock_retry_remap;
- rl = NULL;
- lcn = err;
- } else if (!rl)
- up_read(&ni->runlist.lock);
- /*
- * Failed to map the buffer, even after
- * retrying.
- */
- bh->b_blocknr = -1;
- ntfs_error(vol->sb, "Failed to write to inode "
- "0x%lx, attribute type 0x%x, "
- "vcn 0x%llx, offset 0x%x "
- "because its location on disk "
- "could not be determined%s "
- "(error code %lli).",
- ni->mft_no, ni->type,
- (unsigned long long)vcn,
- vcn_ofs, is_retry ? " even "
- "after retrying" : "",
- (long long)lcn);
- if (!err)
- err = -EIO;
- goto err_out;
- }
- /* We now have a successful remap, i.e. lcn >= 0. */
-
- /* Setup buffer head to correct block. */
- bh->b_blocknr = ((lcn << vol->cluster_size_bits)
- + vcn_ofs) >> blocksize_bits;
- set_buffer_mapped(bh);
-
- // FIXME: Something analogous to this is needed for
- // each newly allocated block, i.e. BH_New.
- // FIXME: Might need to take this out of the
- // if (!buffer_mapped(bh)) {}, depending on how we
- // implement things during the allocated_size and
- // initialized_size extension code above.
- if (buffer_new(bh)) {
- clear_buffer_new(bh);
- unmap_underlying_metadata(bh->b_bdev,
- bh->b_blocknr);
- if (PageUptodate(page)) {
- set_buffer_uptodate(bh);
- continue;
- }
- /*
- * Page is _not_ uptodate, zero surrounding
- * region. NOTE: This is how we decide if to
- * zero or not!
- */
- if (block_end > to || block_start < from) {
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- if (block_end > to)
- memset(kaddr + to, 0,
- block_end - to);
- if (block_start < from)
- memset(kaddr + block_start, 0,
- from -
- block_start);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
- continue;
- }
- }
- /* @bh is mapped, set it uptodate if the page is uptodate. */
- if (PageUptodate(page)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
- continue;
- }
- /*
- * The page is not uptodate. The buffer is mapped. If it is not
- * uptodate, and it is only partially being written to, we need
- * to read the buffer in before the write, i.e. right now.
- */
- if (!buffer_uptodate(bh) &&
- (block_start < from || block_end > to)) {
- ll_rw_block(READ, 1, &bh);
- *wait_bh++ = bh;
- }
- } while (block++, block_start = block_end,
- (bh = bh->b_this_page) != head);
-
- /* Release the lock if we took it. */
- if (rl) {
- up_read(&ni->runlist.lock);
- rl = NULL;
- }
-
- /* If we issued read requests, let them complete. */
- while (wait_bh > wait) {
- wait_on_buffer(*--wait_bh);
- if (!buffer_uptodate(*wait_bh))
- return -EIO;
- }
-
- ntfs_debug("Done.");
- return 0;
-err_out:
- /*
- * Zero out any newly allocated blocks to avoid exposing stale data.
- * If BH_New is set, we know that the block was newly allocated in the
- * above loop.
- * FIXME: What about initialized_size increments? Have we done all the
- * required zeroing above? If not this error handling is broken, and
- * in particular the if (block_end <= from) check is completely bogus.
- */
- bh = head;
- block_start = 0;
- is_retry = FALSE;
- do {
- block_end = block_start + blocksize;
- if (block_end <= from)
- continue;
- if (block_start >= to)
- break;
- if (buffer_new(bh)) {
- void *kaddr;
-
- clear_buffer_new(bh);
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + block_start, 0, bh->b_size);
- kunmap_atomic(kaddr, KM_USER0);
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- is_retry = TRUE;
- }
- } while (block_start = block_end, (bh = bh->b_this_page) != head);
- if (is_retry)
- flush_dcache_page(page);
- if (rl)
- up_read(&ni->runlist.lock);
- return err;
-}
-
-/**
- * ntfs_prepare_write - prepare a page for receiving data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host). The @page is locked but not kmap()ped. The source
- * data has not yet been copied into the @page.
- *
- * Need to extend the attribute/fill in holes if necessary, create blocks and
- * make partially overwritten blocks uptodate,
- *
- * i_size is not to be modified yet.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using block_prepare_write() [support for sparse files] or
- * cont_prepare_write() [no support for sparse files]. Cannot do that due to
- * ntfs specifics but can look at them for implementation guidance.
- *
- * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
- * the first byte in the page that will be written to and @to is the first byte
- * after the last byte that will be written to.
- */
-static int ntfs_prepare_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- s64 new_size;
- loff_t i_size;
- struct inode *vi = page->mapping->host;
- ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
- ntfs_volume *vol = ni->vol;
- ntfs_attr_search_ctx *ctx = NULL;
- MFT_RECORD *m = NULL;
- ATTR_RECORD *a;
- u8 *kaddr;
- u32 attr_len;
- int err;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
- page->index, from, to);
- BUG_ON(!PageLocked(page));
- BUG_ON(from > PAGE_CACHE_SIZE);
- BUG_ON(to > PAGE_CACHE_SIZE);
- BUG_ON(from > to);
- BUG_ON(NInoMstProtected(ni));
- /*
- * If a previous ntfs_truncate() failed, repeat it and abort if it
- * fails again.
- */
- if (unlikely(NInoTruncateFailed(ni))) {
- down_write(&vi->i_alloc_sem);
- err = ntfs_truncate(vi);
- up_write(&vi->i_alloc_sem);
- if (err || NInoTruncateFailed(ni)) {
- if (!err)
- err = -EIO;
- goto err_out;
- }
- }
- /* If the attribute is not resident, deal with it elsewhere. */
- if (NInoNonResident(ni)) {
- /*
- * Only unnamed $DATA attributes can be compressed, encrypted,
- * and/or sparse.
- */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- ntfs_debug("Denying write access to encrypted "
- "file.");
- return -EACCES;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni)) {
- // TODO: Implement and replace this check with
- // return ntfs_write_compressed_block(page);
- ntfs_error(vi->i_sb, "Writing to compressed "
- "files is not supported yet. "
- "Sorry.");
- return -EOPNOTSUPP;
- }
- // TODO: Implement and remove this check.
- if (NInoSparse(ni)) {
- ntfs_error(vi->i_sb, "Writing to sparse files "
- "is not supported yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
- /* Normal data stream. */
- return ntfs_prepare_nonresident_write(page, from, to);
- }
- /*
- * Attribute is resident, implying it is not compressed, encrypted, or
- * sparse.
- */
- BUG_ON(page_has_buffers(page));
- new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
- /* If we do not need to resize the attribute allocation we are done. */
- if (new_size <= i_size_read(vi))
- goto done;
- /* Map, pin, and lock the (base) mft record. */
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- m = ctx->mrec;
- a = ctx->attr;
- /* The total length of the attribute value. */
- attr_len = le32_to_cpu(a->data.resident.value_length);
- /* Fix an eventual previous failure of ntfs_commit_write(). */
- i_size = i_size_read(vi);
- if (unlikely(attr_len > i_size)) {
- attr_len = i_size;
- a->data.resident.value_length = cpu_to_le32(attr_len);
- }
- /* If we do not need to resize the attribute allocation we are done. */
- if (new_size <= attr_len)
- goto done_unm;
- /* Check if new size is allowed in $AttrDef. */
- err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
- if (unlikely(err)) {
- if (err == -ERANGE) {
- ntfs_error(vol->sb, "Write would cause the inode "
- "0x%lx to exceed the maximum size for "
- "its attribute type (0x%x). Aborting "
- "write.", vi->i_ino,
- le32_to_cpu(ni->type));
- } else {
- ntfs_error(vol->sb, "Inode 0x%lx has unknown "
- "attribute type 0x%x. Aborting "
- "write.", vi->i_ino,
- le32_to_cpu(ni->type));
- err = -EIO;
- }
- goto err_out2;
- }
- /*
- * Extend the attribute record to be able to store the new attribute
- * size.
- */
- if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
- le16_to_cpu(a->data.resident.value_offset) +
- new_size)) {
- /* Not enough space in the mft record. */
- ntfs_error(vol->sb, "Not enough space in the mft record for "
- "the resized attribute value. This is not "
- "supported yet. Aborting write.");
- err = -EOPNOTSUPP;
- goto err_out2;
- }
- /*
- * We have enough space in the mft record to fit the write. This
- * implies the attribute is smaller than the mft record and hence the
- * attribute must be in a single page and hence page->index must be 0.
- */
- BUG_ON(page->index);
- /*
- * If the beginning of the write is past the old size, enlarge the
- * attribute value up to the beginning of the write and fill it with
- * zeroes.
- */
- if (from > attr_len) {
- memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
- attr_len, 0, from - attr_len);
- a->data.resident.value_length = cpu_to_le32(from);
- /* Zero the corresponding area in the page as well. */
- if (PageUptodate(page)) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + attr_len, 0, from - attr_len);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page);
- }
- }
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
-done_unm:
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- /*
- * Because resident attributes are handled by memcpy() to/from the
- * corresponding MFT record, and because this form of i/o is byte
- * aligned rather than block aligned, there is no need to bring the
- * page uptodate here as in the non-resident case where we need to
- * bring the buffers straddled by the write uptodate before
- * generic_file_write() does the copying from userspace.
- *
- * We thus defer the uptodate bringing of the page region outside the
- * region written to to ntfs_commit_write(), which makes the code
- * simpler and saves one atomic kmap which is good.
- */
-done:
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (err == -ENOMEM)
- ntfs_warning(vi->i_sb, "Error allocating memory required to "
- "prepare the write.");
- else {
- ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
- "with error %i.", err);
- NVolSetErrors(vol);
- make_bad_inode(vi);
- }
-err_out2:
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- return err;
-}
-
-/**
- * ntfs_commit_nonresident_write -
- *
- */
-static int ntfs_commit_nonresident_write(struct page *page,
- unsigned from, unsigned to)
-{
- s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
- struct inode *vi = page->mapping->host;
- struct buffer_head *bh, *head;
- unsigned int block_start, block_end, blocksize;
- BOOL partial;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx, from = %u, to = %u.", vi->i_ino,
- NTFS_I(vi)->type, page->index, from, to);
- blocksize = 1 << vi->i_blkbits;
-
- // FIXME: We need a whole slew of special cases in here for compressed
- // files for example...
- // For now, we know ntfs_prepare_write() would have failed so we can't
- // get here in any of the cases which we have to special case, so we
- // are just a ripped off, unrolled generic_commit_write().
-
- bh = head = page_buffers(page);
- block_start = 0;
- partial = FALSE;
- do {
- block_end = block_start + blocksize;
- if (block_end <= from || block_start >= to) {
- if (!buffer_uptodate(bh))
- partial = TRUE;
- } else {
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- }
- } while (block_start = block_end, (bh = bh->b_this_page) != head);
- /*
- * If this is a partial write which happened to make all buffers
- * uptodate then we can optimize away a bogus ->readpage() for the next
- * read(). Here we 'discover' whether the page went uptodate as a
- * result of this (potentially partial) write.
- */
- if (!partial)
- SetPageUptodate(page);
- /*
- * Not convinced about this at all. See disparity comment above. For
- * now we know ntfs_prepare_write() would have failed in the write
- * exceeds i_size case, so this will never trigger which is fine.
- */
- if (pos > i_size_read(vi)) {
- ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
- "not supported yet. Sorry.");
- return -EOPNOTSUPP;
- // vi->i_size = pos;
- // mark_inode_dirty(vi);
- }
- ntfs_debug("Done.");
- return 0;
-}
-
-/**
- * ntfs_commit_write - commit the received data
- *
- * This is called from generic_file_write() with i_sem held on the inode
- * (@page->mapping->host). The @page is locked but not kmap()ped. The source
- * data has already been copied into the @page. ntfs_prepare_write() has been
- * called before the data copied and it returned success so we can take the
- * results of various BUG checks and some error handling for granted.
- *
- * Need to mark modified blocks dirty so they get written out later when
- * ntfs_writepage() is invoked by the VM.
- *
- * Return 0 on success or -errno on error.
- *
- * Should be using generic_commit_write(). This marks buffers uptodate and
- * dirty, sets the page uptodate if all buffers in the page are uptodate, and
- * updates i_size if the end of io is beyond i_size. In that case, it also
- * marks the inode dirty.
- *
- * Cannot use generic_commit_write() due to ntfs specialities but can look at
- * it for implementation guidance.
- *
- * If things have gone as outlined in ntfs_prepare_write(), then we do not
- * need to do any page content modifications here at all, except in the write
- * to resident attribute case, where we need to do the uptodate bringing here
- * which we combine with the copying into the mft record which means we save
- * one atomic kmap.
- */
-static int ntfs_commit_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- struct inode *vi = page->mapping->host;
- ntfs_inode *base_ni, *ni = NTFS_I(vi);
- char *kaddr, *kattr;
- ntfs_attr_search_ctx *ctx;
- MFT_RECORD *m;
- ATTR_RECORD *a;
- u32 attr_len;
- int err;
-
- ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
- "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
- page->index, from, to);
- /* If the attribute is not resident, deal with it elsewhere. */
- if (NInoNonResident(ni)) {
- /* Only unnamed $DATA attributes can be compressed/encrypted. */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* Encrypted files need separate handling. */
- if (NInoEncrypted(ni)) {
- // We never get here at present!
- BUG();
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni)) {
- // TODO: Implement this!
- // return ntfs_write_compressed_block(page);
- // We never get here at present!
- BUG();
- }
- }
- /* Normal data stream. */
- return ntfs_commit_nonresident_write(page, from, to);
- }
- /*
- * Attribute is resident, implying it is not compressed, encrypted, or
- * sparse.
- */
- if (!NInoAttr(ni))
- base_ni = ni;
- else
- base_ni = ni->ext.base_ntfs_ino;
- /* Map, pin, and lock the mft record. */
- m = map_mft_record(base_ni);
- if (IS_ERR(m)) {
- err = PTR_ERR(m);
- m = NULL;
- ctx = NULL;
- goto err_out;
- }
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
- }
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, 0, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- a = ctx->attr;
- /* The total length of the attribute value. */
- attr_len = le32_to_cpu(a->data.resident.value_length);
- BUG_ON(from > attr_len);
- kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
- kaddr = kmap_atomic(page, KM_USER0);
- /* Copy the received data from the page to the mft record. */
- memcpy(kattr + from, kaddr + from, to - from);
- /* Update the attribute length if necessary. */
- if (to > attr_len) {
- attr_len = to;
- a->data.resident.value_length = cpu_to_le32(attr_len);
- }
- /*
- * If the page is not uptodate, bring the out of bounds area(s)
- * uptodate by copying data from the mft record to the page.
- */
- if (!PageUptodate(page)) {
- if (from > 0)
- memcpy(kaddr, kattr, from);
- if (to < attr_len)
- memcpy(kaddr + to, kattr + to, attr_len - to);
- /* Zero the region outside the end of the attribute value. */
- if (attr_len < PAGE_CACHE_SIZE)
- memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
- /*
- * The probability of not having done any of the above is
- * extremely small, so we just flush unconditionally.
- */
- flush_dcache_page(page);
- SetPageUptodate(page);
- }
- kunmap_atomic(kaddr, KM_USER0);
- /* Update i_size if necessary. */
- if (i_size_read(vi) < attr_len) {
- unsigned long flags;
-
- write_lock_irqsave(&ni->size_lock, flags);
- ni->allocated_size = ni->initialized_size = attr_len;
- i_size_write(vi, attr_len);
- write_unlock_irqrestore(&ni->size_lock, flags);
- }
- /* Mark the mft record dirty, so it gets written back. */
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (err == -ENOMEM) {
- ntfs_warning(vi->i_sb, "Error allocating memory required to "
- "commit the write.");
- if (PageUptodate(page)) {
- ntfs_warning(vi->i_sb, "Page is uptodate, setting "
- "dirty so the write will be retried "
- "later on by the VM.");
- /*
- * Put the page on mapping->dirty_pages, but leave its
- * buffers' dirty state as-is.
- */
- __set_page_dirty_nobuffers(page);
- err = 0;
- } else
- ntfs_error(vi->i_sb, "Page is not uptodate. Written "
- "data has been lost.");
- } else {
- ntfs_error(vi->i_sb, "Resident attribute commit write failed "
- "with error %i.", err);
- NVolSetErrors(ni->vol);
- make_bad_inode(vi);
- }
- if (ctx)
- ntfs_attr_put_search_ctx(ctx);
- if (m)
- unmap_mft_record(base_ni);
- return err;
-}
-
#endif /* NTFS_RW */
/**
* ntfs_aops - general address space operations for inodes and attributes
*/
-struct address_space_operations ntfs_aops = {
+const struct address_space_operations ntfs_aops = {
.readpage = ntfs_readpage, /* Fill page with data. */
- .sync_page = block_sync_page, /* Currently, just unplugs the
- disk request queue. */
#ifdef NTFS_RW
.writepage = ntfs_writepage, /* Write dirty page to disk. */
- .prepare_write = ntfs_prepare_write, /* Prepare page and buffers
- ready to receive data. */
- .commit_write = ntfs_commit_write, /* Commit received data. */
#endif /* NTFS_RW */
+ .migratepage = buffer_migrate_page, /* Move a page cache page from
+ one physical page to an
+ other. */
+ .error_remove_page = generic_error_remove_page,
};
/**
* ntfs_mst_aops - general address space operations for mst protected inodes
* and attributes
*/
-struct address_space_operations ntfs_mst_aops = {
+const struct address_space_operations ntfs_mst_aops = {
.readpage = ntfs_readpage, /* Fill page with data. */
- .sync_page = block_sync_page, /* Currently, just unplugs the
- disk request queue. */
#ifdef NTFS_RW
.writepage = ntfs_writepage, /* Write dirty page to disk. */
.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
without touching the buffers
belonging to the page. */
#endif /* NTFS_RW */
+ .migratepage = buffer_migrate_page, /* Move a page cache page from
+ one physical page to an
+ other. */
+ .error_remove_page = generic_error_remove_page,
};
#ifdef NTFS_RW
@@ -2337,7 +1595,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
BUG_ON(!PageUptodate(page));
end = ofs + ni->itype.index.block_size;
- bh_size = 1 << VFS_I(ni)->i_blkbits;
+ bh_size = VFS_I(ni)->i_sb->s_blocksize;
spin_lock(&mapping->private_lock);
if (unlikely(!page_has_buffers(page))) {
spin_unlock(&mapping->private_lock);
@@ -2357,6 +1615,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
buffers_to_free = bh;
}
bh = head = page_buffers(page);
+ BUG_ON(!bh);
do {
bh_ofs = bh_offset(bh);
if (bh_ofs + bh_size <= ofs)
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index 3b74e66ca2f..caecc58f529 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h
@@ -80,19 +80,17 @@ static inline void ntfs_unmap_page(struct page *page)
*
* The unlocked and uptodate page is returned on success or an encoded error
* on failure. Caller has to test for error using the IS_ERR() macro on the
- * return value. If that evaluates to TRUE, the negative error code can be
+ * return value. If that evaluates to 'true', the negative error code can be
* obtained using PTR_ERR() on the return value of ntfs_map_page().
*/
static inline struct page *ntfs_map_page(struct address_space *mapping,
unsigned long index)
{
- struct page *page = read_cache_page(mapping, index,
- (filler_t*)mapping->a_ops->readpage, NULL);
+ struct page *page = read_mapping_page(mapping, index, NULL);
if (!IS_ERR(page)) {
- wait_on_page_locked(page);
kmap(page);
- if (PageUptodate(page) && !PageError(page))
+ if (!PageError(page))
return page;
ntfs_unmap_page(page);
return ERR_PTR(-EIO);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index cd0f9e740b1..250ed5b20c8 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
/**
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -21,7 +21,10 @@
*/
#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/writeback.h>
#include "attrib.h"
#include "debug.h"
@@ -36,25 +39,60 @@
* ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
* @ni: ntfs inode for which to map (part of) a runlist
* @vcn: map runlist part containing this vcn
+ * @ctx: active attribute search context if present or NULL if not
*
* Map the part of a runlist containing the @vcn of the ntfs inode @ni.
*
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record. This is needed when ntfs_map_runlist_nolock() encounters unmapped
+ * runlist fragments and allows their mapping. If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
+ * will perform the necessary mapping and unmapping.
+ *
+ * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
+ * restores it before returning. Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry. However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_map_runlist_nolock(), you will probably want to do:
+ * m = ctx->mrec;
+ * a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
* Return 0 on success and -errno on error. There is one special error code
* which is not an error as such. This is -ENOENT. It means that @vcn is out
* of bounds of the runlist.
*
- * Locking: - The runlist must be locked for writing.
- * - This function modifies the runlist.
+ * Note the runlist can be NULL after this function returns if @vcn is zero and
+ * the attribute has zero allocated size, i.e. there simply is no runlist.
+ *
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
+ * is no longer valid, i.e. you need to either call
+ * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ * In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ * why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist will be modified.
+ * - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ * entry and it will be left unmapped on return.
+ * - If @ctx is not NULL, the base mft record must be mapped on entry
+ * and it will be left mapped on return.
*/
-int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
{
VCN end_vcn;
+ unsigned long flags;
ntfs_inode *base_ni;
MFT_RECORD *m;
ATTR_RECORD *a;
- ntfs_attr_search_ctx *ctx;
runlist_element *rl;
+ struct page *put_this_page = NULL;
int err = 0;
+ bool ctx_is_temporary, ctx_needs_reset;
+ ntfs_attr_search_ctx old_ctx = { NULL, };
ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
(unsigned long long)vcn);
@@ -62,20 +100,77 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
base_ni = ni;
else
base_ni = ni->ext.base_ntfs_ino;
- m = map_mft_record(base_ni);
- if (IS_ERR(m))
- return PTR_ERR(m);
- ctx = ntfs_attr_get_search_ctx(base_ni, m);
- if (unlikely(!ctx)) {
- err = -ENOMEM;
- goto err_out;
+ if (!ctx) {
+ ctx_is_temporary = ctx_needs_reset = true;
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m))
+ return PTR_ERR(m);
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ } else {
+ VCN allocated_size_vcn;
+
+ BUG_ON(IS_ERR(ctx->mrec));
+ a = ctx->attr;
+ BUG_ON(!a->non_resident);
+ ctx_is_temporary = false;
+ end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size_vcn = ni->allocated_size >>
+ ni->vol->cluster_size_bits;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (!a->data.non_resident.lowest_vcn && end_vcn <= 0)
+ end_vcn = allocated_size_vcn - 1;
+ /*
+ * If we already have the attribute extent containing @vcn in
+ * @ctx, no need to look it up again. We slightly cheat in
+ * that if vcn exceeds the allocated size, we will refuse to
+ * map the runlist below, so there is definitely no need to get
+ * the right attribute extent.
+ */
+ if (vcn >= allocated_size_vcn || (a->type == ni->type &&
+ a->name_length == ni->name_len &&
+ !memcmp((u8*)a + le16_to_cpu(a->name_offset),
+ ni->name, ni->name_len) &&
+ sle64_to_cpu(a->data.non_resident.lowest_vcn)
+ <= vcn && end_vcn >= vcn))
+ ctx_needs_reset = false;
+ else {
+ /* Save the old search context. */
+ old_ctx = *ctx;
+ /*
+ * If the currently mapped (extent) inode is not the
+ * base inode we will unmap it when we reinitialize the
+ * search context which means we need to get a
+ * reference to the page containing the mapped mft
+ * record so we do not accidentally drop changes to the
+ * mft record when it has not been marked dirty yet.
+ */
+ if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
+ old_ctx.base_ntfs_ino) {
+ put_this_page = old_ctx.ntfs_ino->page;
+ page_cache_get(put_this_page);
+ }
+ /*
+ * Reinitialize the search context so we can lookup the
+ * needed attribute extent.
+ */
+ ntfs_attr_reinit_search_ctx(ctx);
+ ctx_needs_reset = true;
+ }
}
- err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
- CASE_SENSITIVE, vcn, NULL, 0, ctx);
- if (unlikely(err)) {
- if (err == -ENOENT)
- err = -EIO;
- goto err_out;
+ if (ctx_needs_reset) {
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, vcn, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ BUG_ON(!ctx->attr->non_resident);
}
a = ctx->attr;
/*
@@ -85,9 +180,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
* ntfs_mapping_pairs_decompress() fails.
*/
end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
- if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
- end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
- if (unlikely(vcn >= end_vcn)) {
+ if (unlikely(vcn && vcn >= end_vcn)) {
err = -ENOENT;
goto err_out;
}
@@ -97,9 +190,93 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
else
ni->runlist.rl = rl;
err_out:
- if (likely(ctx))
- ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(base_ni);
+ if (ctx_is_temporary) {
+ if (likely(ctx))
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ } else if (ctx_needs_reset) {
+ /*
+ * If there is no attribute list, restoring the search context
+ * is accomplished simply by copying the saved context back over
+ * the caller supplied context. If there is an attribute list,
+ * things are more complicated as we need to deal with mapping
+ * of mft records and resulting potential changes in pointers.
+ */
+ if (NInoAttrList(base_ni)) {
+ /*
+ * If the currently mapped (extent) inode is not the
+ * one we had before, we need to unmap it and map the
+ * old one.
+ */
+ if (ctx->ntfs_ino != old_ctx.ntfs_ino) {
+ /*
+ * If the currently mapped inode is not the
+ * base inode, unmap it.
+ */
+ if (ctx->base_ntfs_ino && ctx->ntfs_ino !=
+ ctx->base_ntfs_ino) {
+ unmap_extent_mft_record(ctx->ntfs_ino);
+ ctx->mrec = ctx->base_mrec;
+ BUG_ON(!ctx->mrec);
+ }
+ /*
+ * If the old mapped inode is not the base
+ * inode, map it.
+ */
+ if (old_ctx.base_ntfs_ino &&
+ old_ctx.ntfs_ino !=
+ old_ctx.base_ntfs_ino) {
+retry_map:
+ ctx->mrec = map_mft_record(
+ old_ctx.ntfs_ino);
+ /*
+ * Something bad has happened. If out
+ * of memory retry till it succeeds.
+ * Any other errors are fatal and we
+ * return the error code in ctx->mrec.
+ * Let the caller deal with it... We
+ * just need to fudge things so the
+ * caller can reinit and/or put the
+ * search context safely.
+ */
+ if (IS_ERR(ctx->mrec)) {
+ if (PTR_ERR(ctx->mrec) ==
+ -ENOMEM) {
+ schedule();
+ goto retry_map;
+ } else
+ old_ctx.ntfs_ino =
+ old_ctx.
+ base_ntfs_ino;
+ }
+ }
+ }
+ /* Update the changed pointers in the saved context. */
+ if (ctx->mrec != old_ctx.mrec) {
+ if (!IS_ERR(ctx->mrec))
+ old_ctx.attr = (ATTR_RECORD*)(
+ (u8*)ctx->mrec +
+ ((u8*)old_ctx.attr -
+ (u8*)old_ctx.mrec));
+ old_ctx.mrec = ctx->mrec;
+ }
+ }
+ /* Restore the search context to the saved one. */
+ *ctx = old_ctx;
+ /*
+ * We drop the reference on the page we took earlier. In the
+ * case that IS_ERR(ctx->mrec) is true this means we might lose
+ * some changes to the mft record that had been made between
+ * the last time it was marked dirty/written out and now. This
+ * at this stage is not a problem as the mapping error is fatal
+ * enough that the mft record cannot be written out anyway and
+ * the caller is very likely to shutdown the whole inode
+ * immediately and mark the volume dirty for chkdsk to pick up
+ * the pieces anyway.
+ */
+ if (put_this_page)
+ page_cache_release(put_this_page);
+ }
return err;
}
@@ -115,8 +292,8 @@ err_out:
* of bounds of the runlist.
*
* Locking: - The runlist must be unlocked on entry and is unlocked on return.
- * - This function takes the runlist lock for writing and modifies the
- * runlist.
+ * - This function takes the runlist lock for writing and may modify
+ * the runlist.
*/
int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
{
@@ -126,7 +303,7 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
/* Make sure someone else didn't do the work while we were sleeping. */
if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
LCN_RL_NOT_MAPPED))
- err = ntfs_map_runlist_nolock(ni, vcn);
+ err = ntfs_map_runlist_nolock(ni, vcn, NULL);
up_write(&ni->runlist.lock);
return err;
}
@@ -157,22 +334,31 @@ int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
* LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc).
*
* Locking: - The runlist must be locked on entry and is left locked on return.
- * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
+ * - If @write_locked is 'false', i.e. the runlist is locked for reading,
* the lock may be dropped inside the function so you cannot rely on
* the runlist still being the same when this function returns.
*/
LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
- const BOOL write_locked)
+ const bool write_locked)
{
LCN lcn;
- BOOL is_retry = FALSE;
+ unsigned long flags;
+ bool is_retry = false;
+ BUG_ON(!ni);
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
ni->mft_no, (unsigned long long)vcn,
write_locked ? "write" : "read");
- BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
+ if (!ni->runlist.rl) {
+ read_lock_irqsave(&ni->size_lock, flags);
+ if (!ni->allocated_size) {
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ return LCN_ENOENT;
+ }
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ }
retry_remap:
/* Convert vcn to lcn. If that fails map the runlist and retry once. */
lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
@@ -196,13 +382,13 @@ retry_remap:
goto retry_remap;
}
}
- err = ntfs_map_runlist_nolock(ni, vcn);
+ err = ntfs_map_runlist_nolock(ni, vcn, NULL);
if (!write_locked) {
up_write(&ni->runlist.lock);
down_read(&ni->runlist.lock);
}
if (likely(!err)) {
- is_retry = TRUE;
+ is_retry = true;
goto retry_remap;
}
if (err == -ENOENT)
@@ -220,9 +406,9 @@ retry_remap:
/**
* ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
- * @ni: ntfs inode describing the runlist to search
- * @vcn: vcn to find
- * @write_locked: true if the runlist is locked for writing
+ * @ni: ntfs inode describing the runlist to search
+ * @vcn: vcn to find
+ * @ctx: active attribute search context if present or NULL if not
*
* Find the virtual cluster number @vcn in the runlist described by the ntfs
* inode @ni and return the address of the runlist element containing the @vcn.
@@ -230,9 +416,22 @@ retry_remap:
* If the @vcn is not mapped yet, the attempt is made to map the attribute
* extent containing the @vcn and the vcn to lcn conversion is retried.
*
- * If @write_locked is true the caller has locked the runlist for writing and
- * if false for reading.
- *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record. This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
+ * runlist fragments and allows their mapping. If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
+ * will perform the necessary mapping and unmapping.
+ *
+ * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
+ * restores it before returning. Thus, @ctx will be left pointing to the same
+ * attribute on return as on entry. However, the actual pointers in @ctx may
+ * point to different memory locations on return, so you must remember to reset
+ * any cached pointers from the @ctx, i.e. after the call to
+ * ntfs_attr_find_vcn_nolock(), you will probably want to do:
+ * m = ctx->mrec;
+ * a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
* Note you need to distinguish between the lcn of the returned runlist element
* being >= 0 and LCN_HOLE. In the latter case you have to return zeroes on
* read and allocate clusters on write.
@@ -247,24 +446,42 @@ retry_remap:
* -ENOMEM - Not enough memory to map runlist.
* -EIO - Critical error (runlist/file is corrupt, i/o error, etc).
*
- * Locking: - The runlist must be locked on entry and is left locked on return.
- * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
- * the lock may be dropped inside the function so you cannot rely on
- * the runlist still being the same when this function returns.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
+ * is no longer valid, i.e. you need to either call
+ * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ * In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ * why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
+ * - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ * entry and it will be left unmapped on return.
+ * - If @ctx is not NULL, the base mft record must be mapped on entry
+ * and it will be left mapped on return.
*/
runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
- const BOOL write_locked)
+ ntfs_attr_search_ctx *ctx)
{
+ unsigned long flags;
runlist_element *rl;
int err = 0;
- BOOL is_retry = FALSE;
+ bool is_retry = false;
- ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
- ni->mft_no, (unsigned long long)vcn,
- write_locked ? "write" : "read");
BUG_ON(!ni);
+ ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
+ ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
+ if (!ni->runlist.rl) {
+ read_lock_irqsave(&ni->size_lock, flags);
+ if (!ni->allocated_size) {
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ return ERR_PTR(-ENOENT);
+ }
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ }
retry_remap:
rl = ni->runlist.rl;
if (likely(rl && vcn >= rl[0].vcn)) {
@@ -287,33 +504,22 @@ retry_remap:
}
if (!err && !is_retry) {
/*
- * The @vcn is in an unmapped region, map the runlist and
- * retry.
+ * If the search context is invalid we cannot map the unmapped
+ * region.
*/
- if (!write_locked) {
- up_read(&ni->runlist.lock);
- down_write(&ni->runlist.lock);
- if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
- LCN_RL_NOT_MAPPED)) {
- up_write(&ni->runlist.lock);
- down_read(&ni->runlist.lock);
+ if (IS_ERR(ctx->mrec))
+ err = PTR_ERR(ctx->mrec);
+ else {
+ /*
+ * The @vcn is in an unmapped region, map the runlist
+ * and retry.
+ */
+ err = ntfs_map_runlist_nolock(ni, vcn, ctx);
+ if (likely(!err)) {
+ is_retry = true;
goto retry_remap;
}
}
- err = ntfs_map_runlist_nolock(ni, vcn);
- if (!write_locked) {
- up_write(&ni->runlist.lock);
- down_read(&ni->runlist.lock);
- }
- if (likely(!err)) {
- is_retry = TRUE;
- goto retry_remap;
- }
- /*
- * -EINVAL coming from a failed mapping attempt is equivalent
- * to i/o error for us as it should not happen in our code
- * paths.
- */
if (err == -EINVAL)
err = -EIO;
} else if (!err)
@@ -350,8 +556,8 @@ retry_remap:
* On actual error, ntfs_attr_find() returns -EIO. In this case @ctx->attr is
* undefined and in particular do not rely on it not changing.
*
- * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself. If it
- * is FALSE, the search begins after @ctx->attr.
+ * If @ctx->is_first is 'true', the search begins with @ctx->attr itself. If it
+ * is 'false', the search begins after @ctx->attr.
*
* If @ic is IGNORE_CASE, the @name comparison is not case sensitive and
* @ctx->ntfs_ino must be set to the ntfs inode to which the mft record
@@ -391,11 +597,11 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
/*
* Iterate over attributes in mft record starting at @ctx->attr, or the
- * attribute following that, if @ctx->is_first is TRUE.
+ * attribute following that, if @ctx->is_first is 'true'.
*/
if (ctx->is_first) {
a = ctx->attr;
- ctx->is_first = FALSE;
+ ctx->is_first = false;
} else
a = (ATTR_RECORD*)((u8*)ctx->attr +
le32_to_cpu(ctx->attr->length));
@@ -528,6 +734,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
block_size_bits = sb->s_blocksize_bits;
down_read(&runlist->lock);
rl = runlist->rl;
+ if (!rl) {
+ ntfs_error(sb, "Cannot read attribute list since runlist is "
+ "missing.");
+ goto err_out;
+ }
/* Read all clusters specified by the runlist one run at a time. */
while (rl->length) {
lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
@@ -677,11 +888,11 @@ static int ntfs_external_attr_find(const ATTR_TYPE type,
ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
/*
* Iterate over entries in attribute list starting at @ctx->al_entry,
- * or the entry following that, if @ctx->is_first is TRUE.
+ * or the entry following that, if @ctx->is_first is 'true'.
*/
if (ctx->is_first) {
al_entry = ctx->al_entry;
- ctx->is_first = FALSE;
+ ctx->is_first = false;
} else
al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
le16_to_cpu(ctx->al_entry->length));
@@ -835,7 +1046,7 @@ do_next_attr_loop:
le32_to_cpu(ctx->mrec->bytes_allocated))
break;
if (a->type == AT_END)
- continue;
+ break;
if (!a->length)
break;
if (al_entry->instance != a->instance)
@@ -914,7 +1125,7 @@ not_found:
ctx->mrec = ctx->base_mrec;
ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
le16_to_cpu(ctx->mrec->attrs_offset));
- ctx->is_first = TRUE;
+ ctx->is_first = true;
ctx->ntfs_ino = base_ni;
ctx->base_ntfs_ino = NULL;
ctx->base_mrec = NULL;
@@ -970,7 +1181,7 @@ not_found:
* for, i.e. if one wants to add the attribute to the mft record this is the
* correct place to insert its attribute list entry into.
*
- * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is
+ * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is
* then undefined and in particular you should not rely on it not changing.
*/
int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
@@ -981,6 +1192,7 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
ntfs_inode *base_ni;
ntfs_debug("Entering.");
+ BUG_ON(IS_ERR(ctx->mrec));
if (ctx->base_ntfs_ino)
base_ni = ctx->base_ntfs_ino;
else
@@ -1010,7 +1222,7 @@ static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
/* Sanity checks are performed elsewhere. */
.attr = (ATTR_RECORD*)((u8*)mrec +
le16_to_cpu(mrec->attrs_offset)),
- .is_first = TRUE,
+ .is_first = true,
.ntfs_ino = ni,
};
}
@@ -1029,7 +1241,7 @@ void ntfs_attr_reinit_search_ctx(ntfs_attr_search_ctx *ctx)
{
if (likely(!ctx->base_ntfs_ino)) {
/* No attribute list. */
- ctx->is_first = TRUE;
+ ctx->is_first = true;
/* Sanity checks are performed elsewhere. */
ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
le16_to_cpu(ctx->mrec->attrs_offset));
@@ -1058,7 +1270,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
{
ntfs_attr_search_ctx *ctx;
- ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS);
+ ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS);
if (ctx)
ntfs_attr_init_search_ctx(ctx, ni, mrec);
return ctx;
@@ -1197,7 +1409,7 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
*/
int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
{
- if (type == AT_INDEX_ALLOCATION || type == AT_EA)
+ if (type == AT_INDEX_ALLOCATION)
return -EPERM;
return 0;
}
@@ -1247,12 +1459,59 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
}
/**
+ * ntfs_resident_attr_value_resize - resize the value of a resident attribute
+ * @m: mft record containing attribute record
+ * @a: attribute record whose value to resize
+ * @new_size: new size in bytes to which to resize the attribute value of @a
+ *
+ * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
+ * If the value is made bigger, the newly allocated space is cleared.
+ *
+ * Return 0 on success and -errno on error. The following error codes are
+ * defined:
+ * -ENOSPC - Not enough space in the mft record @m to perform the resize.
+ *
+ * Note: On error, no modifications have been performed whatsoever.
+ *
+ * Warning: If you make a record smaller without having copied all the data you
+ * are interested in, that data may be overwritten.
+ */
+int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+ const u32 new_size)
+{
+ u32 old_size; /* Value length, in bytes, before this resize. */
+
+ /* Resize the record to value_offset + @new_size; any failure is -ENOSPC. */
+ if (ntfs_attr_record_resize(m, a,
+ le16_to_cpu(a->data.resident.value_offset) + new_size))
+ return -ENOSPC;
+ /*
+ * The resize succeeded! value_length is not updated until below, so it
+ * is read here as the old size. If the value grew, zero the new bytes.
+ */
+ old_size = le32_to_cpu(a->data.resident.value_length);
+ if (new_size > old_size)
+ memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
+ old_size, 0, new_size - old_size);
+ /* Finally store @new_size (little endian) as the attribute value length. */
+ a->data.resident.value_length = cpu_to_le32(new_size);
+ return 0;
+}
+
+/**
* ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
* @ni: ntfs inode describing the attribute to convert
+ * @data_size: size of the resident data to copy to the non-resident attribute
*
* Convert the resident ntfs attribute described by the ntfs inode @ni to a
* non-resident one.
*
+ * @data_size must be equal to the attribute value size. This is needed since
+ * we need to know the size before we can map the mft record and our callers
+ * always know it. The reason we cannot simply read the size from the vfs
+ * inode i_size is that this is not necessarily uptodate. This happens when
+ * ntfs_attr_make_non_resident() is called in the ->truncate call path(s).
+ *
* Return 0 on success and -errno on error. The following error return codes
* are defined:
* -EPERM - The attribute is not allowed to be non-resident.
@@ -1271,9 +1530,9 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
* NOTE to self: No changes in the attribute list are required to move from
* a resident to a non-resident attribute.
*
- * Locking: - The caller must hold i_sem on the inode.
+ * Locking: - The caller must hold i_mutex on the inode.
*/
-int ntfs_attr_make_non_resident(ntfs_inode *ni)
+int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
{
s64 new_size;
struct inode *vi = VFS_I(ni);
@@ -1302,14 +1561,18 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
return err;
}
/*
+ * FIXME: Compressed and encrypted attributes are not supported when
+ * writing and we should never have gotten here for them.
+ */
+ BUG_ON(NInoCompressed(ni));
+ BUG_ON(NInoEncrypted(ni));
+ /*
* The size needs to be aligned to a cluster boundary for allocation
* purposes.
*/
- new_size = (i_size_read(vi) + vol->cluster_size - 1) &
+ new_size = (data_size + vol->cluster_size - 1) &
~(vol->cluster_size - 1);
if (new_size > 0) {
- runlist_element *rl2;
-
/*
* Will need the page later and since the page lock nests
* outside all ntfs locks, we need to get the page now.
@@ -1320,7 +1583,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
return -ENOMEM;
/* Start by allocating clusters to hold the attribute value. */
rl = ntfs_cluster_alloc(vol, 0, new_size >>
- vol->cluster_size_bits, -1, DATA_ZONE);
+ vol->cluster_size_bits, -1, DATA_ZONE, true);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
ntfs_debug("Failed to allocate cluster%s, error code "
@@ -1329,12 +1592,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
err);
goto page_err_out;
}
- /* Change the runlist terminator to LCN_ENOENT. */
- rl2 = rl;
- while (rl2->length)
- rl2++;
- BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
- rl2->lcn = LCN_ENOENT;
} else {
rl = NULL;
page = NULL;
@@ -1377,10 +1634,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
BUG_ON(a->non_resident);
/*
* Calculate new offsets for the name and the mapping pairs array.
- * We assume the attribute is not compressed or sparse.
*/
- name_ofs = (offsetof(ATTR_REC,
- data.non_resident.compressed_size) + 7) & ~7;
+ if (NInoSparse(ni) || NInoCompressed(ni))
+ name_ofs = (offsetof(ATTR_REC,
+ data.non_resident.compressed_size) +
+ sizeof(a->data.non_resident.compressed_size) +
+ 7) & ~7;
+ else
+ name_ofs = (offsetof(ATTR_REC,
+ data.non_resident.compressed_size) + 7) & ~7;
mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
/*
* Determine the size of the resident part of the now non-resident
@@ -1392,14 +1654,14 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
* attribute value.
*/
attr_size = le32_to_cpu(a->data.resident.value_length);
- BUG_ON(attr_size != i_size_read(vi));
+ BUG_ON(attr_size != data_size);
if (page && !PageUptodate(page)) {
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memcpy(kaddr, (u8*)a +
le16_to_cpu(a->data.resident.value_offset),
attr_size);
memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
flush_dcache_page(page);
SetPageUptodate(page);
}
@@ -1419,24 +1681,25 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
a->name_length * sizeof(ntfschar));
a->name_offset = cpu_to_le16(name_ofs);
- /*
- * FIXME: For now just clear all of these as we do not support them
- * when writing.
- */
- a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
- ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
/* Setup the fields specific to non-resident attributes. */
a->data.non_resident.lowest_vcn = 0;
a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
vol->cluster_size_bits);
a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
- a->data.non_resident.compression_unit = 0;
memset(&a->data.non_resident.reserved, 0,
sizeof(a->data.non_resident.reserved));
a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
a->data.non_resident.data_size =
a->data.non_resident.initialized_size =
cpu_to_sle64(attr_size);
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ a->data.non_resident.compression_unit = 0;
+ if (NInoCompressed(ni) || vol->major_ver < 3)
+ a->data.non_resident.compression_unit = 4;
+ a->data.non_resident.compressed_size =
+ a->data.non_resident.allocated_size;
+ } else
+ a->data.non_resident.compression_unit = 0;
/* Generate the mapping pairs array into the attribute record. */
err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
arec_size - mp_ofs, rl, 0, -1, NULL);
@@ -1446,21 +1709,33 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
goto undo_err_out;
}
/* Setup the in-memory attribute structure to be non-resident. */
- /*
- * FIXME: For now just clear all of these as we do not support them
- * when writing.
- */
- NInoClearSparse(ni);
- NInoClearEncrypted(ni);
- NInoClearCompressed(ni);
ni->runlist.rl = rl;
write_lock_irqsave(&ni->size_lock, flags);
ni->allocated_size = new_size;
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ ni->itype.compressed.size = ni->allocated_size;
+ if (a->data.non_resident.compression_unit) {
+ ni->itype.compressed.block_size = 1U << (a->data.
+ non_resident.compression_unit +
+ vol->cluster_size_bits);
+ ni->itype.compressed.block_size_bits =
+ ffs(ni->itype.compressed.block_size) -
+ 1;
+ ni->itype.compressed.block_clusters = 1U <<
+ a->data.non_resident.compression_unit;
+ } else {
+ ni->itype.compressed.block_size = 0;
+ ni->itype.compressed.block_size_bits = 0;
+ ni->itype.compressed.block_clusters = 0;
+ }
+ vi->i_blocks = ni->itype.compressed.size >> 9;
+ } else
+ vi->i_blocks = ni->allocated_size >> 9;
write_unlock_irqrestore(&ni->size_lock, flags);
/*
* This needs to be last since the address space operations ->readpage
* and ->writepage can run concurrently with us as they are not
- * serialized on i_sem. Note, we are not allowed to fail once we flip
+ * serialized on i_mutex. Note, we are not allowed to fail once we flip
* this switch, which is another reason to do this last.
*/
NInoSetNonResident(ni);
@@ -1473,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
if (page) {
set_page_dirty(page);
unlock_page(page);
- mark_page_accessed(page);
page_cache_release(page);
}
ntfs_debug("Done.");
@@ -1531,9 +1805,9 @@ undo_err_out:
sizeof(a->data.resident.reserved));
/* Copy the data from the page back to the attribute value. */
if (page) {
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memcpy((u8*)a + mp_ofs, kaddr, attr_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
/* Setup the allocated size in the ntfs inode in case it changed. */
write_lock_irqsave(&ni->size_lock, flags);
@@ -1569,6 +1843,634 @@ page_err_out:
}
/**
+ * ntfs_attr_extend_allocation - extend the allocated space of an attribute
+ * @ni: ntfs inode of the attribute whose allocation to extend
+ * @new_alloc_size: new size in bytes to which to extend the allocation to
+ * @new_data_size: new size in bytes to which to extend the data to
+ * @data_start: beginning of region which is required to be non-sparse
+ *
+ * Extend the allocated space of an attribute described by the ntfs inode @ni
+ * to @new_alloc_size bytes. If @data_start is -1, the whole extension may be
+ * implemented as a hole in the file (as long as both the volume and the ntfs
+ * inode @ni have sparse support enabled). If @data_start is >= 0, then the
+ * region between the old allocated size and @data_start - 1 may be made sparse
+ * but the regions between @data_start and @new_alloc_size must be backed by
+ * actual clusters.
+ *
+ * If @new_data_size is -1, it is ignored. If it is >= 0, then the data size
+ * of the attribute is extended to @new_data_size. Note that the i_size of the
+ * vfs inode is not updated. Only the data size in the base attribute record
+ * is updated. The caller has to update i_size separately if this is required.
+ * WARNING: It is a BUG() for @new_data_size to be smaller than the old data
+ * size as well as for @new_data_size to be greater than @new_alloc_size.
+ *
+ * For resident attributes this involves resizing the attribute record and if
+ * necessary moving it and/or other attributes into extent mft records and/or
+ * converting the attribute to a non-resident attribute which in turn involves
+ * extending the allocation of a non-resident attribute as described below.
+ *
+ * For non-resident attributes this involves allocating clusters in the data
+ * zone on the volume (except for regions that are being made sparse) and
+ * extending the run list to describe the allocated clusters as well as
+ * updating the mapping pairs array of the attribute. This in turn involves
+ * resizing the attribute record and if necessary moving it and/or other
+ * attributes into extent mft records and/or splitting the attribute record
+ * into multiple extent attribute records.
+ *
+ * Also, the attribute list attribute is updated if present and in some of the
+ * above cases (the ones where extent mft records/attributes come into play),
+ * an attribute list attribute is created if not already present.
+ *
+ * Return the new allocated size on success and -errno on error. In the case
+ * that an error is encountered but a partial extension at least up to
+ * @data_start (if present) is possible, the allocation is partially extended
+ * and this is returned. This means the caller must check the returned size to
+ * determine if the extension was partial. If @data_start is -1 then partial
+ * allocations are not performed.
+ *
+ * WARNING: Do not call ntfs_attr_extend_allocation() for $MFT/$DATA.
+ *
+ * Locking: This function takes the runlist lock of @ni for writing as well as
+ * locking the mft record of the base ntfs inode. These locks are maintained
+ * throughout execution of the function. These locks are required so that the
+ * attribute can be resized safely and so that it can for example be converted
+ * from resident to non-resident safely.
+ *
+ * TODO: At present attribute list attribute handling is not implemented.
+ *
+ * TODO: At present it is not safe to call this function for anything other
+ * than the $DATA attribute(s) of an uncompressed and unencrypted file.
+ */
+s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+ const s64 new_data_size, const s64 data_start)
+{
+ VCN vcn;
+ s64 ll, allocated_size, start = data_start;
+ struct inode *vi = VFS_I(ni);
+ ntfs_volume *vol = ni->vol;
+ ntfs_inode *base_ni;
+ MFT_RECORD *m;
+ ATTR_RECORD *a;
+ ntfs_attr_search_ctx *ctx;
+ runlist_element *rl, *rl2;
+ unsigned long flags;
+ int err, mp_size;
+ u32 attr_len = 0; /* Silence stupid gcc warning. */
+ bool mp_rebuilt;
+
+#ifdef DEBUG
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+ "old_allocated_size 0x%llx, "
+ "new_allocated_size 0x%llx, new_data_size 0x%llx, "
+ "data_start 0x%llx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)allocated_size,
+ (unsigned long long)new_alloc_size,
+ (unsigned long long)new_data_size,
+ (unsigned long long)start);
+#endif
+retry_extend:
+ /*
+ * For non-resident attributes, @start and @new_alloc_size need to be
+ * aligned to cluster boundaries for allocation purposes.
+ */
+ if (NInoNonResident(ni)) {
+ if (start > 0)
+ start &= ~(s64)vol->cluster_size_mask;
+ new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
+ ~(s64)vol->cluster_size_mask;
+ }
+ BUG_ON(new_data_size >= 0 && new_data_size > new_alloc_size);
+ /* Check if new size is allowed in $AttrDef. */
+ err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
+ if (unlikely(err)) {
+ /* Only emit errors when the write will fail completely. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (start < 0 || start >= allocated_size) {
+ if (err == -ERANGE) {
+ ntfs_error(vol->sb, "Cannot extend allocation "
+ "of inode 0x%lx, attribute "
+ "type 0x%x, because the new "
+ "allocation would exceed the "
+ "maximum allowed size for "
+ "this attribute type.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ } else {
+ ntfs_error(vol->sb, "Cannot extend allocation "
+ "of inode 0x%lx, attribute "
+ "type 0x%x, because this "
+ "attribute type is not "
+ "defined on the NTFS volume. "
+ "Possible corruption! You "
+ "should run chkdsk!",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ }
+ }
+ /* Translate error code to be POSIX conformant for write(2). */
+ if (err == -ERANGE)
+ err = -EFBIG;
+ else
+ err = -EIO;
+ return err;
+ }
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ /*
+ * We will be modifying both the runlist (if non-resident) and the mft
+ * record so lock them both down.
+ */
+ down_write(&ni->runlist.lock);
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL;
+ ctx = NULL;
+ goto err_out;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /*
+ * If non-resident, seek to the last extent. If resident, there is
+ * only one extent, so seek to that.
+ */
+ vcn = NInoNonResident(ni) ? allocated_size >> vol->cluster_size_bits :
+ 0;
+ /*
+ * Abort if someone did the work whilst we waited for the locks. If we
+ * just converted the attribute from resident to non-resident it is
+ * likely that exactly this has happened already. We cannot quite
+ * abort if we need to update the data size.
+ */
+ if (unlikely(new_alloc_size <= allocated_size)) {
+ ntfs_debug("Allocated size already exceeds requested size.");
+ new_alloc_size = allocated_size;
+ if (new_data_size < 0)
+ goto done;
+ /*
+ * We want the first attribute extent so that we can update the
+ * data size.
+ */
+ vcn = 0;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, vcn, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ /* Use goto to reduce indentation. */
+ if (a->non_resident)
+ goto do_non_resident_extend;
+ BUG_ON(NInoNonResident(ni));
+ /* The total length of the attribute value. */
+ attr_len = le32_to_cpu(a->data.resident.value_length);
+ /*
+ * Extend the attribute record to be able to store the new attribute
+ * size. ntfs_attr_record_resize() will not do anything if the size is
+ * not changing.
+ */
+ if (new_alloc_size < vol->mft_record_size &&
+ !ntfs_attr_record_resize(m, a,
+ le16_to_cpu(a->data.resident.value_offset) +
+ new_alloc_size)) {
+ /* The resize succeeded! */
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->allocated_size = le32_to_cpu(a->length) -
+ le16_to_cpu(a->data.resident.value_offset);
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ if (new_data_size >= 0) {
+ BUG_ON(new_data_size < attr_len);
+ a->data.resident.value_length =
+ cpu_to_le32((u32)new_data_size);
+ }
+ goto flush_done;
+ }
+ /*
+ * We have to drop all the locks so we can call
+ * ntfs_attr_make_non_resident(). This could be optimised by try-
+ * locking the first page cache page and only if that fails dropping
+ * the locks, locking the page, and redoing all the locking and
+ * lookups. While this would be a huge optimisation, it is not worth
+ * it as this is definitely a slow code path.
+ */
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ /*
+ * Not enough space in the mft record, try to make the attribute
+ * non-resident and if successful restart the extension process.
+ */
+ err = ntfs_attr_make_non_resident(ni, attr_len);
+ if (likely(!err))
+ goto retry_extend;
+ /*
+ * Could not make non-resident. If this is due to this not being
+ * permitted for this attribute type or there not being enough space,
+ * try to make other attributes non-resident. Otherwise fail.
+ */
+ if (unlikely(err != -EPERM && err != -ENOSPC)) {
+ /* Only emit errors when the write will fail completely. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation of "
+ "inode 0x%lx, attribute type 0x%x, "
+ "because the conversion from resident "
+ "to non-resident attribute failed "
+ "with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err != -ENOMEM)
+ err = -EIO;
+ goto conv_err_out;
+ }
+ /* TODO: Not implemented from here, abort. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ allocated_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (start < 0 || start >= allocated_size) {
+ if (err == -ENOSPC)
+ ntfs_error(vol->sb, "Not enough space in the mft "
+ "record/on disk for the non-resident "
+ "attribute value. This case is not "
+ "implemented yet.");
+ else /* if (err == -EPERM) */
+ ntfs_error(vol->sb, "This attribute type may not be "
+ "non-resident. This case is not "
+ "implemented yet.");
+ }
+ err = -EOPNOTSUPP;
+ goto conv_err_out;
+#if 0
+ // TODO: Attempt to make other attributes non-resident.
+ if (!err)
+ goto do_resident_extend;
+ /*
+ * Both the attribute list attribute and the standard information
+ * attribute must remain in the base inode. Thus, if this is one of
+ * these attributes, we have to try to move other attributes out into
+ * extent mft records instead.
+ */
+ if (ni->type == AT_ATTRIBUTE_LIST ||
+ ni->type == AT_STANDARD_INFORMATION) {
+ // TODO: Attempt to move other attributes into extent mft
+ // records.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ goto err_out;
+ }
+ // TODO: Attempt to move this attribute to an extent mft record, but
+ // only if it is not already the only attribute in an mft record in
+ // which case there would be nothing to gain.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ /* There is nothing we can do to make enough space. )-: */
+ goto err_out;
+#endif
+do_non_resident_extend:
+ BUG_ON(!NInoNonResident(ni));
+ if (new_alloc_size == allocated_size) {
+ BUG_ON(vcn);
+ goto alloc_done;
+ }
+ /*
+ * If the data starts after the end of the old allocation, this is a
+ * $DATA attribute and sparse attributes are enabled on the volume and
+ * for this inode, then create a sparse region between the old
+ * allocated size and the start of the data. Otherwise simply proceed
+ * with filling the whole space between the old allocated size and the
+ * new allocated size with clusters.
+ */
+ if ((start >= 0 && start <= allocated_size) || ni->type != AT_DATA ||
+ !NVolSparseEnabled(vol) || NInoSparseDisabled(ni))
+ goto skip_sparse;
+ // TODO: This is not implemented yet. We just fill in with real
+ // clusters for now...
+ ntfs_debug("Inserting holes is not-implemented yet. Falling back to "
+ "allocating real clusters instead.");
+skip_sparse:
+ rl = ni->runlist.rl;
+ if (likely(rl)) {
+ /* Seek to the end of the runlist. */
+ while (rl->length)
+ rl++;
+ }
+ /* If this attribute extent is not mapped, map it now. */
+ if (unlikely(!rl || rl->lcn == LCN_RL_NOT_MAPPED ||
+ (rl->lcn == LCN_ENOENT && rl > ni->runlist.rl &&
+ (rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
+ if (!rl && !allocated_size)
+ goto first_alloc;
+ rl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
+ if (IS_ERR(rl)) {
+ err = PTR_ERR(rl);
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation "
+ "of inode 0x%lx, attribute "
+ "type 0x%x, because the "
+ "mapping of a runlist "
+ "fragment failed with error "
+ "code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ err);
+ if (err != -ENOMEM)
+ err = -EIO;
+ goto err_out;
+ }
+ ni->runlist.rl = rl;
+ /* Seek to the end of the runlist. */
+ while (rl->length)
+ rl++;
+ }
+ /*
+ * We now know the runlist of the last extent is mapped and @rl is at
+ * the end of the runlist. We want to begin allocating clusters
+ * starting at the last allocated cluster to reduce fragmentation. If
+ * there are no valid LCNs in the attribute we let the cluster
+ * allocator choose the starting cluster.
+ */
+ /* If the last LCN is a hole or similar, seek back to last real LCN. */
+ while (rl->lcn < 0 && rl > ni->runlist.rl)
+ rl--;
+first_alloc:
+ // FIXME: Need to implement partial allocations so at least part of the
+ // write can be performed when start >= 0. (Needed for POSIX write(2)
+ // conformance.)
+ rl2 = ntfs_cluster_alloc(vol, allocated_size >> vol->cluster_size_bits,
+ (new_alloc_size - allocated_size) >>
+ vol->cluster_size_bits, (rl && (rl->lcn >= 0)) ?
+ rl->lcn + rl->length : -1, DATA_ZONE, true);
+ if (IS_ERR(rl2)) {
+ err = PTR_ERR(rl2);
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation of "
+ "inode 0x%lx, attribute type 0x%x, "
+ "because the allocation of clusters "
+ "failed with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err != -ENOMEM && err != -ENOSPC)
+ err = -EIO;
+ goto err_out;
+ }
+ rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+ if (IS_ERR(rl)) {
+ err = PTR_ERR(rl);
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation of "
+ "inode 0x%lx, attribute type 0x%x, "
+ "because the runlist merge failed "
+ "with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err != -ENOMEM)
+ err = -EIO;
+ if (ntfs_cluster_free_from_rl(vol, rl2)) {
+ ntfs_error(vol->sb, "Failed to release allocated "
+ "cluster(s) in error code path. Run "
+ "chkdsk to recover the lost "
+ "cluster(s).");
+ NVolSetErrors(vol);
+ }
+ ntfs_free(rl2);
+ goto err_out;
+ }
+ ni->runlist.rl = rl;
+ ntfs_debug("Allocated 0x%llx clusters.", (long long)(new_alloc_size -
+ allocated_size) >> vol->cluster_size_bits);
+ /* Find the runlist element with which the attribute extent starts. */
+ ll = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+ rl2 = ntfs_rl_find_vcn_nolock(rl, ll);
+ BUG_ON(!rl2);
+ BUG_ON(!rl2->length);
+ BUG_ON(rl2->lcn < LCN_HOLE);
+ mp_rebuilt = false;
+ /* Get the size for the new mapping pairs array for this extent. */
+ mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
+ if (unlikely(mp_size <= 0)) {
+ err = mp_size;
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation of "
+ "inode 0x%lx, attribute type 0x%x, "
+ "because determining the size for the "
+ "mapping pairs failed with error code "
+ "%i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ err = -EIO;
+ goto undo_alloc;
+ }
+ /* Extend the attribute record to fit the bigger mapping pairs array. */
+ attr_len = le32_to_cpu(a->length);
+ err = ntfs_attr_record_resize(m, a, mp_size +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+ if (unlikely(err)) {
+ BUG_ON(err != -ENOSPC);
+ // TODO: Deal with this by moving this extent to a new mft
+ // record or by starting a new extent in a new mft record,
+ // possibly by extending this extent partially and filling it
+ // and creating a new extent for the remainder, or by making
+ // other attributes non-resident and/or by moving other
+ // attributes out of this mft record.
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Not enough space in the mft "
+ "record for the extended attribute "
+ "record. This case is not "
+ "implemented yet.");
+ err = -EOPNOTSUPP;
+ goto undo_alloc;
+ }
+ mp_rebuilt = true;
+ /* Generate the mapping pairs array directly into the attr record. */
+ err = ntfs_mapping_pairs_build(vol, (u8*)a +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+ mp_size, rl2, ll, -1, NULL);
+ if (unlikely(err)) {
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot extend allocation of "
+ "inode 0x%lx, attribute type 0x%x, "
+ "because building the mapping pairs "
+ "failed with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ err = -EIO;
+ goto undo_alloc;
+ }
+ /* Update the highest_vcn. */
+ a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+ vol->cluster_size_bits) - 1);
+ /*
+ * We now have extended the allocated size of the attribute. Reflect
+ * this in the ntfs_inode structure and the attribute record.
+ */
+ if (a->data.non_resident.lowest_vcn) {
+ /*
+ * We are not in the first attribute extent, switch to it, but
+ * first ensure the changes will make it to disk later.
+ */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ ntfs_attr_reinit_search_ctx(ctx);
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err))
+ goto restore_undo_alloc;
+ /* @m is not used any more so no need to set it. */
+ a = ctx->attr;
+ }
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->allocated_size = new_alloc_size;
+ a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+ /*
+ * FIXME: This would fail if @ni is a directory, $MFT, or an index,
+ * since those can have sparse/compressed set. For example a directory
+ * can be set compressed even though it is not compressed itself, in
+ * which case the bit means that files are to be created compressed in
+ * the directory... At present this is ok as this code is only called
+ * for regular files, and only for their $DATA attribute(s).
+ * FIXME: The calculation is wrong if we created a hole above. For now
+ * it does not matter as we never create holes.
+ */
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ ni->itype.compressed.size += new_alloc_size - allocated_size;
+ a->data.non_resident.compressed_size =
+ cpu_to_sle64(ni->itype.compressed.size);
+ vi->i_blocks = ni->itype.compressed.size >> 9;
+ } else
+ vi->i_blocks = new_alloc_size >> 9;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+alloc_done:
+ if (new_data_size >= 0) {
+ BUG_ON(new_data_size <
+ sle64_to_cpu(a->data.non_resident.data_size));
+ a->data.non_resident.data_size = cpu_to_sle64(new_data_size);
+ }
+flush_done:
+ /* Ensure the changes make it to disk. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+done:
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ ntfs_debug("Done, new_allocated_size 0x%llx.",
+ (unsigned long long)new_alloc_size);
+ return new_alloc_size;
+restore_undo_alloc:
+ if (start < 0 || start >= allocated_size)
+ ntfs_error(vol->sb, "Cannot complete extension of allocation "
+ "of inode 0x%lx, attribute type 0x%x, because "
+ "lookup of first attribute extent failed with "
+ "error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err == -ENOENT)
+ err = -EIO;
+ ntfs_attr_reinit_search_ctx(ctx);
+ if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE,
+ allocated_size >> vol->cluster_size_bits, NULL, 0,
+ ctx)) {
+ ntfs_error(vol->sb, "Failed to find last attribute extent of "
+ "attribute in error code path. Run chkdsk to "
+ "recover.");
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->allocated_size = new_alloc_size;
+ /*
+ * FIXME: This would fail if @ni is a directory... See above.
+ * FIXME: The calculation is wrong if we created a hole above.
+ * For now it does not matter as we never create holes.
+ */
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ ni->itype.compressed.size += new_alloc_size -
+ allocated_size;
+ vi->i_blocks = ni->itype.compressed.size >> 9;
+ } else
+ vi->i_blocks = new_alloc_size >> 9;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ /*
+ * The only thing that is now wrong is the allocated size of the
+ * base attribute extent which chkdsk should be able to fix.
+ */
+ NVolSetErrors(vol);
+ return err;
+ }
+ ctx->attr->data.non_resident.highest_vcn = cpu_to_sle64(
+ (allocated_size >> vol->cluster_size_bits) - 1);
+undo_alloc:
+ ll = allocated_size >> vol->cluster_size_bits;
+ if (ntfs_cluster_free(ni, ll, -1, ctx) < 0) {
+ ntfs_error(vol->sb, "Failed to release allocated cluster(s) "
+ "in error code path. Run chkdsk to recover "
+ "the lost cluster(s).");
+ NVolSetErrors(vol);
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ /*
+ * If the runlist truncation fails and/or the search context is no
+ * longer valid, we cannot resize the attribute record or build the
+ * mapping pairs array thus we mark the inode bad so that no access to
+ * the freed clusters can happen.
+ */
+ if (ntfs_rl_truncate_nolock(vol, &ni->runlist, ll) || IS_ERR(m)) {
+ ntfs_error(vol->sb, "Failed to %s in error code path. Run "
+ "chkdsk to recover.", IS_ERR(m) ?
+ "restore attribute search context" :
+ "truncate attribute runlist");
+ NVolSetErrors(vol);
+ } else if (mp_rebuilt) {
+ if (ntfs_attr_record_resize(m, a, attr_len)) {
+ ntfs_error(vol->sb, "Failed to restore attribute "
+ "record in error code path. Run "
+ "chkdsk to recover.");
+ NVolSetErrors(vol);
+ } else /* if (success) */ {
+ if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+ a->data.non_resident.
+ mapping_pairs_offset), attr_len -
+ le16_to_cpu(a->data.non_resident.
+ mapping_pairs_offset), rl2, ll, -1,
+ NULL)) {
+ ntfs_error(vol->sb, "Failed to restore "
+ "mapping pairs array in error "
+ "code path. Run chkdsk to "
+ "recover.");
+ NVolSetErrors(vol);
+ }
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ }
+ }
+err_out:
+ if (ctx)
+ ntfs_attr_put_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+conv_err_out:
+ ntfs_debug("Failed. Returning error code %i.", err);
+ return err;
+}
+
+/**
* ntfs_attr_set - fill (a part of) an attribute with a byte
* @ni: ntfs inode describing the attribute to fill
* @ofs: offset inside the attribute at which to start to fill
@@ -1595,7 +2497,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
struct page *page;
u8 *kaddr;
pgoff_t idx, end;
- unsigned int start_ofs, end_ofs, size;
+ unsigned start_ofs, end_ofs, size;
ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.",
(long long)ofs, (long long)cnt, val);
@@ -1603,6 +2505,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
BUG_ON(cnt < 0);
if (!cnt)
goto done;
+ /*
+ * FIXME: Compressed and encrypted attributes are not supported when
+ * writing and we should never have gotten here for them.
+ */
+ BUG_ON(NInoCompressed(ni));
+ BUG_ON(NInoEncrypted(ni));
mapping = VFS_I(ni)->i_mapping;
/* Work out the starting index and page offset. */
idx = ofs >> PAGE_CACHE_SHIFT;
@@ -1618,18 +2526,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
end >>= PAGE_CACHE_SHIFT;
/* If there is a first partial page, need to do it the slow way. */
if (start_ofs) {
- page = read_cache_page(mapping, idx,
- (filler_t*)mapping->a_ops->readpage, NULL);
+ page = read_mapping_page(mapping, idx, NULL);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read first partial "
- "page (sync error, index 0x%lx).", idx);
- return PTR_ERR(page);
- }
- wait_on_page_locked(page);
- if (unlikely(!PageUptodate(page))) {
- ntfs_error(vol->sb, "Failed to read first partial page "
- "(async error, index 0x%lx).", idx);
- page_cache_release(page);
+ "page (error, index 0x%lx).", idx);
return PTR_ERR(page);
}
/*
@@ -1639,12 +2539,14 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
size = PAGE_CACHE_SIZE;
if (idx == end)
size = end_ofs;
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memset(kaddr + start_ofs, val, size - start_ofs);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
set_page_dirty(page);
page_cache_release(page);
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
if (idx == end)
goto done;
idx++;
@@ -1658,10 +2560,10 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
"page (index 0x%lx).", idx);
return -ENOMEM;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memset(kaddr, val, PAGE_CACHE_SIZE);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
/*
* If the page has buffers, mark them uptodate since buffer
* state and not page state is definitive in 2.6 kernels.
@@ -1684,29 +2586,25 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
/* Finally unlock and release the page. */
unlock_page(page);
page_cache_release(page);
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
}
/* If there is a last partial page, need to do it the slow way. */
if (end_ofs) {
- page = read_cache_page(mapping, idx,
- (filler_t*)mapping->a_ops->readpage, NULL);
+ page = read_mapping_page(mapping, idx, NULL);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read last partial page "
- "(sync error, index 0x%lx).", idx);
- return PTR_ERR(page);
- }
- wait_on_page_locked(page);
- if (unlikely(!PageUptodate(page))) {
- ntfs_error(vol->sb, "Failed to read last partial page "
- "(async error, index 0x%lx).", idx);
- page_cache_release(page);
+ "(error, index 0x%lx).", idx);
return PTR_ERR(page);
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
memset(kaddr, val, end_ofs);
flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
set_page_dirty(page);
page_cache_release(page);
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
}
done:
ntfs_debug("Done.");
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0e4ac6d3c0e..3c8b74c99b8 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -40,10 +40,10 @@
* Structure must be initialized to zero before the first call to one of the
* attribute search functions. Initialize @mrec to point to the mft record to
* search, and @attr to point to the first attribute within @mrec (not necessary
- * if calling the _first() functions), and set @is_first to TRUE (not necessary
+ * if calling the _first() functions), and set @is_first to 'true' (not necessary
* if calling the _first() functions).
*
- * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE,
+ * If @is_first is 'true', the search begins with @attr. If @is_first is 'false',
* the search begins after @attr. This is so that, after the first call to one
* of the search attribute functions, we can call the function again, without
* any modification of the search context, to automagically get the next
@@ -52,7 +52,7 @@
typedef struct {
MFT_RECORD *mrec;
ATTR_RECORD *attr;
- BOOL is_first;
+ bool is_first;
ntfs_inode *ntfs_ino;
ATTR_LIST_ENTRY *al_entry;
ntfs_inode *base_ntfs_ino;
@@ -60,14 +60,15 @@ typedef struct {
ATTR_RECORD *base_attr;
} ntfs_attr_search_ctx;
-extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
+ ntfs_attr_search_ctx *ctx);
extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
- const BOOL write_locked);
+ const bool write_locked);
extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
- const VCN vcn, const BOOL write_locked);
+ const VCN vcn, ntfs_attr_search_ctx *ctx);
int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
const u32 name_len, const IGNORE_CASE_BOOL ic,
@@ -99,8 +100,13 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
const ATTR_TYPE type);
extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
+extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+ const u32 new_size);
+
+extern int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size);
-extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
+extern s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
+ const s64 new_data_size, const s64 data_start);
extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
const u8 val);
diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c
index 12cf2e30c7d..0809cf87609 100644
--- a/fs/ntfs/bitmap.c
+++ b/fs/ntfs/bitmap.c
@@ -1,7 +1,7 @@
/*
* bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -34,18 +34,18 @@
* @start_bit: first bit to set
* @count: number of bits to set
* @value: value to set the bits to (i.e. 0 or 1)
- * @is_rollback: if TRUE this is a rollback operation
+ * @is_rollback: if 'true' this is a rollback operation
*
* Set @count bits starting at bit @start_bit in the bitmap described by the
* vfs inode @vi to @value, where @value is either 0 or 1.
*
- * @is_rollback should always be FALSE, it is for internal use to rollback
+ * @is_rollback should always be 'false', it is for internal use to rollback
* errors. You probably want to use ntfs_bitmap_set_bits_in_run() instead.
*
* Return 0 on success and -errno on error.
*/
int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
- const s64 count, const u8 value, const BOOL is_rollback)
+ const s64 count, const u8 value, const bool is_rollback)
{
s64 cnt = count;
pgoff_t index, end_index;
@@ -90,7 +90,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
/* If the first byte is partial, modify the appropriate bits in it. */
if (bit) {
u8 *byte = kaddr + pos;
- while ((bit & 7) && cnt--) {
+ while ((bit & 7) && cnt) {
+ cnt--;
if (value)
*byte |= 1 << bit++;
else
@@ -171,7 +172,7 @@ rollback:
return PTR_ERR(page);
if (count != cnt)
pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt,
- value ? 0 : 1, TRUE);
+ value ? 0 : 1, true);
else
pos = 0;
if (!pos) {
diff --git a/fs/ntfs/bitmap.h b/fs/ntfs/bitmap.h
index bb50d6bc921..72c9ad8be70 100644
--- a/fs/ntfs/bitmap.h
+++ b/fs/ntfs/bitmap.h
@@ -30,7 +30,7 @@
#include "types.h"
extern int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
- const s64 count, const u8 value, const BOOL is_rollback);
+ const s64 count, const u8 value, const bool is_rollback);
/**
* ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value
@@ -48,7 +48,7 @@ static inline int ntfs_bitmap_set_bits_in_run(struct inode *vi,
const s64 start_bit, const s64 count, const u8 value)
{
return __ntfs_bitmap_set_bits_in_run(vi, start_bit, count, value,
- FALSE);
+ false);
}
/**
diff --git a/fs/ntfs/collate.h b/fs/ntfs/collate.h
index e027f36fcc2..aba83347e5f 100644
--- a/fs/ntfs/collate.h
+++ b/fs/ntfs/collate.h
@@ -26,7 +26,7 @@
#include "types.h"
#include "volume.h"
-static inline BOOL ntfs_is_collation_rule_supported(COLLATION_RULE cr) {
+static inline bool ntfs_is_collation_rule_supported(COLLATION_RULE cr) {
int i;
/*
@@ -35,12 +35,12 @@ static inline BOOL ntfs_is_collation_rule_supported(COLLATION_RULE cr) {
* now.
*/
if (unlikely(cr != COLLATION_BINARY && cr != COLLATION_NTOFS_ULONG))
- return FALSE;
+ return false;
i = le32_to_cpu(cr);
if (likely(((i >= 0) && (i <= 0x02)) ||
((i >= 0x10) && (i <= 0x13))))
- return TRUE;
- return FALSE;
+ return true;
+ return false;
}
extern int ntfs_collate(ntfs_volume *vol, COLLATION_RULE cr,
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6d265cfd49a..f82498c35e7 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -25,6 +25,7 @@
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include "attrib.h"
#include "inode.h"
@@ -57,7 +58,7 @@ typedef enum {
/**
* ntfs_compression_buffer - one buffer for the decompression engine
*/
-static u8 *ntfs_compression_buffer = NULL;
+static u8 *ntfs_compression_buffer;
/**
* ntfs_cb_lock - spinlock which protects ntfs_compression_buffer
@@ -67,7 +68,7 @@ static DEFINE_SPINLOCK(ntfs_cb_lock);
/**
* allocate_compression_buffers - allocate the decompression buffers
*
- * Caller has to hold the ntfs_lock semaphore.
+ * Caller has to hold the ntfs_lock mutex.
*
* Return 0 on success or -ENOMEM if the allocations failed.
*/
@@ -84,7 +85,7 @@ int allocate_compression_buffers(void)
/**
* free_compression_buffers - free the decompression buffers
*
- * Caller has to hold the ntfs_lock semaphore.
+ * Caller has to hold the ntfs_lock mutex.
*/
void free_compression_buffers(void)
{
@@ -500,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page)
VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
vol->cluster_size_bits;
/*
- * The first vcn after the last wanted vcn (minumum alignment is again
+ * The first vcn after the last wanted vcn (minimum alignment is again
* PAGE_CACHE_SIZE.
*/
VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
@@ -539,7 +540,6 @@ int ntfs_read_compressed_block(struct page *page)
if (unlikely(!pages || !bhs)) {
kfree(bhs);
kfree(pages);
- SetPageError(page);
unlock_page(page);
ntfs_error(vol->sb, "Failed to allocate internal buffers.");
return -ENOMEM;
@@ -562,6 +562,16 @@ int ntfs_read_compressed_block(struct page *page)
read_unlock_irqrestore(&ni->size_lock, flags);
max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
offset;
+ /* Is the page fully outside i_size? (truncate in progress) */
+ if (xpage >= max_page) {
+ kfree(bhs);
+ kfree(pages);
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+ ntfs_debug("Compressed read outside i_size - truncated?");
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
if (nr_pages < max_page)
max_page = nr_pages;
for (i = 0; i < max_page; i++, offset++) {
@@ -601,7 +611,7 @@ do_next_cb:
rl = NULL;
for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
vcn++) {
- BOOL is_retry = FALSE;
+ bool is_retry = false;
if (!rl) {
lock_retry_remap:
@@ -627,7 +637,7 @@ lock_retry_remap:
break;
if (is_retry || lcn != LCN_RL_NOT_MAPPED)
goto rl_err;
- is_retry = TRUE;
+ is_retry = true;
/*
* Attempt to map runlist, dropping lock for the
* duration.
@@ -656,7 +666,7 @@ lock_retry_remap:
for (i = 0; i < nr_bhs; i++) {
struct buffer_head *tbh = bhs[i];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
continue;
if (unlikely(buffer_uptodate(tbh))) {
unlock_buffer(tbh);
@@ -688,8 +698,7 @@ lock_retry_remap:
"uptodate! Unplugging the disk queue "
"and rescheduling.");
get_bh(tbh);
- blk_run_address_space(mapping);
- schedule();
+ io_schedule();
put_bh(tbh);
if (unlikely(!buffer_uptodate(tbh)))
goto read_err;
@@ -871,9 +880,6 @@ lock_retry_remap:
for (; prev_cur_page < cur_page; prev_cur_page++) {
page = pages[prev_cur_page];
if (page) {
- if (prev_cur_page == xpage &&
- !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
@@ -904,8 +910,6 @@ lock_retry_remap:
"Terminating them with extreme "
"prejudice. Inode 0x%lx, page index "
"0x%lx.", ni->mft_no, page->index);
- if (cur_page == xpage && !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
@@ -923,7 +927,7 @@ lock_retry_remap:
return 0;
ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
- "EOVERFLOW" : (!err ? "EIO" : "unkown error"));
+ "EOVERFLOW" : (!err ? "EIO" : "unknown error"));
return err < 0 ? err : -EIO;
read_err:
@@ -953,8 +957,6 @@ err_out:
for (i = cur_page; i < max_page; i++) {
page = pages[i];
if (page) {
- if (i == xpage && !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 807150e2c2b..dd6103cc93c 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -18,16 +18,9 @@
* distribution in the file COPYING); if not, write to the Free Software
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "debug.h"
-/*
- * A static buffer to hold the error string being displayed and a spinlock
- * to protect concurrent accesses to it.
- */
-static char err_buf[1024];
-static DEFINE_SPINLOCK(err_buf_lock);
-
/**
* __ntfs_warning - output a warning to the syslog
* @function: name of function outputting the warning
@@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock);
void __ntfs_warning(const char *function, const struct super_block *sb,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
#endif
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
if (sb)
- printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n",
- sb->s_id, flen ? function : "", err_buf);
+ pr_warn("(device %s): %s(): %pV\n",
+ sb->s_id, flen ? function : "", &vaf);
else
- printk(KERN_ERR "NTFS-fs warning: %s(): %s\n",
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
+ pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
+ va_end(args);
}
/**
@@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
void __ntfs_error(const char *function, const struct super_block *sb,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb,
#endif
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
if (sb)
- printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n",
- sb->s_id, flen ? function : "", err_buf);
+ pr_err("(device %s): %s(): %pV\n",
+ sb->s_id, flen ? function : "", &vaf);
else
- printk(KERN_ERR "NTFS-fs error: %s(): %s\n",
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
+ pr_err("%s(): %pV\n", flen ? function : "", &vaf);
+ va_end(args);
}
#ifdef DEBUG
@@ -124,6 +115,7 @@ int debug_msgs = 0;
void __ntfs_debug (const char *file, int line, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function,
return;
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
va_end(args);
- printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line,
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
}
/* Dump a runlist. Caller has to provide synchronisation for @rl. */
@@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
if (!debug_msgs)
return;
- printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n");
+ pr_debug("Dumping runlist (values in hex):\n");
if (!rl) {
- printk(KERN_DEBUG "Run list not present.\n");
+ pr_debug("Run list not present.\n");
return;
}
- printk(KERN_DEBUG "VCN LCN Run length\n");
+ pr_debug("VCN LCN Run length\n");
for (i = 0; ; i++) {
LCN lcn = (rl + i)->lcn;
@@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
if (index > -LCN_ENOENT - 1)
index = 3;
- printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
+ pr_debug("%-16Lx %s %-16Lx%s\n",
(long long)(rl + i)->vcn, lcn_str[index],
(long long)(rl + i)->length,
(rl + i)->length ? "" :
" (runlist end)");
} else
- printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n",
+ pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
(long long)(rl + i)->vcn,
(long long)(rl + i)->lcn,
(long long)(rl + i)->length,
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d12..61bf091e32a 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -30,7 +30,9 @@
extern int debug_msgs;
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+extern __printf(4, 5)
+void __ntfs_debug(const char *file, int line, const char *function,
+ const char *format, ...);
/**
* ntfs_debug - write a debug level message to syslog
* @f: a printf format string containing the message
@@ -39,29 +41,31 @@ extern int debug_msgs;
* ntfs_debug() writes a DEBUG level message to the syslog but only if the
* driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP.
*/
-static void ntfs_debug(const char *f, ...);
-#endif
-
-extern void __ntfs_debug (const char *file, int line, const char *function,
- const char *format, ...) __attribute__ ((format (printf, 4, 5)));
#define ntfs_debug(f, a...) \
- __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a)
+ __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
extern void ntfs_debug_dump_runlist(const runlist_element *rl);
#else /* !DEBUG */
-#define ntfs_debug(f, a...) do {} while (0)
+#define ntfs_debug(fmt, ...) \
+do { \
+ if (0) \
+ no_printk(fmt, ##__VA_ARGS__); \
+} while (0)
+
#define ntfs_debug_dump_runlist(rl) do {} while (0)
#endif /* !DEBUG */
-extern void __ntfs_warning(const char *function, const struct super_block *sb,
- const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
-#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a)
+extern __printf(3, 4)
+void __ntfs_warning(const char *function, const struct super_block *sb,
+ const char *fmt, ...);
+#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
-extern void __ntfs_error(const char *function, const struct super_block *sb,
- const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
-#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a)
+extern __printf(3, 4)
+void __ntfs_error(const char *function, const struct super_block *sb,
+ const char *fmt, ...);
+#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 46779471c54..9e38dafa3bc 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,7 +1,7 @@
/**
* dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -20,8 +20,8 @@
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
+#include <linux/slab.h>
#include "dir.h"
#include "aops.h"
@@ -33,8 +33,8 @@
/**
* The little endian Unicode string $I30 as a global constant.
*/
-ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
- const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 };
+ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
+ cpu_to_le16('3'), cpu_to_le16('0'), 0 };
/**
* ntfs_lookup_inode_by_name - find an inode in a directory given its name
@@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
* work but we don't care for how quickly one can access them. This also fixes
* the dcache aliasing issues.
*
- * Locking: - Caller must hold i_sem on the directory.
+ * Locking: - Caller must hold i_mutex on the directory.
* - Each page cache page in the index allocation mapping must be
* locked whilst being accessed otherwise we may find a corrupt
* page due to it being under ->writepage at the moment which
@@ -1004,13 +1004,11 @@ dir_err_out:
/**
* ntfs_filldir - ntfs specific filldir method
* @vol: current ntfs volume
- * @fpos: position in the directory
* @ndir: ntfs inode of current directory
* @ia_page: page in which the index allocation buffer @ie is in resides
* @ie: current index entry
* @name: buffer to use for the converted name
- * @dirent: vfs filldir callback context
- * @filldir: vfs filldir callback
+ * @actor: what to feed the entries to
*
* Convert the Unicode @name to the loaded NLS and pass it to the @filldir
* callback.
@@ -1024,12 +1022,12 @@ dir_err_out:
* retake the lock if we are returning a non-zero value as ntfs_readdir()
* would need to drop the lock immediately anyway.
*/
-static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
+static inline int ntfs_filldir(ntfs_volume *vol,
ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
- u8 *name, void *dirent, filldir_t filldir)
+ u8 *name, struct dir_context *actor)
{
unsigned long mref;
- int name_len, rc;
+ int name_len;
unsigned dt_type;
FILE_NAME_TYPE_FLAGS name_type;
@@ -1051,7 +1049,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
ie->key.file_name.file_name_length, &name,
NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
if (name_len <= 0) {
- ntfs_debug("Skipping unrepresentable file.");
+ ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
+ (long long)MREF_LE(ie->data.dir.indexed_file));
return 0;
}
if (ie->key.file_name.file_attributes &
@@ -1067,13 +1066,14 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
if (ia_page)
unlock_page(ia_page);
ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
- "0x%lx, DT_%s.", name, name_len, fpos, mref,
+ "0x%lx, DT_%s.", name, name_len, actor->pos, mref,
dt_type == DT_DIR ? "DIR" : "REG");
- rc = filldir(dirent, name, name_len, fpos, mref, dt_type);
+ if (!dir_emit(actor, name, name_len, mref, dt_type))
+ return 1;
/* Relock the page but not if we are aborting ->readdir. */
- if (!rc && ia_page)
+ if (ia_page)
lock_page(ia_page);
- return rc;
+ return 0;
}
/*
@@ -1084,11 +1084,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
* While this will return the names in random order this doesn't matter for
* ->readdir but OTOH results in a faster ->readdir.
*
- * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS
+ * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
* parts (e.g. ->f_pos and ->i_size, and it also protects against directory
* modifications).
*
- * Locking: - Caller must hold i_sem on the directory.
+ * Locking: - Caller must hold i_mutex on the directory.
* - Each page cache page in the index allocation mapping must be
* locked whilst being accessed otherwise we may find a corrupt
* page due to it being under ->writepage at the moment which
@@ -1096,11 +1096,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
* removes them again after the write is complete after which it
* unlocks the page.
*/
-static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int ntfs_readdir(struct file *file, struct dir_context *actor)
{
s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
- loff_t fpos, i_size;
- struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode;
+ loff_t i_size;
+ struct inode *bmp_vi, *vdir = file_inode(file);
struct super_block *sb = vdir->i_sb;
ntfs_inode *ndir = NTFS_I(vdir);
ntfs_volume *vol = NTFS_SB(sb);
@@ -1115,47 +1115,29 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
u8 *kaddr, *bmp, *index_end;
ntfs_attr_search_ctx *ctx;
- fpos = filp->f_pos;
ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
- vdir->i_ino, fpos);
+ vdir->i_ino, actor->pos);
rc = err = 0;
/* Are we at end of dir yet? */
i_size = i_size_read(vdir);
- if (fpos >= i_size + vol->mft_record_size)
- goto done;
+ if (actor->pos >= i_size + vol->mft_record_size)
+ return 0;
/* Emulate . and .. for all directories. */
- if (!fpos) {
- ntfs_debug("Calling filldir for . with len 1, fpos 0x0, "
- "inode 0x%lx, DT_DIR.", vdir->i_ino);
- rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR);
- if (rc)
- goto done;
- fpos++;
- }
- if (fpos == 1) {
- ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, "
- "inode 0x%lx, DT_DIR.",
- parent_ino(filp->f_dentry));
- rc = filldir(dirent, "..", 2, fpos,
- parent_ino(filp->f_dentry), DT_DIR);
- if (rc)
- goto done;
- fpos++;
- }
+ if (!dir_emit_dots(file, actor))
+ return 0;
m = NULL;
ctx = NULL;
/*
* Allocate a buffer to store the current name being processed
* converted to format determined by current NLS.
*/
- name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1,
- GFP_NOFS);
+ name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
if (unlikely(!name)) {
err = -ENOMEM;
goto err_out;
}
/* Are we jumping straight into the index allocation attribute? */
- if (fpos >= vol->mft_record_size)
+ if (actor->pos >= vol->mft_record_size)
goto skip_index_root;
/* Get hold of the mft record for the directory. */
m = map_mft_record(ndir);
@@ -1170,7 +1152,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto err_out;
}
/* Get the offset into the index root attribute. */
- ir_pos = (s64)fpos;
+ ir_pos = (s64)actor->pos;
/* Find the index root attribute in the mft record. */
err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
0, ctx);
@@ -1190,7 +1172,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
* map the mft record without deadlocking.
*/
rc = le32_to_cpu(ctx->attr->data.resident.value_length);
- ir = (INDEX_ROOT*)kmalloc(rc, GFP_NOFS);
+ ir = kmalloc(rc, GFP_NOFS);
if (unlikely(!ir)) {
err = -ENOMEM;
goto err_out;
@@ -1226,10 +1208,9 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (ir_pos > (u8*)ie - (u8*)ir)
continue;
/* Advance the position even if going to skip the entry. */
- fpos = (u8*)ie - (u8*)ir;
+ actor->pos = (u8*)ie - (u8*)ir;
/* Submit the name to the filldir callback. */
- rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent,
- filldir);
+ rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor);
if (rc) {
kfree(ir);
goto abort;
@@ -1242,23 +1223,19 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (!NInoIndexAllocPresent(ndir))
goto EOD;
/* Advance fpos to the beginning of the index allocation. */
- fpos = vol->mft_record_size;
+ actor->pos = vol->mft_record_size;
skip_index_root:
kaddr = NULL;
prev_ia_pos = -1LL;
/* Get the offset into the index allocation attribute. */
- ia_pos = (s64)fpos - vol->mft_record_size;
+ ia_pos = (s64)actor->pos - vol->mft_record_size;
ia_mapping = vdir->i_mapping;
- bmp_vi = ndir->itype.index.bmp_ino;
- if (unlikely(!bmp_vi)) {
- ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino);
- bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
- if (IS_ERR(bmp_vi)) {
- ntfs_error(sb, "Failed to get bitmap attribute.");
- err = PTR_ERR(bmp_vi);
- goto err_out;
- }
- ndir->itype.index.bmp_ino = bmp_vi;
+ ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
+ bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
+ if (IS_ERR(bmp_vi)) {
+ ntfs_error(sb, "Failed to get bitmap attribute.");
+ err = PTR_ERR(bmp_vi);
+ goto err_out;
}
bmp_mapping = bmp_vi->i_mapping;
/* Get the starting bitmap bit position and sanity check it. */
@@ -1266,7 +1243,7 @@ skip_index_root:
if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
ntfs_error(sb, "Current index allocation position exceeds "
"index bitmap size.");
- goto err_out;
+ goto iput_err_out;
}
/* Get the starting bit position in the current bitmap page. */
cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
@@ -1282,7 +1259,7 @@ get_next_bmp_page:
ntfs_error(sb, "Reading index bitmap failed.");
err = PTR_ERR(bmp_page);
bmp_page = NULL;
- goto err_out;
+ goto iput_err_out;
}
bmp = (u8*)page_address(bmp_page);
/* Find next index block in use. */
@@ -1413,7 +1390,7 @@ find_next_index_buffer:
if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
continue;
/* Advance the position even if going to skip the entry. */
- fpos = (u8*)ie - (u8*)ia +
+ actor->pos = (u8*)ie - (u8*)ia +
(sle64_to_cpu(ia->index_block_vcn) <<
ndir->itype.index.vcn_size_bits) +
vol->mft_record_size;
@@ -1423,12 +1400,12 @@ find_next_index_buffer:
* before returning, unless a non-zero value is returned in
* which case the page is left unlocked.
*/
- rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent,
- filldir);
+ rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor);
if (rc) {
/* @ia_page is already unlocked in this case. */
ntfs_unmap_page(ia_page);
ntfs_unmap_page(bmp_page);
+ iput(bmp_vi);
goto abort;
}
}
@@ -1439,24 +1416,19 @@ unm_EOD:
ntfs_unmap_page(ia_page);
}
ntfs_unmap_page(bmp_page);
+ iput(bmp_vi);
EOD:
/* We are finished, set fpos to EOD. */
- fpos = i_size + vol->mft_record_size;
+ actor->pos = i_size + vol->mft_record_size;
abort:
kfree(name);
-done:
-#ifdef DEBUG
- if (!rc)
- ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos);
- else
- ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.",
- rc, fpos);
-#endif
- filp->f_pos = fpos;
return 0;
err_out:
- if (bmp_page)
+ if (bmp_page) {
ntfs_unmap_page(bmp_page);
+iput_err_out:
+ iput(bmp_vi);
+ }
if (ia_page) {
unlock_page(ia_page);
ntfs_unmap_page(ia_page);
@@ -1470,7 +1442,6 @@ err_out:
if (!err)
err = -EIO;
ntfs_debug("Failed. Returning error code %i.", -err);
- filp->f_pos = fpos;
return err;
}
@@ -1519,25 +1490,39 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
* Note: In the past @filp could be NULL so we ignore it as we don't need it
* anyway.
*
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Caller must hold i_mutex on the inode.
*
* TODO: We should probably also write all attribute/index inodes associated
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now. We do write the $BITMAP attribute if it is present
* which is the important one for a directory so things are not too bad.
*/
-static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
- struct inode *vi = dentry->d_inode;
- ntfs_inode *ni = NTFS_I(vi);
+ struct inode *bmp_vi, *vi = filp->f_mapping->host;
int err, ret;
+ ntfs_attr na;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(!S_ISDIR(vi->i_mode));
- if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino)
- write_inode_now(ni->itype.index.bmp_ino, !datasync);
- ret = ntfs_write_inode(vi, 1);
+ /* If the bitmap attribute inode is in memory sync it, too. */
+ na.mft_no = vi->i_ino;
+ na.type = AT_BITMAP;
+ na.name = I30;
+ na.name_len = 4;
+ bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na);
+ if (bmp_vi) {
+ write_inode_now(bmp_vi, !datasync);
+ iput(bmp_vi);
+ }
+ ret = __ntfs_write_inode(vi, 1);
write_inode_now(vi, !datasync);
err = sync_blockdev(vi->i_sb->s_bdev);
if (unlikely(err && !ret))
@@ -1547,15 +1532,16 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
#endif /* NTFS_RW */
-struct file_operations ntfs_dir_ops = {
+const struct file_operations ntfs_dir_ops = {
.llseek = generic_file_llseek, /* Seek inside directory. */
.read = generic_read_dir, /* Return -EISDIR. */
- .readdir = ntfs_readdir, /* Read directory contents. */
+ .iterate = ntfs_readdir, /* Read directory contents. */
#ifdef NTFS_RW
.fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
/*.aio_fsync = ,*/ /* Sync all outstanding async
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e0f530ce6b9..5c9e2c81cb1 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
/*
- * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
+ * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -19,11 +19,26 @@
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <linux/pagemap.h>
#include <linux/buffer_head.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+#include <linux/uio.h>
+#include <linux/writeback.h>
+#include <linux/aio.h>
+
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include "attrib.h"
+#include "bitmap.h"
#include "inode.h"
#include "debug.h"
+#include "lcnalloc.h"
+#include "malloc.h"
+#include "mft.h"
#include "ntfs.h"
/**
@@ -48,7 +63,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
{
if (sizeof(unsigned long) < 8) {
if (i_size_read(vi) > MAX_LFS_FILESIZE)
- return -EFBIG;
+ return -EOVERFLOW;
}
return generic_file_open(vi, filp);
}
@@ -56,9 +71,2075 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
#ifdef NTFS_RW
/**
+ * ntfs_attr_extend_initialized - extend the initialized size of an attribute
+ * @ni: ntfs inode of the attribute to extend
+ * @new_init_size: requested new initialized size in bytes
+ * @cached_page: store any allocated but unused page here
+ * @lru_pvec: lru-buffering pagevec of the caller
+ *
+ * Extend the initialized size of an attribute described by the ntfs inode @ni
+ * to @new_init_size bytes. This involves zeroing any non-sparse space between
+ * the old initialized size and @new_init_size both in the page cache and on
+ * disk (if relevant complete pages are already uptodate in the page cache then
+ * these are simply marked dirty).
+ *
+ * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
+ * in the resident attribute case, it is tied to the initialized size and, in
+ * the non-resident attribute case, it may not fall below the initialized size.
+ *
+ * Note that if the attribute is resident, we do not need to touch the page
+ * cache at all. This is because if the page cache page is not uptodate we
+ * bring it uptodate later, when doing the write to the mft record since we
+ * then already have the page mapped. And if the page is uptodate, the
+ * non-initialized region will already have been zeroed when the page was
+ * brought uptodate and the region may in fact already have been overwritten
+ * with new data via mmap() based writes, so we cannot just zero it. And since
+ * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
+ * is unspecified, we choose not to do zeroing and thus we do not need to touch
+ * the page at all. For a more detailed explanation see ntfs_truncate() in
+ * fs/ntfs/inode.c.
+ *
+ * Return 0 on success and -errno on error. In the case that an error is
+ * encountered it is possible that the initialized size will already have been
+ * incremented some way towards @new_init_size but it is guaranteed that if
+ * this is the case, the necessary zeroing will also have happened and that all
+ * metadata is self-consistent.
+ *
+ * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be
+ * held by the caller.
+ */
+static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
+{
+ s64 old_init_size;
+ loff_t old_i_size;
+ pgoff_t index, end_index;
+ unsigned long flags;
+ struct inode *vi = VFS_I(ni);
+ ntfs_inode *base_ni;
+ MFT_RECORD *m = NULL;
+ ATTR_RECORD *a;
+ ntfs_attr_search_ctx *ctx = NULL;
+ struct address_space *mapping;
+ struct page *page = NULL;
+ u8 *kattr;
+ int err;
+ u32 attr_len;
+
+ read_lock_irqsave(&ni->size_lock, flags);
+ old_init_size = ni->initialized_size;
+ old_i_size = i_size_read(vi);
+ BUG_ON(new_init_size > ni->allocated_size);
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+ "old_initialized_size 0x%llx, "
+ "new_initialized_size 0x%llx, i_size 0x%llx.",
+ vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)old_init_size,
+ (unsigned long long)new_init_size, old_i_size);
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ /* Use goto to reduce indentation and we need the label below anyway. */
+ if (NInoNonResident(ni))
+ goto do_non_resident_extend;
+ BUG_ON(old_init_size != old_i_size);
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL;
+ goto err_out;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ BUG_ON(a->non_resident);
+ /* The total length of the attribute value. */
+ attr_len = le32_to_cpu(a->data.resident.value_length);
+ BUG_ON(old_i_size != (loff_t)attr_len);
+ /*
+ * Do the zeroing in the mft record and update the attribute size in
+ * the mft record.
+ */
+ kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+ memset(kattr + attr_len, 0, new_init_size - attr_len);
+ a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
+ /* Finally, update the sizes in the vfs and ntfs inodes. */
+ write_lock_irqsave(&ni->size_lock, flags);
+ i_size_write(vi, new_init_size);
+ ni->initialized_size = new_init_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ goto done;
+do_non_resident_extend:
+ /*
+ * If the new initialized size @new_init_size exceeds the current file
+ * size (vfs inode->i_size), we need to extend the file size to the
+ * new initialized size.
+ */
+ if (new_init_size > old_i_size) {
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL;
+ goto err_out;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ BUG_ON(!a->non_resident);
+ BUG_ON(old_i_size != (loff_t)
+ sle64_to_cpu(a->data.non_resident.data_size));
+ a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ /* Update the file size in the vfs inode. */
+ i_size_write(vi, new_init_size);
+ ntfs_attr_put_search_ctx(ctx);
+ ctx = NULL;
+ unmap_mft_record(base_ni);
+ m = NULL;
+ }
+ mapping = vi->i_mapping;
+ index = old_init_size >> PAGE_CACHE_SHIFT;
+ end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ do {
+ /*
+ * Read the page. If the page is not present, this will zero
+ * the uninitialized regions for us.
+ */
+ page = read_mapping_page(mapping, index, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto init_err_out;
+ }
+ if (unlikely(PageError(page))) {
+ page_cache_release(page);
+ err = -EIO;
+ goto init_err_out;
+ }
+ /*
+ * Update the initialized size in the ntfs inode. This is
+ * enough to make ntfs_writepage() work.
+ */
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT;
+ if (ni->initialized_size > new_init_size)
+ ni->initialized_size = new_init_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ /* Set the page dirty so it gets written out. */
+ set_page_dirty(page);
+ page_cache_release(page);
+ /*
+ * Play nice with the vm and the rest of the system. This is
+ * very much needed as we can potentially be modifying the
+ * initialised size from a very small value to a really huge
+ * value, e.g.
+ * f = open(somefile, O_TRUNC);
+ * truncate(f, 10GiB);
+ * seek(f, 10GiB);
+ * write(f, 1);
+ * And this would mean we would be marking dirty hundreds of
+ * thousands of pages or as in the above example more than
+ * two and a half million pages!
+ *
+ * TODO: For sparse pages could optimize this workload by using
+ * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This
+ * would be set in readpage for sparse pages and here we would
+ * not need to mark dirty any pages which have this bit set.
+ * The only caveat is that we have to clear the bit everywhere
+ * where we allocate any clusters that lie in the page or that
+ * contain the page.
+ *
+ * TODO: An even greater optimization would be for us to only
+ * call readpage() on pages which are not in sparse regions as
+ * determined from the runlist. This would greatly reduce the
+ * number of pages we read and make dirty in the case of sparse
+ * files.
+ */
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
+ } while (++index < end_index);
+ read_lock_irqsave(&ni->size_lock, flags);
+ BUG_ON(ni->initialized_size != new_init_size);
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /* Now bring in sync the initialized_size in the mft record. */
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL;
+ goto init_err_out;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto init_err_out;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto init_err_out;
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ BUG_ON(!a->non_resident);
+ a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
+done:
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ if (ctx)
+ ntfs_attr_put_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(base_ni);
+ ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
+ (unsigned long long)new_init_size, i_size_read(vi));
+ return 0;
+init_err_out:
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->initialized_size = old_init_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+err_out:
+ if (ctx)
+ ntfs_attr_put_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(base_ni);
+ ntfs_debug("Failed. Returning error code %i.", err);
+ return err;
+}
+
+/**
+ * ntfs_fault_in_pages_readable -
+ *
+ * Fault a number of userspace pages into pagetables.
+ *
+ * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
+ * with more than two userspace pages as well as handling the single page case
+ * elegantly.
+ *
+ * If you find this difficult to understand, then think of the while loop being
+ * the following code, except that we do without the integer variable ret:
+ *
+ * do {
+ * ret = __get_user(c, uaddr);
+ * uaddr += PAGE_SIZE;
+ * } while (!ret && uaddr < end);
+ *
+ * Note, the final __get_user() may well run out-of-bounds of the user buffer,
+ * but _not_ out-of-bounds of the page the user buffer belongs to, and since
+ * this is only a read and not a write, and since it is still in the same page,
+ * it should not matter and this makes the code much simpler.
+ */
+static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
+ int bytes)
+{
+ const char __user *end;
+ volatile char c;
+
+ /* Set @end to the first byte outside the last page we care about. */
+ end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
+
+ while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
+ ;
+}
+
+/**
+ * ntfs_fault_in_pages_readable_iovec -
+ *
+ * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
+ */
+static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
+ size_t iov_ofs, int bytes)
+{
+ do {
+ const char __user *buf;
+ unsigned len;
+
+ buf = iov->iov_base + iov_ofs;
+ len = iov->iov_len - iov_ofs;
+ if (len > bytes)
+ len = bytes;
+ ntfs_fault_in_pages_readable(buf, len);
+ bytes -= len;
+ iov++;
+ iov_ofs = 0;
+ } while (bytes);
+}
+
+/**
+ * __ntfs_grab_cache_pages - obtain a number of locked pages
+ * @mapping: address space mapping from which to obtain page cache pages
+ * @index: starting index in @mapping at which to begin obtaining pages
+ * @nr_pages: number of page cache pages to obtain
+ * @pages: array of pages in which to return the obtained page cache pages
+ * @cached_page: allocated but as yet unused page
+ * @lru_pvec: lru-buffering pagevec of caller
+ *
+ * Obtain @nr_pages locked page cache pages from the mapping @mapping and
+ * starting at index @index.
+ *
+ * If a page is newly created, add it to the lru list.
+ *
+ * Note, the page locks are obtained in ascending page index order.
+ */
+static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
+ pgoff_t index, const unsigned nr_pages, struct page **pages,
+ struct page **cached_page)
+{
+ int err, nr;
+
+ BUG_ON(!nr_pages);
+ err = nr = 0;
+ do {
+ pages[nr] = find_lock_page(mapping, index);
+ if (!pages[nr]) {
+ if (!*cached_page) {
+ *cached_page = page_cache_alloc(mapping);
+ if (unlikely(!*cached_page)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
+ err = add_to_page_cache_lru(*cached_page, mapping, index,
+ GFP_KERNEL);
+ if (unlikely(err)) {
+ if (err == -EEXIST)
+ continue;
+ goto err_out;
+ }
+ pages[nr] = *cached_page;
+ *cached_page = NULL;
+ }
+ index++;
+ nr++;
+ } while (nr < nr_pages);
+out:
+ return err;
+err_out:
+ while (nr > 0) {
+ unlock_page(pages[--nr]);
+ page_cache_release(pages[nr]);
+ }
+ goto out;
+}
+
+static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
+{
+ lock_buffer(bh);
+ get_bh(bh);
+ bh->b_end_io = end_buffer_read_sync;
+ return submit_bh(READ, bh);
+}
+
+/**
+ * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
+ * @pages: array of destination pages
+ * @nr_pages: number of pages in @pages
+ * @pos: byte position in file at which the write begins
+ * @bytes: number of bytes to be written
+ *
+ * This is called for non-resident attributes from ntfs_file_buffered_write()
+ * with i_mutex held on the inode (@pages[0]->mapping->host). There are
+ * @nr_pages pages in @pages which are locked but not kmap()ped. The source
+ * data has not yet been copied into the @pages.
+ *
+ * Need to fill any holes with actual clusters, allocate buffers if necessary,
+ * ensure all the buffers are mapped, and bring uptodate any buffers that are
+ * only partially being written to.
+ *
+ * If @nr_pages is greater than one, we are guaranteed that the cluster size is
+ * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside
+ * the same cluster and that they are the entirety of that cluster, and that
+ * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
+ *
+ * i_size is not to be modified yet.
+ *
+ * Return 0 on success or -errno on error.
+ */
+static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
+ unsigned nr_pages, s64 pos, size_t bytes)
+{
+ VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
+ LCN lcn;
+ s64 bh_pos, vcn_len, end, initialized_size;
+ sector_t lcn_block;
+ struct page *page;
+ struct inode *vi;
+ ntfs_inode *ni, *base_ni = NULL;
+ ntfs_volume *vol;
+ runlist_element *rl, *rl2;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+ ntfs_attr_search_ctx *ctx = NULL;
+ MFT_RECORD *m = NULL;
+ ATTR_RECORD *a = NULL;
+ unsigned long flags;
+ u32 attr_rec_len = 0;
+ unsigned blocksize, u;
+ int err, mp_size;
+ bool rl_write_locked, was_hole, is_retry;
+ unsigned char blocksize_bits;
+ struct {
+ u8 runlist_merged:1;
+ u8 mft_attr_mapped:1;
+ u8 mp_rebuilt:1;
+ u8 attr_switched:1;
+ } status = { 0, 0, 0, 0 };
+
+ BUG_ON(!nr_pages);
+ BUG_ON(!pages);
+ BUG_ON(!*pages);
+ vi = pages[0]->mapping->host;
+ ni = NTFS_I(vi);
+ vol = ni->vol;
+ ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+ "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
+ vi->i_ino, ni->type, pages[0]->index, nr_pages,
+ (long long)pos, bytes);
+ blocksize = vol->sb->s_blocksize;
+ blocksize_bits = vol->sb->s_blocksize_bits;
+ u = 0;
+ do {
+ page = pages[u];
+ BUG_ON(!page);
+ /*
+ * create_empty_buffers() will create uptodate/dirty buffers if
+ * the page is uptodate/dirty.
+ */
+ if (!page_has_buffers(page)) {
+ create_empty_buffers(page, blocksize, 0);
+ if (unlikely(!page_has_buffers(page)))
+ return -ENOMEM;
+ }
+ } while (++u < nr_pages);
+ rl_write_locked = false;
+ rl = NULL;
+ err = 0;
+ vcn = lcn = -1;
+ vcn_len = 0;
+ lcn_block = -1;
+ was_hole = false;
+ cpos = pos >> vol->cluster_size_bits;
+ end = pos + bytes;
+ cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
+ /*
+ * Loop over each page and for each page over each buffer. Use goto to
+ * reduce indentation.
+ */
+ u = 0;
+do_next_page:
+ page = pages[u];
+ bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+ bh = head = page_buffers(page);
+ do {
+ VCN cdelta;
+ s64 bh_end;
+ unsigned bh_cofs;
+
+ /* Clear buffer_new on all buffers to reinitialise state. */
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ bh_end = bh_pos + blocksize;
+ bh_cpos = bh_pos >> vol->cluster_size_bits;
+ bh_cofs = bh_pos & vol->cluster_size_mask;
+ if (buffer_mapped(bh)) {
+ /*
+ * The buffer is already mapped. If it is uptodate,
+ * ignore it.
+ */
+ if (buffer_uptodate(bh))
+ continue;
+ /*
+ * The buffer is not uptodate. If the page is uptodate
+ * set the buffer uptodate and otherwise ignore it.
+ */
+ if (PageUptodate(page)) {
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ /*
+ * Neither the page nor the buffer are uptodate. If
+ * the buffer is only partially being written to, we
+ * need to read it in before the write, i.e. now.
+ */
+ if ((bh_pos < pos && bh_end > pos) ||
+ (bh_pos < end && bh_end > end)) {
+ /*
+ * If the buffer is fully or partially within
+ * the initialized size, do an actual read.
+ * Otherwise, simply zero the buffer.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ initialized_size = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (bh_pos < initialized_size) {
+ ntfs_submit_bh_for_read(bh);
+ *wait_bh++ = bh;
+ } else {
+ zero_user(page, bh_offset(bh),
+ blocksize);
+ set_buffer_uptodate(bh);
+ }
+ }
+ continue;
+ }
+ /* Unmapped buffer. Need to map it. */
+ bh->b_bdev = vol->sb->s_bdev;
+ /*
+ * If the current buffer is in the same clusters as the map
+ * cache, there is no need to check the runlist again. The
+ * map cache is made up of @vcn, which is the first cached file
+ * cluster, @vcn_len which is the number of cached file
+ * clusters, @lcn is the device cluster corresponding to @vcn,
+ * and @lcn_block is the block number corresponding to @lcn.
+ */
+ cdelta = bh_cpos - vcn;
+ if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
+map_buffer_cached:
+ BUG_ON(lcn < 0);
+ bh->b_blocknr = lcn_block +
+ (cdelta << (vol->cluster_size_bits -
+ blocksize_bits)) +
+ (bh_cofs >> blocksize_bits);
+ set_buffer_mapped(bh);
+ /*
+ * If the page is uptodate so is the buffer. If the
+ * buffer is fully outside the write, we ignore it if
+ * it was already allocated and we mark it dirty so it
+ * gets written out if we allocated it. On the other
+ * hand, if we allocated the buffer but we are not
+ * marking it dirty we set buffer_new so we can do
+ * error recovery.
+ */
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ if (unlikely(was_hole)) {
+ /* We allocated the buffer. */
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ if (bh_end <= pos || bh_pos >= end)
+ mark_buffer_dirty(bh);
+ else
+ set_buffer_new(bh);
+ }
+ continue;
+ }
+ /* Page is _not_ uptodate. */
+ if (likely(!was_hole)) {
+ /*
+ * Buffer was already allocated. If it is not
+ * uptodate and is only partially being written
+ * to, we need to read it in before the write,
+ * i.e. now.
+ */
+ if (!buffer_uptodate(bh) && bh_pos < end &&
+ bh_end > pos &&
+ (bh_pos < pos ||
+ bh_end > end)) {
+ /*
+ * If the buffer is fully or partially
+ * within the initialized size, do an
+ * actual read. Otherwise, simply zero
+ * the buffer.
+ */
+ read_lock_irqsave(&ni->size_lock,
+ flags);
+ initialized_size = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock,
+ flags);
+ if (bh_pos < initialized_size) {
+ ntfs_submit_bh_for_read(bh);
+ *wait_bh++ = bh;
+ } else {
+ zero_user(page, bh_offset(bh),
+ blocksize);
+ set_buffer_uptodate(bh);
+ }
+ }
+ continue;
+ }
+ /* We allocated the buffer. */
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ /*
+ * If the buffer is fully outside the write, zero it,
+ * set it uptodate, and mark it dirty so it gets
+ * written out. If it is partially being written to,
+ * zero region surrounding the write but leave it to
+ * commit write to do anything else. Finally, if the
+ * buffer is fully being overwritten, do nothing.
+ */
+ if (bh_end <= pos || bh_pos >= end) {
+ if (!buffer_uptodate(bh)) {
+ zero_user(page, bh_offset(bh),
+ blocksize);
+ set_buffer_uptodate(bh);
+ }
+ mark_buffer_dirty(bh);
+ continue;
+ }
+ set_buffer_new(bh);
+ if (!buffer_uptodate(bh) &&
+ (bh_pos < pos || bh_end > end)) {
+ u8 *kaddr;
+ unsigned pofs;
+
+ kaddr = kmap_atomic(page);
+ if (bh_pos < pos) {
+ pofs = bh_pos & ~PAGE_CACHE_MASK;
+ memset(kaddr + pofs, 0, pos - bh_pos);
+ }
+ if (bh_end > end) {
+ pofs = end & ~PAGE_CACHE_MASK;
+ memset(kaddr + pofs, 0, bh_end - end);
+ }
+ kunmap_atomic(kaddr);
+ flush_dcache_page(page);
+ }
+ continue;
+ }
+ /*
+ * Slow path: this is the first buffer in the cluster. If it
+ * is outside allocated size and is not uptodate, zero it and
+ * set it uptodate.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ initialized_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (bh_pos > initialized_size) {
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ } else if (!buffer_uptodate(bh)) {
+ zero_user(page, bh_offset(bh), blocksize);
+ set_buffer_uptodate(bh);
+ }
+ continue;
+ }
+ is_retry = false;
+ if (!rl) {
+ down_read(&ni->runlist.lock);
+retry_remap:
+ rl = ni->runlist.rl;
+ }
+ if (likely(rl != NULL)) {
+ /* Seek to element containing target cluster. */
+ while (rl->length && rl[1].vcn <= bh_cpos)
+ rl++;
+ lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
+ if (likely(lcn >= 0)) {
+ /*
+ * Successful remap, setup the map cache and
+ * use that to deal with the buffer.
+ */
+ was_hole = false;
+ vcn = bh_cpos;
+ vcn_len = rl[1].vcn - vcn;
+ lcn_block = lcn << (vol->cluster_size_bits -
+ blocksize_bits);
+ cdelta = 0;
+ /*
+ * If the number of remaining clusters touched
+ * by the write is smaller or equal to the
+ * number of cached clusters, unlock the
+ * runlist as the map cache will be used from
+ * now on.
+ */
+ if (likely(vcn + vcn_len >= cend)) {
+ if (rl_write_locked) {
+ up_write(&ni->runlist.lock);
+ rl_write_locked = false;
+ } else
+ up_read(&ni->runlist.lock);
+ rl = NULL;
+ }
+ goto map_buffer_cached;
+ }
+ } else
+ lcn = LCN_RL_NOT_MAPPED;
+ /*
+ * If it is not a hole and not out of bounds, the runlist is
+ * probably unmapped so try to map it now.
+ */
+ if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
+ if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
+ /* Attempt to map runlist. */
+ if (!rl_write_locked) {
+ /*
+ * We need the runlist locked for
+ * writing, so if it is locked for
+ * reading relock it now and retry in
+ * case it changed whilst we dropped
+ * the lock.
+ */
+ up_read(&ni->runlist.lock);
+ down_write(&ni->runlist.lock);
+ rl_write_locked = true;
+ goto retry_remap;
+ }
+ err = ntfs_map_runlist_nolock(ni, bh_cpos,
+ NULL);
+ if (likely(!err)) {
+ is_retry = true;
+ goto retry_remap;
+ }
+ /*
+ * If @vcn is out of bounds, pretend @lcn is
+ * LCN_ENOENT. As long as the buffer is out
+ * of bounds this will work fine.
+ */
+ if (err == -ENOENT) {
+ lcn = LCN_ENOENT;
+ err = 0;
+ goto rl_not_mapped_enoent;
+ }
+ } else
+ err = -EIO;
+ /* Failed to map the buffer, even after retrying. */
+ bh->b_blocknr = -1;
+ ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
+ "attribute type 0x%x, vcn 0x%llx, "
+ "vcn offset 0x%x, because its "
+ "location on disk could not be "
+ "determined%s (error code %i).",
+ ni->mft_no, ni->type,
+ (unsigned long long)bh_cpos,
+ (unsigned)bh_pos &
+ vol->cluster_size_mask,
+ is_retry ? " even after retrying" : "",
+ err);
+ break;
+ }
+rl_not_mapped_enoent:
+ /*
+ * The buffer is in a hole or out of bounds. We need to fill
+ * the hole, unless the buffer is in a cluster which is not
+ * touched by the write, in which case we just leave the buffer
+ * unmapped. This can only happen when the cluster size is
+ * less than the page cache size.
+ */
+ if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) {
+ bh_cend = (bh_end + vol->cluster_size - 1) >>
+ vol->cluster_size_bits;
+ if ((bh_cend <= cpos || bh_cpos >= cend)) {
+ bh->b_blocknr = -1;
+ /*
+ * If the buffer is uptodate we skip it. If it
+ * is not but the page is uptodate, we can set
+ * the buffer uptodate. If the page is not
+ * uptodate, we can clear the buffer and set it
+ * uptodate. Whether this is worthwhile is
+ * debatable and this could be removed.
+ */
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ } else if (!buffer_uptodate(bh)) {
+ zero_user(page, bh_offset(bh),
+ blocksize);
+ set_buffer_uptodate(bh);
+ }
+ continue;
+ }
+ }
+ /*
+ * Out of bounds buffer is invalid if it was not really out of
+ * bounds.
+ */
+ BUG_ON(lcn != LCN_HOLE);
+ /*
+ * We need the runlist locked for writing, so if it is locked
+ * for reading relock it now and retry in case it changed
+ * whilst we dropped the lock.
+ */
+ BUG_ON(!rl);
+ if (!rl_write_locked) {
+ up_read(&ni->runlist.lock);
+ down_write(&ni->runlist.lock);
+ rl_write_locked = true;
+ goto retry_remap;
+ }
+ /* Find the previous last allocated cluster. */
+ BUG_ON(rl->lcn != LCN_HOLE);
+ lcn = -1;
+ rl2 = rl;
+ while (--rl2 >= ni->runlist.rl) {
+ if (rl2->lcn >= 0) {
+ lcn = rl2->lcn + rl2->length;
+ break;
+ }
+ }
+ rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
+ false);
+ if (IS_ERR(rl2)) {
+ err = PTR_ERR(rl2);
+ ntfs_debug("Failed to allocate cluster, error code %i.",
+ err);
+ break;
+ }
+ lcn = rl2->lcn;
+ rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
+ if (IS_ERR(rl)) {
+ err = PTR_ERR(rl);
+ if (err != -ENOMEM)
+ err = -EIO;
+ if (ntfs_cluster_free_from_rl(vol, rl2)) {
+ ntfs_error(vol->sb, "Failed to release "
+ "allocated cluster in error "
+ "code path. Run chkdsk to "
+ "recover the lost cluster.");
+ NVolSetErrors(vol);
+ }
+ ntfs_free(rl2);
+ break;
+ }
+ ni->runlist.rl = rl;
+ status.runlist_merged = 1;
+ ntfs_debug("Allocated cluster, lcn 0x%llx.",
+ (unsigned long long)lcn);
+ /* Map and lock the mft record and get the attribute record. */
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ break;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ unmap_mft_record(base_ni);
+ break;
+ }
+ status.mft_attr_mapped = 1;
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ break;
+ }
+ m = ctx->mrec;
+ a = ctx->attr;
+ /*
+ * Find the runlist element with which the attribute extent
+ * starts. Note, we cannot use the _attr_ version because we
+ * have mapped the mft record. That is ok because we know the
+ * runlist fragment must be mapped already to have ever gotten
+ * here, so we can just use the _rl_ version.
+ */
+ vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
+ rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
+ BUG_ON(!rl2);
+ BUG_ON(!rl2->length);
+ BUG_ON(rl2->lcn < LCN_HOLE);
+ highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+ /*
+ * If @highest_vcn is zero, calculate the real highest_vcn
+ * (which can really be zero).
+ */
+ if (!highest_vcn)
+ highest_vcn = (sle64_to_cpu(
+ a->data.non_resident.allocated_size) >>
+ vol->cluster_size_bits) - 1;
+ /*
+ * Determine the size of the mapping pairs array for the new
+ * extent, i.e. the old extent with the hole filled.
+ */
+ mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
+ highest_vcn);
+ if (unlikely(mp_size <= 0)) {
+ if (!(err = mp_size))
+ err = -EIO;
+ ntfs_debug("Failed to get size for mapping pairs "
+ "array, error code %i.", err);
+ break;
+ }
+ /*
+ * Resize the attribute record to fit the new mapping pairs
+ * array.
+ */
+ attr_rec_len = le32_to_cpu(a->length);
+ err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
+ a->data.non_resident.mapping_pairs_offset));
+ if (unlikely(err)) {
+ BUG_ON(err != -ENOSPC);
+ // TODO: Deal with this by using the current attribute
+ // and fill it with as much of the mapping pairs
+ // array as possible. Then loop over each attribute
+ // extent rewriting the mapping pairs arrays as we go
+ // along and if when we reach the end we have not
+ // enough space, try to resize the last attribute
+ // extent and if even that fails, add a new attribute
+ // extent.
+ // We could also try to resize at each step in the hope
+ // that we will not need to rewrite every single extent.
+ // Note, we may need to decompress some extents to fill
+ // the runlist as we are walking the extents...
+ ntfs_error(vol->sb, "Not enough space in the mft "
+ "record for the extended attribute "
+ "record. This case is not "
+ "implemented yet.");
+ err = -EOPNOTSUPP;
+ break ;
+ }
+ status.mp_rebuilt = 1;
+ /*
+ * Generate the mapping pairs array directly into the attribute
+ * record.
+ */
+ err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+ a->data.non_resident.mapping_pairs_offset),
+ mp_size, rl2, vcn, highest_vcn, NULL);
+ if (unlikely(err)) {
+ ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, "
+ "attribute type 0x%x, because building "
+ "the mapping pairs failed with error "
+ "code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ err = -EIO;
+ break;
+ }
+ /* Update the highest_vcn but only if it was not set. */
+ if (unlikely(!a->data.non_resident.highest_vcn))
+ a->data.non_resident.highest_vcn =
+ cpu_to_sle64(highest_vcn);
+ /*
+ * If the attribute is sparse/compressed, update the compressed
+ * size in the ntfs_inode structure and the attribute record.
+ */
+ if (likely(NInoSparse(ni) || NInoCompressed(ni))) {
+ /*
+ * If we are not in the first attribute extent, switch
+ * to it, but first ensure the changes will make it to
+ * disk later.
+ */
+ if (a->data.non_resident.lowest_vcn) {
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ ntfs_attr_reinit_search_ctx(ctx);
+ err = ntfs_attr_lookup(ni->type, ni->name,
+ ni->name_len, CASE_SENSITIVE,
+ 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ status.attr_switched = 1;
+ break;
+ }
+ /* @m is not used any more so do not set it. */
+ a = ctx->attr;
+ }
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->itype.compressed.size += vol->cluster_size;
+ a->data.non_resident.compressed_size =
+ cpu_to_sle64(ni->itype.compressed.size);
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ }
+ /* Ensure the changes make it to disk. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ /* Successfully filled the hole. */
+ status.runlist_merged = 0;
+ status.mft_attr_mapped = 0;
+ status.mp_rebuilt = 0;
+ /* Setup the map cache and use that to deal with the buffer. */
+ was_hole = true;
+ vcn = bh_cpos;
+ vcn_len = 1;
+ lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits);
+ cdelta = 0;
+ /*
+ * If the number of remaining clusters in the @pages is smaller
+ * or equal to the number of cached clusters, unlock the
+ * runlist as the map cache will be used from now on.
+ */
+ if (likely(vcn + vcn_len >= cend)) {
+ up_write(&ni->runlist.lock);
+ rl_write_locked = false;
+ rl = NULL;
+ }
+ goto map_buffer_cached;
+ } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+ /* If there are no errors, do the next page. */
+ if (likely(!err && ++u < nr_pages))
+ goto do_next_page;
+ /* If there are no errors, release the runlist lock if we took it. */
+ if (likely(!err)) {
+ if (unlikely(rl_write_locked)) {
+ up_write(&ni->runlist.lock);
+ rl_write_locked = false;
+ } else if (unlikely(rl))
+ up_read(&ni->runlist.lock);
+ rl = NULL;
+ }
+ /* If we issued read requests, let them complete. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ initialized_size = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ while (wait_bh > wait) {
+ bh = *--wait_bh;
+ wait_on_buffer(bh);
+ if (likely(buffer_uptodate(bh))) {
+ page = bh->b_page;
+ bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) +
+ bh_offset(bh);
+ /*
+ * If the buffer overflows the initialized size, need
+ * to zero the overflowing region.
+ */
+ if (unlikely(bh_pos + blocksize > initialized_size)) {
+ int ofs = 0;
+
+ if (likely(bh_pos < initialized_size))
+ ofs = initialized_size - bh_pos;
+ zero_user_segment(page, bh_offset(bh) + ofs,
+ blocksize);
+ }
+ } else /* if (unlikely(!buffer_uptodate(bh))) */
+ err = -EIO;
+ }
+ if (likely(!err)) {
+ /* Clear buffer_new on all buffers. */
+ u = 0;
+ do {
+ bh = head = page_buffers(pages[u]);
+ do {
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ } while ((bh = bh->b_this_page) != head);
+ } while (++u < nr_pages);
+ ntfs_debug("Done.");
+ return err;
+ }
+ if (status.attr_switched) {
+ /* Get back to the attribute extent we modified. */
+ ntfs_attr_reinit_search_ctx(ctx);
+ if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) {
+ ntfs_error(vol->sb, "Failed to find required "
+ "attribute extent of attribute in "
+ "error code path. Run chkdsk to "
+ "recover.");
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->itype.compressed.size += vol->cluster_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ /*
+ * The only thing that is now wrong is the compressed
+ * size of the base attribute extent which chkdsk
+ * should be able to fix.
+ */
+ NVolSetErrors(vol);
+ } else {
+ m = ctx->mrec;
+ a = ctx->attr;
+ status.attr_switched = 0;
+ }
+ }
+ /*
+ * If the runlist has been modified, need to restore it by punching a
+ * hole into it and we then need to deallocate the on-disk cluster as
+ * well. Note, we only modify the runlist if we are able to generate a
+ * new mapping pairs array, i.e. only when the mapped attribute extent
+ * is not switched.
+ */
+ if (status.runlist_merged && !status.attr_switched) {
+ BUG_ON(!rl_write_locked);
+ /* Make the file cluster we allocated sparse in the runlist. */
+ if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) {
+ ntfs_error(vol->sb, "Failed to punch hole into "
+ "attribute runlist in error code "
+ "path. Run chkdsk to recover the "
+ "lost cluster.");
+ NVolSetErrors(vol);
+ } else /* if (success) */ {
+ status.runlist_merged = 0;
+ /*
+ * Deallocate the on-disk cluster we allocated but only
+ * if we succeeded in punching its vcn out of the
+ * runlist.
+ */
+ down_write(&vol->lcnbmp_lock);
+ if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) {
+ ntfs_error(vol->sb, "Failed to release "
+ "allocated cluster in error "
+ "code path. Run chkdsk to "
+ "recover the lost cluster.");
+ NVolSetErrors(vol);
+ }
+ up_write(&vol->lcnbmp_lock);
+ }
+ }
+ /*
+ * Resize the attribute record to its old size and rebuild the mapping
+ * pairs array. Note, we only can do this if the runlist has been
+ * restored to its old state which also implies that the mapped
+ * attribute extent is not switched.
+ */
+ if (status.mp_rebuilt && !status.runlist_merged) {
+ if (ntfs_attr_record_resize(m, a, attr_rec_len)) {
+ ntfs_error(vol->sb, "Failed to restore attribute "
+ "record in error code path. Run "
+ "chkdsk to recover.");
+ NVolSetErrors(vol);
+ } else /* if (success) */ {
+ if (ntfs_mapping_pairs_build(vol, (u8*)a +
+ le16_to_cpu(a->data.non_resident.
+ mapping_pairs_offset), attr_rec_len -
+ le16_to_cpu(a->data.non_resident.
+ mapping_pairs_offset), ni->runlist.rl,
+ vcn, highest_vcn, NULL)) {
+ ntfs_error(vol->sb, "Failed to restore "
+ "mapping pairs array in error "
+ "code path. Run chkdsk to "
+ "recover.");
+ NVolSetErrors(vol);
+ }
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ }
+ }
+ /* Release the mft record and the attribute. */
+ if (status.mft_attr_mapped) {
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ }
+ /* Release the runlist lock. */
+ if (rl_write_locked)
+ up_write(&ni->runlist.lock);
+ else if (rl)
+ up_read(&ni->runlist.lock);
+ /*
+ * Zero out any newly allocated blocks to avoid exposing stale data.
+ * If BH_New is set, we know that the block was newly allocated above
+ * and that it has not been fully zeroed and marked dirty yet.
+ */
+ nr_pages = u;
+ u = 0;
+ end = bh_cpos << vol->cluster_size_bits;
+ do {
+ page = pages[u];
+ bh = head = page_buffers(page);
+ do {
+ if (u == nr_pages &&
+ ((s64)page->index << PAGE_CACHE_SHIFT) +
+ bh_offset(bh) >= end)
+ break;
+ if (!buffer_new(bh))
+ continue;
+ clear_buffer_new(bh);
+ if (!buffer_uptodate(bh)) {
+ if (PageUptodate(page))
+ set_buffer_uptodate(bh);
+ else {
+ zero_user(page, bh_offset(bh),
+ blocksize);
+ set_buffer_uptodate(bh);
+ }
+ }
+ mark_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+ } while (++u <= nr_pages);
+ ntfs_error(vol->sb, "Failed. Returning error code %i.", err);
+ return err;
+}
+
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied. If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ *
+ * @pages: destination pages, filled in array order
+ * @nr_pages: number of pages in @pages
+ * @ofs: byte offset into the first page at which copying starts; all
+ * later pages are filled from offset 0
+ * @buf: user space source buffer
+ * @bytes: number of bytes to copy
+ *
+ * Each page is first tried under kmap_atomic() with
+ * __copy_from_user_inatomic(). Only if that leaves bytes uncopied is the page
+ * kmap()ped and the same chunk retried with __copy_from_user().
+ *
+ * On a fault, the pages after the faulting one are zeroed with zero_user() for
+ * the part of the request that would have landed in them.
+ */
+static inline size_t ntfs_copy_from_user(struct page **pages,
+ unsigned nr_pages, unsigned ofs, const char __user *buf,
+ size_t bytes)
+{
+ struct page **last_page = pages + nr_pages;
+ char *addr;
+ size_t total = 0;
+ unsigned len;
+ int left;
+
+ do {
+ len = PAGE_CACHE_SIZE - ofs; /* room left in this page */
+ if (len > bytes)
+ len = bytes;
+ addr = kmap_atomic(*pages);
+ left = __copy_from_user_inatomic(addr + ofs, buf, len);
+ kunmap_atomic(addr);
+ if (unlikely(left)) {
+ /* Do it the slow way. */
+ addr = kmap(*pages);
+ left = __copy_from_user(addr + ofs, buf, len);
+ kunmap(*pages);
+ if (unlikely(left))
+ goto err_out;
+ }
+ total += len;
+ bytes -= len;
+ if (!bytes)
+ break;
+ buf += len;
+ ofs = 0; /* every page after the first starts at offset 0 */
+ } while (++pages < last_page);
+out:
+ return total;
+err_out:
+ total += len - left; /* count the part of the chunk that did copy */
+ /* Zero the rest of the target like __copy_from_user(). */
+ while (++pages < last_page) {
+ bytes -= len; /* drop the chunk that belonged to the previous page */
+ if (!bytes)
+ break;
+ len = PAGE_CACHE_SIZE;
+ if (len > bytes)
+ len = bytes;
+ zero_user(*pages, 0, len);
+ }
+ goto out;
+}
+/*
+ * __ntfs_copy_from_user_iovec_inatomic - copy from an iovec array to @vaddr
+ * @vaddr: kernel destination address
+ * @iov: iovec segment at which to start reading
+ * @iov_ofs: byte offset into @iov at which to start reading
+ * @bytes: number of bytes to copy
+ *
+ * Copy @bytes bytes to @vaddr using __copy_from_user_inatomic(), moving on to
+ * the following iovec segments as each one is used up. Stop at the first
+ * segment for which the copy comes up short.
+ *
+ * Return the number of bytes copied, which is less than @bytes if the copy
+ * stopped early. @iov and @iov_ofs are received by value, so the caller's
+ * position in the iovec array is not advanced by this function.
+ */
+static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
+ const struct iovec *iov, size_t iov_ofs, size_t bytes)
+{
+ size_t total = 0;
+
+ while (1) {
+ const char __user *buf = iov->iov_base + iov_ofs;
+ unsigned len;
+ size_t left;
+
+ len = iov->iov_len - iov_ofs; /* bytes left in this segment */
+ if (len > bytes)
+ len = bytes;
+ left = __copy_from_user_inatomic(vaddr, buf, len);
+ total += len;
+ bytes -= len;
+ vaddr += len;
+ if (unlikely(left)) {
+ total -= left; /* do not count the uncopied tail */
+ break;
+ }
+ if (!bytes)
+ break;
+ iov++; /* segment used up, continue with the next one */
+ iov_ofs = 0;
+ }
+ return total;
+}
+/*
+ * ntfs_set_next_iovec - advance an iovec position by @bytes bytes
+ * @iovp: in: current iovec segment; out: segment after advancing
+ * @iov_ofsp: in: byte offset into *@iovp; out: offset after advancing
+ * @bytes: number of bytes to advance by
+ *
+ * Walk forward through the iovec array, consuming @bytes bytes. Whenever the
+ * offset reaches the end of a segment, step to the next segment and reset the
+ * offset to zero. The resulting position is written back through @iovp and
+ * @iov_ofsp.
+ */
+static inline void ntfs_set_next_iovec(const struct iovec **iovp,
+ size_t *iov_ofsp, size_t bytes)
+{
+ const struct iovec *iov = *iovp;
+ size_t iov_ofs = *iov_ofsp;
+
+ while (bytes) {
+ unsigned len;
+
+ len = iov->iov_len - iov_ofs; /* bytes left in this segment */
+ if (len > bytes)
+ len = bytes;
+ bytes -= len;
+ iov_ofs += len;
+ if (iov->iov_len == iov_ofs) { /* segment fully consumed */
+ iov++;
+ iov_ofs = 0;
+ }
+ }
+ *iovp = iov;
+ *iov_ofsp = iov_ofs;
+}
+
+/*
+ * This has the same side-effects and return value as ntfs_copy_from_user().
+ * The difference is that on a fault we need to memset the remainder of the
+ * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
+ * single-segment behaviour.
+ *
+ * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
+ * atomic and when not atomic. This is ok because it calls
+ * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
+ * fact, the only difference between __copy_from_user_inatomic() and
+ * __copy_from_user() is that the latter calls might_sleep() and the former
+ * should not zero the tail of the buffer on error. And on many architectures
+ * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
+ * makes no difference at all on those architectures.
+ *
+ * @pages: destination pages, filled in array order
+ * @nr_pages: number of pages in @pages
+ * @ofs: byte offset into the first page at which copying starts; all
+ * later pages are filled from offset 0
+ * @iov: in: iovec segment to start reading from; out: advanced past the
+ * bytes that were copied
+ * @iov_ofs: in: byte offset into *@iov; out: advanced likewise
+ * @bytes: number of bytes to copy
+ *
+ * Return the number of bytes copied.
+ */
+static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
+ unsigned nr_pages, unsigned ofs, const struct iovec **iov,
+ size_t *iov_ofs, size_t bytes)
+{
+ struct page **last_page = pages + nr_pages;
+ char *addr;
+ size_t copied, len, total = 0;
+
+ do {
+ len = PAGE_CACHE_SIZE - ofs; /* room left in this page */
+ if (len > bytes)
+ len = bytes;
+ addr = kmap_atomic(*pages);
+ copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
+ *iov, *iov_ofs, len);
+ kunmap_atomic(addr);
+ if (unlikely(copied != len)) {
+ /* Do it the slow way. */
+ addr = kmap(*pages);
+ copied = __ntfs_copy_from_user_iovec_inatomic(addr +
+ ofs, *iov, *iov_ofs, len);
+ if (unlikely(copied != len))
+ goto err_out; /* page is still kmap()ped; err_out unmaps it */
+ kunmap(*pages);
+ }
+ total += len;
+ ntfs_set_next_iovec(iov, iov_ofs, len);
+ bytes -= len;
+ if (!bytes)
+ break;
+ ofs = 0; /* every page after the first starts at offset 0 */
+ } while (++pages < last_page);
+out:
+ return total;
+err_out:
+ BUG_ON(copied > len);
+ /* Zero the rest of the target like __copy_from_user(). */
+ memset(addr + ofs + copied, 0, len - copied);
+ kunmap(*pages);
+ total += copied;
+ ntfs_set_next_iovec(iov, iov_ofs, copied);
+ while (++pages < last_page) {
+ bytes -= len; /* drop the chunk that belonged to the previous page */
+ if (!bytes)
+ break;
+ len = PAGE_CACHE_SIZE;
+ if (len > bytes)
+ len = bytes;
+ zero_user(*pages, 0, len);
+ }
+ goto out;
+}
+/*
+ * ntfs_flush_dcache_pages - call flush_dcache_page() on an array of pages
+ * @pages: pages to flush
+ * @nr_pages: number of pages in @pages, must not be zero
+ *
+ * The pages are flushed from the last array element down to the first.
+ */
+static inline void ntfs_flush_dcache_pages(struct page **pages,
+ unsigned nr_pages)
+{
+ BUG_ON(!nr_pages);
+ /*
+ * Warning: Do not do the decrement at the same time as the call to
+ * flush_dcache_page() because it is a NULL macro on i386 and hence the
+ * decrement never happens so the loop never terminates.
+ */
+ do {
+ --nr_pages;
+ flush_dcache_page(pages[nr_pages]);
+ } while (nr_pages > 0);
+}
+
+/**
+ * ntfs_commit_pages_after_non_resident_write - commit the received data
+ * @pages: array of destination pages
+ * @nr_pages: number of pages in @pages
+ * @pos: byte position in file at which the write begins
+ * @bytes: number of bytes to be written
+ *
+ * See description of ntfs_commit_pages_after_write(), below.
+ *
+ * Mark every buffer overlapping the written range [@pos, @pos + @bytes)
+ * uptodate and dirty, and set a page uptodate if none of its buffers outside
+ * that range is left not uptodate.
+ *
+ * If the write ends beyond the current initialized_size, update
+ * initialized_size, and also i_size/data_size if the write ends beyond
+ * i_size, both in the inode and in the attribute record, and mark the mft
+ * record dirty.
+ *
+ * Return 0 on success or -errno on error. On any error other than -ENOMEM
+ * the volume is additionally flagged with NVolSetErrors().
+ */
+static inline int ntfs_commit_pages_after_non_resident_write(
+ struct page **pages, const unsigned nr_pages,
+ s64 pos, size_t bytes)
+{
+ s64 end, initialized_size;
+ struct inode *vi;
+ ntfs_inode *ni, *base_ni;
+ struct buffer_head *bh, *head;
+ ntfs_attr_search_ctx *ctx;
+ MFT_RECORD *m;
+ ATTR_RECORD *a;
+ unsigned long flags;
+ unsigned blocksize, u;
+ int err;
+
+ vi = pages[0]->mapping->host;
+ ni = NTFS_I(vi);
+ blocksize = vi->i_sb->s_blocksize;
+ end = pos + bytes;
+ u = 0;
+ do {
+ s64 bh_pos;
+ struct page *page;
+ bool partial;
+
+ page = pages[u];
+ bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+ bh = head = page_buffers(page);
+ partial = false;
+ do {
+ s64 bh_end;
+
+ bh_end = bh_pos + blocksize;
+ if (bh_end <= pos || bh_pos >= end) { /* buffer not touched by the write */
+ if (!buffer_uptodate(bh))
+ partial = true;
+ } else {
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ }
+ } while (bh_pos += blocksize, (bh = bh->b_this_page) != head);
+ /*
+ * If all buffers are now uptodate but the page is not, set the
+ * page uptodate.
+ */
+ if (!partial && !PageUptodate(page))
+ SetPageUptodate(page);
+ } while (++u < nr_pages);
+ /*
+ * Finally, if we do not need to update initialized_size or i_size we
+ * are finished.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ initialized_size = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (end <= initialized_size) {
+ ntfs_debug("Done.");
+ return 0;
+ }
+ /*
+ * Update initialized_size/i_size as appropriate, both in the inode and
+ * the mft record.
+ */
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ /* Map, pin, and lock the mft record. */
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL; /* err_out must not unmap a record that was never mapped */
+ ctx = NULL;
+ goto err_out;
+ }
+ BUG_ON(!NInoNonResident(ni));
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ a = ctx->attr;
+ BUG_ON(!a->non_resident);
+ write_lock_irqsave(&ni->size_lock, flags);
+ BUG_ON(end > ni->allocated_size);
+ ni->initialized_size = end;
+ a->data.non_resident.initialized_size = cpu_to_sle64(end);
+ if (end > i_size_read(vi)) {
+ i_size_write(vi, end);
+ a->data.non_resident.data_size =
+ a->data.non_resident.initialized_size;
+ }
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ /* Mark the mft record dirty, so it gets written back. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ ntfs_debug("Done.");
+ return 0;
+err_out:
+ if (ctx)
+ ntfs_attr_put_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(base_ni);
+ ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error "
+ "code %i).", err);
+ if (err != -ENOMEM)
+ NVolSetErrors(ni->vol);
+ return err;
+}
+
+/**
+ * ntfs_commit_pages_after_write - commit the received data
+ * @pages: array of destination pages
+ * @nr_pages: number of pages in @pages
+ * @pos: byte position in file at which the write begins
+ * @bytes: number of bytes to be written
+ *
+ * This is called from ntfs_file_buffered_write() with i_mutex held on the inode
+ * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are
+ * locked but not kmap()ped. The source data has already been copied into the
+ * @pages. ntfs_prepare_pages_for_non_resident_write() has been called before
+ * the data was copied (for non-resident attributes only) and it returned
+ * success.
+ *
+ * Need to set uptodate and mark dirty all buffers within the boundary of the
+ * write. If all buffers in a page are uptodate we set the page uptodate, too.
+ *
+ * Setting the buffers dirty ensures that they get written out later when
+ * ntfs_writepage() is invoked by the VM.
+ *
+ * Finally, we need to update i_size and initialized_size as appropriate both
+ * in the inode and the mft record.
+ *
+ * This is modelled after fs/buffer.c::generic_commit_write(), which marks
+ * buffers uptodate and dirty, sets the page uptodate if all buffers in the
+ * page are uptodate, and updates i_size if the end of io is beyond i_size. In
+ * that case, it also marks the inode dirty.
+ *
+ * If things have gone as outlined in
+ * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page
+ * content modifications here for non-resident attributes. For resident
+ * attributes we need to do the uptodate bringing here which we combine with
+ * the copying into the mft record which means we save one atomic kmap.
+ *
+ * Non-resident attributes are handed off in full to
+ * ntfs_commit_pages_after_non_resident_write(). The remainder of this
+ * function handles resident attributes only, for which @nr_pages must be 1.
+ *
+ * Return 0 on success or -errno on error. In the resident case, if the error
+ * is -ENOMEM and the page is already uptodate, the page is marked dirty so
+ * that the write is retried later and 0 is returned instead. Any other
+ * resident error flags the volume with NVolSetErrors().
+ */
+static int ntfs_commit_pages_after_write(struct page **pages,
+ const unsigned nr_pages, s64 pos, size_t bytes)
+{
+ s64 end, initialized_size;
+ loff_t i_size;
+ struct inode *vi;
+ ntfs_inode *ni, *base_ni;
+ struct page *page;
+ ntfs_attr_search_ctx *ctx;
+ MFT_RECORD *m;
+ ATTR_RECORD *a;
+ char *kattr, *kaddr;
+ unsigned long flags;
+ u32 attr_len;
+ int err;
+
+ BUG_ON(!nr_pages);
+ BUG_ON(!pages);
+ page = pages[0];
+ BUG_ON(!page);
+ vi = page->mapping->host;
+ ni = NTFS_I(vi);
+ ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
+ "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
+ vi->i_ino, ni->type, page->index, nr_pages,
+ (long long)pos, bytes);
+ if (NInoNonResident(ni))
+ return ntfs_commit_pages_after_non_resident_write(pages,
+ nr_pages, pos, bytes);
+ BUG_ON(nr_pages > 1);
+ /*
+ * Attribute is resident, implying it is not compressed, encrypted, or
+ * sparse.
+ */
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ BUG_ON(NInoNonResident(ni));
+ /* Map, pin, and lock the mft record. */
+ m = map_mft_record(base_ni);
+ if (IS_ERR(m)) {
+ err = PTR_ERR(m);
+ m = NULL; /* err_out must not unmap a record that was never mapped */
+ ctx = NULL;
+ goto err_out;
+ }
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
+ if (unlikely(!ctx)) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
+ CASE_SENSITIVE, 0, NULL, 0, ctx);
+ if (unlikely(err)) {
+ if (err == -ENOENT)
+ err = -EIO;
+ goto err_out;
+ }
+ a = ctx->attr;
+ BUG_ON(a->non_resident);
+ /* The total length of the attribute value. */
+ attr_len = le32_to_cpu(a->data.resident.value_length);
+ i_size = i_size_read(vi);
+ BUG_ON(attr_len != i_size);
+ BUG_ON(pos > attr_len);
+ end = pos + bytes;
+ BUG_ON(end > le32_to_cpu(a->length) -
+ le16_to_cpu(a->data.resident.value_offset));
+ kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
+ kaddr = kmap_atomic(page);
+ /* Copy the received data from the page to the mft record. */
+ memcpy(kattr + pos, kaddr + pos, bytes);
+ /* Update the attribute length if necessary. */
+ if (end > attr_len) {
+ attr_len = end;
+ a->data.resident.value_length = cpu_to_le32(attr_len);
+ }
+ /*
+ * If the page is not uptodate, bring the out of bounds area(s)
+ * uptodate by copying data from the mft record to the page.
+ */
+ if (!PageUptodate(page)) {
+ if (pos > 0)
+ memcpy(kaddr, kattr, pos);
+ if (end < attr_len)
+ memcpy(kaddr + end, kattr + end, attr_len - end);
+ /* Zero the region outside the end of the attribute value. */
+ memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ }
+ kunmap_atomic(kaddr);
+ /* Update initialized_size/i_size if necessary. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ initialized_size = ni->initialized_size;
+ BUG_ON(end > ni->allocated_size);
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ BUG_ON(initialized_size != i_size);
+ if (end > initialized_size) {
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->initialized_size = end;
+ i_size_write(vi, end);
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ }
+ /* Mark the mft record dirty, so it gets written back. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ ntfs_debug("Done.");
+ return 0;
+err_out:
+ if (err == -ENOMEM) {
+ ntfs_warning(vi->i_sb, "Error allocating memory required to "
+ "commit the write.");
+ if (PageUptodate(page)) {
+ ntfs_warning(vi->i_sb, "Page is uptodate, setting "
+ "dirty so the write will be retried "
+ "later on by the VM.");
+ /*
+ * Put the page on mapping->dirty_pages, but leave its
+ * buffers' dirty state as-is.
+ */
+ __set_page_dirty_nobuffers(page);
+ err = 0; /* report success; the dirty page carries the data */
+ } else
+ ntfs_error(vi->i_sb, "Page is not uptodate. Written "
+ "data has been lost.");
+ } else {
+ ntfs_error(vi->i_sb, "Resident attribute commit write failed "
+ "with error %i.", err);
+ NVolSetErrors(ni->vol);
+ }
+ if (ctx)
+ ntfs_attr_put_search_ctx(ctx);
+ if (m)
+ unmap_mft_record(base_ni);
+ return err;
+}
+/*
+ * ntfs_write_failed - trim the page cache and inode back to i_size
+ * @mapping: address space whose host inode is examined
+ * @to: file offset to compare against i_size (presumably the end of the
+ * attempted write - confirm against the callers)
+ *
+ * If @to lies beyond the current inode->i_size, truncate the page cache down
+ * to i_size with truncate_pagecache() and then call ntfs_truncate_vfs() on
+ * the inode. Otherwise do nothing.
+ */
+static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, inode->i_size);
+ ntfs_truncate_vfs(inode);
+ }
+}
+
+/**
+ * ntfs_file_buffered_write - write @count bytes from @iov to the file at @pos
+ *
+ * Locking: The vfs is holding ->i_mutex on the inode.
+ */
+static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs,
+ loff_t pos, loff_t *ppos, size_t count)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *vi = mapping->host;
+ ntfs_inode *ni = NTFS_I(vi);
+ ntfs_volume *vol = ni->vol;
+ struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
+ struct page *cached_page = NULL;
+ char __user *buf = NULL;
+ s64 end, ll;
+ VCN last_vcn;
+ LCN lcn;
+ unsigned long flags;
+ size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */
+ ssize_t status, written;
+ unsigned nr_pages;
+ int err;
+
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
+ "pos 0x%llx, count 0x%lx.",
+ vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)pos, (unsigned long)count);
+ if (unlikely(!count))
+ return 0;
+ BUG_ON(NInoMstProtected(ni));
+ /*
+ * If the attribute is not an index root and it is encrypted or
+ * compressed, we cannot write to it yet. Note we need to check for
+ * AT_INDEX_ALLOCATION since this is the type of both directory and
+ * index inodes.
+ */
+ if (ni->type != AT_INDEX_ALLOCATION) {
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ /*
+ * Reminder for later: Encrypted files are _always_
+ * non-resident so that the content can always be
+ * encrypted.
+ */
+ ntfs_debug("Denying write access to encrypted file.");
+ return -EACCES;
+ }
+ if (NInoCompressed(ni)) {
+ /* Only unnamed $DATA attribute can be compressed. */
+ BUG_ON(ni->type != AT_DATA);
+ BUG_ON(ni->name_len);
+ /*
+ * Reminder for later: If resident, the data is not
+ * actually compressed. Only on the switch to non-
+ * resident does compression kick in. This is in
+ * contrast to encrypted files (see above).
+ */
+ ntfs_error(vi->i_sb, "Writing to compressed files is "
+ "not implemented yet. Sorry.");
+ return -EOPNOTSUPP;
+ }
+ }
+ /*
+ * If a previous ntfs_truncate() failed, repeat it and abort if it
+ * fails again.
+ */
+ if (unlikely(NInoTruncateFailed(ni))) {
+ inode_dio_wait(vi);
+ err = ntfs_truncate(vi);
+ if (err || NInoTruncateFailed(ni)) {
+ if (!err)
+ err = -EIO;
+ ntfs_error(vol->sb, "Cannot perform write to inode "
+ "0x%lx, attribute type 0x%x, because "
+ "ntfs_truncate() failed (error code "
+ "%i).", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ return err;
+ }
+ }
+ /* The first byte after the write. */
+ end = pos + count;
+ /*
+ * If the write goes beyond the allocated size, extend the allocation
+ * to cover the whole of the write, rounded up to the nearest cluster.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (end > ll) {
+ /* Extend the allocation without changing the data size. */
+ ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+ if (likely(ll >= 0)) {
+ BUG_ON(pos >= ll);
+ /* If the extension was partial truncate the write. */
+ if (end > ll) {
+ ntfs_debug("Truncating write to inode 0x%lx, "
+ "attribute type 0x%x, because "
+ "the allocation was only "
+ "partially extended.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ end = ll;
+ count = ll - pos;
+ }
+ } else {
+ err = ll;
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /* Perform a partial write if possible or fail. */
+ if (pos < ll) {
+ ntfs_debug("Truncating write to inode 0x%lx, "
+ "attribute type 0x%x, because "
+ "extending the allocation "
+ "failed (error code %i).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type), err);
+ end = ll;
+ count = ll - pos;
+ } else {
+ ntfs_error(vol->sb, "Cannot perform write to "
+ "inode 0x%lx, attribute type "
+ "0x%x, because extending the "
+ "allocation failed (error "
+ "code %i).", vi->i_ino,
+ (unsigned)
+ le32_to_cpu(ni->type), err);
+ return err;
+ }
+ }
+ }
+ written = 0;
+ /*
+ * If the write starts beyond the initialized size, extend it up to the
+ * beginning of the write and initialize all non-sparse space between
+ * the old initialized size and the new one. This automatically also
+ * increments the vfs inode->i_size to keep it above or equal to the
+ * initialized_size.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (pos > ll) {
+ err = ntfs_attr_extend_initialized(ni, pos);
+ if (err < 0) {
+ ntfs_error(vol->sb, "Cannot perform write to inode "
+ "0x%lx, attribute type 0x%x, because "
+ "extending the initialized size "
+ "failed (error code %i).", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ status = err;
+ goto err_out;
+ }
+ }
+ /*
+ * Determine the number of pages per cluster for non-resident
+ * attributes.
+ */
+ nr_pages = 1;
+ if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
+ nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
+ /* Finally, perform the actual write. */
+ last_vcn = -1;
+ if (likely(nr_segs == 1))
+ buf = iov->iov_base;
+ do {
+ VCN vcn;
+ pgoff_t idx, start_idx;
+ unsigned ofs, do_pages, u;
+ size_t copied;
+
+ start_idx = idx = pos >> PAGE_CACHE_SHIFT;
+ ofs = pos & ~PAGE_CACHE_MASK;
+ bytes = PAGE_CACHE_SIZE - ofs;
+ do_pages = 1;
+ if (nr_pages > 1) {
+ vcn = pos >> vol->cluster_size_bits;
+ if (vcn != last_vcn) {
+ last_vcn = vcn;
+ /*
+ * Get the lcn of the vcn the write is in. If
+ * it is a hole, need to lock down all pages in
+ * the cluster.
+ */
+ down_read(&ni->runlist.lock);
+ lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >>
+ vol->cluster_size_bits, false);
+ up_read(&ni->runlist.lock);
+ if (unlikely(lcn < LCN_HOLE)) {
+ status = -EIO;
+ if (lcn == LCN_ENOMEM)
+ status = -ENOMEM;
+ else
+ ntfs_error(vol->sb, "Cannot "
+ "perform write to "
+ "inode 0x%lx, "
+ "attribute type 0x%x, "
+ "because the attribute "
+ "is corrupt.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ break;
+ }
+ if (lcn == LCN_HOLE) {
+ start_idx = (pos & ~(s64)
+ vol->cluster_size_mask)
+ >> PAGE_CACHE_SHIFT;
+ bytes = vol->cluster_size - (pos &
+ vol->cluster_size_mask);
+ do_pages = nr_pages;
+ }
+ }
+ }
+ if (bytes > count)
+ bytes = count;
+ /*
+ * Bring in the user page(s) that we will copy from _first_.
+ * Otherwise there is a nasty deadlock on copying from the same
+ * page(s) as we are writing to, without it/them being marked
+ * up-to-date. Note, at present there is nothing to stop the
+ * pages being swapped out between us bringing them into memory
+ * and doing the actual copying.
+ */
+ if (likely(nr_segs == 1))
+ ntfs_fault_in_pages_readable(buf, bytes);
+ else
+ ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+ /* Get and lock @do_pages starting at index @start_idx. */
+ status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
+ pages, &cached_page);
+ if (unlikely(status))
+ break;
+ /*
+ * For non-resident attributes, we need to fill any holes with
+ * actual clusters and ensure all buffers are mapped. We also
+ * need to bring uptodate any buffers that are only partially
+ * being written to.
+ */
+ if (NInoNonResident(ni)) {
+ status = ntfs_prepare_pages_for_non_resident_write(
+ pages, do_pages, pos, bytes);
+ if (unlikely(status)) {
+ loff_t i_size;
+
+ do {
+ unlock_page(pages[--do_pages]);
+ page_cache_release(pages[do_pages]);
+ } while (do_pages);
+ /*
+ * The write preparation may have instantiated
+ * allocated space outside i_size. Trim this
+ * off again. We can ignore any errors in this
+ * case as we will just be wasting a bit of
+ * allocated space, which is not a disaster.
+ */
+ i_size = i_size_read(vi);
+ if (pos + bytes > i_size) {
+ ntfs_write_failed(mapping, pos + bytes);
+ }
+ break;
+ }
+ }
+ u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
+ if (likely(nr_segs == 1)) {
+ copied = ntfs_copy_from_user(pages + u, do_pages - u,
+ ofs, buf, bytes);
+ buf += copied;
+ } else
+ copied = ntfs_copy_from_user_iovec(pages + u,
+ do_pages - u, ofs, &iov, &iov_ofs,
+ bytes);
+ ntfs_flush_dcache_pages(pages + u, do_pages - u);
+ status = ntfs_commit_pages_after_write(pages, do_pages, pos,
+ bytes);
+ if (likely(!status)) {
+ written += copied;
+ count -= copied;
+ pos += copied;
+ if (unlikely(copied != bytes))
+ status = -EFAULT;
+ }
+ do {
+ unlock_page(pages[--do_pages]);
+ page_cache_release(pages[do_pages]);
+ } while (do_pages);
+ if (unlikely(status))
+ break;
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
+ } while (count);
+err_out:
+ *ppos = pos;
+ if (cached_page)
+ page_cache_release(cached_page);
+ ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
+ written ? "written" : "status", (unsigned long)written,
+ (long)status);
+ return written ? written : status;
+}
+
+/**
+ * ntfs_file_aio_write_nolock - check a write request and perform it as a buffered write (ntfs_file_aio_write() calls this with i_mutex held)
+ */
+static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
+ const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ loff_t pos;
+ size_t count; /* after file limit checks */
+ ssize_t written, err;
+
+ count = iov_length(iov, nr_segs);
+ pos = *ppos;
+ /* We can write back this queue in page reclaim. */
+ current->backing_dev_info = mapping->backing_dev_info;
+ written = 0;
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err)
+ goto out;
+ if (!count) /* zero-length write: falls through to return err, which is 0 here */
+ goto out;
+ err = file_remove_suid(file);
+ if (err)
+ goto out;
+ err = file_update_time(file);
+ if (err)
+ goto out;
+ written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
+ count);
+out:
+ current->backing_dev_info = NULL; /* undo the assignment made above on every exit path */
+ return written ? written : err; /* a non-zero result of the write attempt takes precedence over err */
+}
+
+/**
+ * ntfs_file_aio_write - perform a write under i_mutex, then pass the written range to generic_write_sync()
+ */
+static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+
+ BUG_ON(iocb->ki_pos != pos); /* @pos must be the iocb's own file position */
+
+ mutex_lock(&inode->i_mutex);
+ ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
+ mutex_unlock(&inode->i_mutex);
+ if (ret > 0) { /* only sync when the write returned a positive count */
+ int err = generic_write_sync(file, iocb->ki_pos - ret, ret); /* start = ki_pos - ret, length = ret */
+ if (err < 0)
+ ret = err; /* a sync failure replaces the byte count as the return value */
+ }
+ return ret;
+}
+
+/**
* ntfs_file_fsync - sync a file to disk
* @filp: file to be synced
- * @dentry: dentry describing the file to sync
* @datasync: if non-zero only flush user data and not metadata
*
* Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
@@ -74,26 +2155,34 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
* Also, if @datasync is true, we do not wait on the inode to be written out
* but we always wait on the page cache pages to be written out.
*
- * Note: In the past @filp could be NULL so we ignore it as we don't need it
- * anyway.
- *
- * Locking: Caller must hold i_sem on the inode.
+ * Locking: Takes i_mutex on the inode itself; the caller must not hold it.
*
* TODO: We should probably also write all attribute/index inodes associated
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now.
*/
-static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
- struct inode *vi = dentry->d_inode;
+ struct inode *vi = filp->f_mapping->host;
int err, ret = 0;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
- ret = ntfs_write_inode(vi, 1);
+ ret = __ntfs_write_inode(vi, 1);
write_inode_now(vi, !datasync);
+ /*
+ * NOTE: If we were to use mapping->private_list (see ext2 and
+ * fs/buffer.c) for dirty blocks then we could optimize the below to be
+ * sync_mapping_buffers(vi->i_mapping).
+ */
err = sync_blockdev(vi->i_sb->s_bdev);
if (unlikely(err && !ret))
ret = err;
@@ -102,54 +2191,52 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
#endif /* NTFS_RW */
-struct file_operations ntfs_file_ops = {
- .llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
- .aio_read = generic_file_aio_read, /* Async read from file. */
- .readv = generic_file_readv, /* Read from file. */
+const struct file_operations ntfs_file_ops = {
+ .llseek = generic_file_llseek, /* Seek inside file. */
+ .read = new_sync_read, /* Read from file. */
+ .read_iter = generic_file_read_iter, /* Async read from file. */
#ifdef NTFS_RW
- .write = generic_file_write, /* Write to file. */
- .aio_write = generic_file_aio_write, /* Async write to file. */
- .writev = generic_file_writev, /* Write to file. */
- /*.release = ,*/ /* Last file is closed. See
- fs/ext2/file.c::
- ext2_release_file() for
- how to use this to discard
- preallocated space for
- write opened files. */
- .fsync = ntfs_file_fsync, /* Sync a file to disk. */
- /*.aio_fsync = ,*/ /* Sync all outstanding async
- i/o operations on a
- kiocb. */
+ .write = do_sync_write, /* Write to file. */
+ .aio_write = ntfs_file_aio_write, /* Async write to file. */
+ /*.release = ,*/ /* Last file is closed. See
+ fs/ext2/file.c::
+ ext2_release_file() for
+ how to use this to discard
+ preallocated space for
+ write opened files. */
+ .fsync = ntfs_file_fsync, /* Sync a file to disk. */
+ /*.aio_fsync = ,*/ /* Sync all outstanding async
+ i/o operations on a
+ kiocb. */
#endif /* NTFS_RW */
- /*.ioctl = ,*/ /* Perform function on the
- mounted filesystem. */
- .mmap = generic_file_mmap, /* Mmap file. */
- .open = ntfs_file_open, /* Open file. */
- .sendfile = generic_file_sendfile, /* Zero-copy data send with
- the data source being on
- the ntfs partition. We
- do not need to care about
- the data destination. */
- /*.sendpage = ,*/ /* Zero-copy data send with
- the data destination being
- on the ntfs partition. We
- do not need to care about
- the data source. */
+ /*.ioctl = ,*/ /* Perform function on the
+ mounted filesystem. */
+ .mmap = generic_file_mmap, /* Mmap file. */
+ .open = ntfs_file_open, /* Open file. */
+ .splice_read = generic_file_splice_read /* Zero-copy data send with
+ the data source being on
+ the ntfs partition. We do
+ not need to care about the
+ data destination. */
+ /*.sendpage = ,*/ /* Zero-copy data send with
+ the data destination being
+ on the ntfs partition. We
+ do not need to care about
+ the data source. */
};
-struct inode_operations ntfs_file_inode_ops = {
+const struct inode_operations ntfs_file_inode_ops = {
#ifdef NTFS_RW
- .truncate = ntfs_truncate_vfs,
.setattr = ntfs_setattr,
#endif /* NTFS_RW */
};
-struct file_operations ntfs_empty_file_ops = {};
+const struct file_operations ntfs_empty_file_ops = {};
-struct inode_operations ntfs_empty_inode_ops = {};
+const struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 11fd5307d78..096c135691a 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -19,6 +19,8 @@
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/slab.h>
+
#include "aops.h"
#include "collate.h"
#include "debug.h"
@@ -32,13 +34,13 @@
* Allocate a new index context, initialize it with @idx_ni and return it.
* Return NULL if allocation failed.
*
- * Locking: Caller must hold i_sem on the index inode.
+ * Locking: Caller must hold i_mutex on the index inode.
*/
ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
{
ntfs_index_context *ictx;
- ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
+ ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
if (ictx)
*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
return ictx;
@@ -50,7 +52,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
*
* Release the index context @ictx, releasing all associated resources.
*
- * Locking: Caller must hold i_sem on the index inode.
+ * Locking: Caller must hold i_mutex on the index inode.
*/
void ntfs_index_ctx_put(ntfs_index_context *ictx)
{
@@ -106,7 +108,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx)
* or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to
* ensure that the changes are written to disk.
*
- * Locking: - Caller must hold i_sem on the index inode.
+ * Locking: - Caller must hold i_mutex on the index inode.
* - Each page cache page in the index allocation mapping must be
* locked whilst being accessed otherwise we may find a corrupt
* page due to it being under ->writepage at the moment which
@@ -204,7 +206,8 @@ int ntfs_index_lookup(const void *key, const int key_len,
if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
&ie->key, key_len)) {
ir_done:
- ictx->is_in_root = TRUE;
+ ictx->is_in_root = true;
+ ictx->ir = ir;
ictx->actx = actx;
ictx->base_ni = base_ni;
ictx->ia = NULL;
@@ -373,7 +376,7 @@ fast_descend_into_child_node:
if ((key_len == le16_to_cpu(ie->key_length)) && !memcmp(key,
&ie->key, key_len)) {
ia_done:
- ictx->is_in_root = FALSE;
+ ictx->is_in_root = false;
ictx->actx = NULL;
ictx->base_ni = NULL;
ictx->ia = ia;
diff --git a/fs/ntfs/index.h b/fs/ntfs/index.h
index 846a489e869..8745469c398 100644
--- a/fs/ntfs/index.h
+++ b/fs/ntfs/index.h
@@ -37,12 +37,12 @@
* @entry: index entry (points into @ir or @ia)
* @data: index entry data (points into @entry)
* @data_len: length in bytes of @data
- * @is_in_root: TRUE if @entry is in @ir and FALSE if it is in @ia
+ * @is_in_root: 'true' if @entry is in @ir and 'false' if it is in @ia
* @ir: index root if @is_in_root and NULL otherwise
* @actx: attribute search context if @is_in_root and NULL otherwise
* @base_ni: base inode if @is_in_root and NULL otherwise
- * @ia: index block if @is_in_root is FALSE and NULL otherwise
- * @page: page if @is_in_root is FALSE and NULL otherwise
+ * @ia: index block if @is_in_root is 'false' and NULL otherwise
+ * @page: page if @is_in_root is 'false' and NULL otherwise
*
* @idx_ni is the index inode this context belongs to.
*
@@ -50,11 +50,11 @@
* are the index entry data and its length in bytes, respectively. @data
* simply points into @entry. This is probably what the user is interested in.
*
- * If @is_in_root is TRUE, @entry is in the index root attribute @ir described
+ * If @is_in_root is 'true', @entry is in the index root attribute @ir described
* by the attribute search context @actx and the base inode @base_ni. @ia and
* @page are NULL in this case.
*
- * If @is_in_root is FALSE, @entry is in the index allocation attribute and @ia
+ * If @is_in_root is 'false', @entry is in the index allocation attribute and @ia
* and @page point to the index allocation block and the mapped, locked page it
* is in, respectively. @ir, @actx and @base_ni are NULL in this case.
*
@@ -77,7 +77,7 @@ typedef struct {
INDEX_ENTRY *entry;
void *data;
u16 data_len;
- BOOL is_in_root;
+ bool is_in_root;
INDEX_ROOT *ir;
ntfs_attr_search_ctx *actx;
ntfs_inode *base_ni;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 886214a77f9..f47af5e6e23 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
/**
* inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -19,17 +19,24 @@
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
-#include <linux/quotaops.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/slab.h>
+#include <linux/log2.h>
+#include <linux/aio.h>
#include "aops.h"
+#include "attrib.h"
+#include "bitmap.h"
#include "dir.h"
#include "debug.h"
#include "inode.h"
-#include "attrib.h"
+#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "time.h"
@@ -48,7 +55,7 @@
*
* Return 1 if the attributes match and 0 if not.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
* allowed to sleep.
*/
int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -88,11 +95,11 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
* If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
* In that case, @na->name and @na->name_len should be set to NULL and 0,
* respectively. Although that is not strictly necessary as
- * ntfs_read_inode_locked() will fill them in later.
+ * ntfs_read_locked_inode() will fill them in later.
*
* Return 0 on success and -errno on error.
*
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
* allowed to sleep. (Hence the GFP_ATOMIC allocation.)
*/
static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
@@ -130,11 +137,11 @@ static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)
BUG_ON(!na->name);
i = na->name_len * sizeof(ntfschar);
- ni->name = (ntfschar*)kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
+ ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
if (!ni->name)
return -ENOMEM;
memcpy(ni->name, na->name, i);
- ni->name[i] = 0;
+ ni->name[na->name_len] = 0;
}
return 0;
}
@@ -164,8 +171,8 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi,
struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
na.mft_no = mft_no;
na.type = AT_UNUSED;
@@ -222,8 +229,8 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
ntfschar *name, u32 name_len)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
/* Make sure no one calls ntfs_attr_iget() for indices. */
BUG_ON(type == AT_INDEX_ALLOCATION);
@@ -280,8 +287,8 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
u32 name_len)
{
struct inode *vi;
- ntfs_attr na;
int err;
+ ntfs_attr na;
na.mft_no = base_vi->i_ino;
na.type = AT_INDEX_ALLOCATION;
@@ -317,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
ntfs_inode *ni;
ntfs_debug("Entering.");
- ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
+ ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS);
if (likely(ni != NULL)) {
ni->state = 0;
return VFS_I(ni);
@@ -326,6 +333,12 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
return NULL;
}
+static void ntfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+}
+
void ntfs_destroy_big_inode(struct inode *inode)
{
ntfs_inode *ni = NTFS_I(inode);
@@ -334,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
BUG_ON(ni->page);
if (!atomic_dec_and_test(&ni->count))
BUG();
- kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+ call_rcu(&inode->i_rcu, ntfs_i_callback);
}
static inline ntfs_inode *ntfs_alloc_extent_inode(void)
@@ -342,7 +355,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
ntfs_inode *ni;
ntfs_debug("Entering.");
- ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
+ ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
if (likely(ni != NULL)) {
ni->state = 0;
return ni;
@@ -360,6 +373,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
kmem_cache_free(ntfs_inode_cache, ni);
}
+/*
+ * The attribute runlist lock has separate locking rules from the
+ * normal runlist lock, so split the two lock-classes:
+ */
+static struct lock_class_key attr_list_rl_lock_class;
+
/**
* __ntfs_init_inode - initialize ntfs specific part of an inode
* @sb: super block of mounted volume
@@ -381,23 +400,31 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
atomic_set(&ni->count, 1);
ni->vol = NTFS_SB(sb);
ntfs_init_runlist(&ni->runlist);
- init_MUTEX(&ni->mrec_lock);
+ mutex_init(&ni->mrec_lock);
ni->page = NULL;
ni->page_ofs = 0;
ni->attr_list_size = 0;
ni->attr_list = NULL;
ntfs_init_runlist(&ni->attr_list_rl);
- ni->itype.index.bmp_ino = NULL;
+ lockdep_set_class(&ni->attr_list_rl.lock,
+ &attr_list_rl_lock_class);
ni->itype.index.block_size = 0;
ni->itype.index.vcn_size = 0;
ni->itype.index.collation_rule = 0;
ni->itype.index.block_size_bits = 0;
ni->itype.index.vcn_size_bits = 0;
- init_MUTEX(&ni->extent_lock);
+ mutex_init(&ni->extent_lock);
ni->nr_extents = 0;
ni->ext.base_ntfs_ino = NULL;
}
+/*
+ * Extent inodes get MFT-mapped in a nested way, while the base inode
+ * is still mapped. Teach this nesting to the lock validator by creating
+ * a separate class for nested inode's mrec_lock's:
+ */
+static struct lock_class_key extent_inode_mrec_lock_key;
+
inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
unsigned long mft_no)
{
@@ -406,6 +433,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
ntfs_debug("Entering.");
if (likely(ni != NULL)) {
__ntfs_init_inode(sb, ni);
+ lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
ni->mft_no = mft_no;
ni->type = AT_UNUSED;
ni->name = NULL;
@@ -509,7 +537,7 @@ err_corrupt_attr:
* the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
* i_flags is set to 0 and we have no business touching it. Only an ioctl()
* is allowed to write to them. We should of course be honouring them but
@@ -523,6 +551,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
{
ntfs_volume *vol = NTFS_SB(vi->i_sb);
ntfs_inode *ni;
+ struct inode *bvi;
MFT_RECORD *m;
ATTR_RECORD *a;
STANDARD_INFORMATION *si;
@@ -533,8 +562,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
/* Setup the generic vfs inode parts now. */
- /* This is the optimal IO size (for stat), not the fs block size. */
- vi->i_blksize = PAGE_CACHE_SIZE;
/*
* This is for checking whether an inode has changed w.r.t. a file so
* that the file can be updated if necessary (compare with f_version).
@@ -585,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
* might be tricky due to vfs interactions. Need to think about this
* some more when implementing the unlink command.
*/
- vi->i_nlink = le16_to_cpu(m->link_count);
+ set_nlink(vi, le16_to_cpu(m->link_count));
/*
* FIXME: Reparse points can have the directory bit set even though
* they would be S_IFLNK. Need to deal with this further below when we
@@ -595,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
*/
/* Everyone gets all permissions. */
vi->i_mode |= S_IRWXUGO;
- /* If read-only, noone gets write permissions. */
+ /* If read-only, no one gets write permissions. */
if (IS_RDONLY(vi))
vi->i_mode &= ~S_IWUGO;
if (m->flags & MFT_RECORD_IS_DIRECTORY) {
@@ -607,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
vi->i_mode &= ~vol->dmask;
/* Things break without this kludge! */
if (vi->i_nlink > 1)
- vi->i_nlink = 1;
+ set_nlink(vi, 1);
} else {
vi->i_mode |= S_IFREG;
/* Apply the file permissions mask set in the mount options. */
@@ -676,13 +703,28 @@ static int ntfs_read_locked_inode(struct inode *vi)
ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
NInoSetAttrList(ni);
a = ctx->attr;
- if (a->flags & ATTR_IS_ENCRYPTED ||
- a->flags & ATTR_COMPRESSION_MASK ||
- a->flags & ATTR_IS_SPARSE) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
ntfs_error(vi->i_sb, "Attribute list attribute is "
- "compressed/encrypted/sparse.");
+ "compressed.");
goto unm_err_out;
}
+ if (a->flags & ATTR_IS_ENCRYPTED ||
+ a->flags & ATTR_IS_SPARSE) {
+ if (a->non_resident) {
+ ntfs_error(vi->i_sb, "Non-resident attribute "
+ "list attribute is encrypted/"
+ "sparse.");
+ goto unm_err_out;
+ }
+ ntfs_warning(vi->i_sb, "Resident attribute list "
+ "attribute in inode 0x%lx is marked "
+ "encrypted/sparse which is not true. "
+ "However, Windows allows this and "
+ "chkdsk does not detect or correct it "
+ "so we will just ignore the invalid "
+ "flags and pretend they are not set.",
+ vi->i_ino);
+ }
/* Now allocate memory for the attribute list. */
ni->attr_list_size = (u32)ntfs_attr_size(a);
ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
@@ -744,7 +786,6 @@ skip_attr_list_load:
*/
if (S_ISDIR(vi->i_mode)) {
loff_t bvi_size;
- struct inode *bvi;
ntfs_inode *bni;
INDEX_ROOT *ir;
u8 *ir_end, *index_end;
@@ -949,13 +990,12 @@ skip_attr_list_load:
err = PTR_ERR(bvi);
goto unm_err_out;
}
- ni->itype.index.bmp_ino = bvi;
bni = NTFS_I(bvi);
if (NInoCompressed(bni) || NInoEncrypted(bni) ||
NInoSparse(bni)) {
ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
"and/or encrypted and/or sparse.");
- goto unm_err_out;
+ goto iput_unm_err_out;
}
/* Consistency check bitmap size vs. index allocation size. */
bvi_size = i_size_read(bvi);
@@ -964,8 +1004,10 @@ skip_attr_list_load:
ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
"for index allocation (0x%llx).",
bvi_size << 3, vi->i_size);
- goto unm_err_out;
+ goto iput_unm_err_out;
}
+ /* No longer need the bitmap attribute inode. */
+ iput(bvi);
skip_large_dir_stuff:
/* Setup the operations for this inode. */
vi->i_op = &ntfs_dir_inode_ops;
@@ -1013,66 +1055,77 @@ skip_large_dir_stuff:
}
a = ctx->attr;
/* Setup the state. */
- if (a->non_resident) {
- NInoSetNonResident(ni);
- if (a->flags & (ATTR_COMPRESSION_MASK |
- ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found "
+ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
+ NInoSetCompressed(ni);
+ if (vol->cluster_size > 4096) {
+ ntfs_error(vi->i_sb, "Found "
"compressed data but "
"compression is "
"disabled due to "
"cluster size (%i) > "
"4kiB.",
vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK)
- != ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found "
- "unknown compression "
- "method or corrupt "
- "file.");
- goto unm_err_out;
- }
+ goto unm_err_out;
}
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
- if (a->data.non_resident.compression_unit !=
- 4) {
+ if ((a->flags & ATTR_COMPRESSION_MASK)
+ != ATTR_IS_COMPRESSED) {
+ ntfs_error(vi->i_sb, "Found unknown "
+ "compression method "
+ "or corrupt file.");
+ goto unm_err_out;
+ }
+ }
+ if (a->flags & ATTR_IS_SPARSE)
+ NInoSetSparse(ni);
+ }
+ if (a->flags & ATTR_IS_ENCRYPTED) {
+ if (NInoCompressed(ni)) {
+ ntfs_error(vi->i_sb, "Found encrypted and "
+ "compressed data.");
+ goto unm_err_out;
+ }
+ NInoSetEncrypted(ni);
+ }
+ if (a->non_resident) {
+ NInoSetNonResident(ni);
+ if (NInoCompressed(ni) || NInoSparse(ni)) {
+ if (NInoCompressed(ni) && a->data.non_resident.
+ compression_unit != 4) {
ntfs_error(vi->i_sb, "Found "
- "nonstandard compression unit "
- "(%u instead of 4). Cannot "
- "handle this.",
- a->data.non_resident.
- compression_unit);
+ "non-standard "
+ "compression unit (%u "
+ "instead of 4). "
+ "Cannot handle this.",
+ a->data.non_resident.
+ compression_unit);
err = -EOPNOTSUPP;
goto unm_err_out;
}
- ni->itype.compressed.block_clusters = 1U <<
- a->data.non_resident.
- compression_unit;
- ni->itype.compressed.block_size = 1U << (
- a->data.non_resident.
- compression_unit +
- vol->cluster_size_bits);
- ni->itype.compressed.block_size_bits = ffs(
- ni->itype.compressed.
- block_size) - 1;
+ if (a->data.non_resident.compression_unit) {
+ ni->itype.compressed.block_size = 1U <<
+ (a->data.non_resident.
+ compression_unit +
+ vol->cluster_size_bits);
+ ni->itype.compressed.block_size_bits =
+ ffs(ni->itype.
+ compressed.
+ block_size) - 1;
+ ni->itype.compressed.block_clusters =
+ 1U << a->data.
+ non_resident.
+ compression_unit;
+ } else {
+ ni->itype.compressed.block_size = 0;
+ ni->itype.compressed.block_size_bits =
+ 0;
+ ni->itype.compressed.block_clusters =
+ 0;
+ }
ni->itype.compressed.size = sle64_to_cpu(
a->data.non_resident.
compressed_size);
}
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Found encrypted "
- "and compressed data.");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
if (a->data.non_resident.lowest_vcn) {
ntfs_error(vi->i_sb, "First extent of $DATA "
"attribute has non zero "
@@ -1129,7 +1182,8 @@ no_data_attr_special_case:
vi->i_blocks = ni->allocated_size >> 9;
ntfs_debug("Done.");
return 0;
-
+iput_unm_err_out:
+ iput(bvi);
unm_err_out:
if (!err)
err = -EIO;
@@ -1160,11 +1214,13 @@ err_out:
* necessary fields in @vi as well as initializing the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
* have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
*/
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
{
@@ -1183,11 +1239,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
base_ni = NTFS_I(base_vi);
/* Just mirror the values from the base inode. */
- vi->i_blksize = base_vi->i_blksize;
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
- vi->i_nlink = base_vi->i_nlink;
+ set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
vi->i_ctime = base_vi->i_ctime;
vi->i_atime = base_vi->i_atime;
@@ -1212,6 +1267,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
if (unlikely(err))
goto unm_err_out;
a = ctx->attr;
+ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
+ NInoSetCompressed(ni);
+ if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
+ ni->name_len)) {
+ ntfs_error(vi->i_sb, "Found compressed "
+ "non-data or named data "
+ "attribute. Please report "
+ "you saw this message to "
+ "linux-ntfs-dev@lists."
+ "sourceforge.net");
+ goto unm_err_out;
+ }
+ if (vol->cluster_size > 4096) {
+ ntfs_error(vi->i_sb, "Found compressed "
+ "attribute but compression is "
+ "disabled due to cluster size "
+ "(%i) > 4kiB.",
+ vol->cluster_size);
+ goto unm_err_out;
+ }
+ if ((a->flags & ATTR_COMPRESSION_MASK) !=
+ ATTR_IS_COMPRESSED) {
+ ntfs_error(vi->i_sb, "Found unknown "
+ "compression method.");
+ goto unm_err_out;
+ }
+ }
+ /*
+ * The compressed/sparse flag set in an index root just means
+ * to compress all files.
+ */
+ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+ ntfs_error(vi->i_sb, "Found mst protected attribute "
+ "but the attribute is %s. Please "
+ "report you saw this message to "
+ "linux-ntfs-dev@lists.sourceforge.net",
+ NInoCompressed(ni) ? "compressed" :
+ "sparse");
+ goto unm_err_out;
+ }
+ if (a->flags & ATTR_IS_SPARSE)
+ NInoSetSparse(ni);
+ }
+ if (a->flags & ATTR_IS_ENCRYPTED) {
+ if (NInoCompressed(ni)) {
+ ntfs_error(vi->i_sb, "Found encrypted and compressed "
+ "data.");
+ goto unm_err_out;
+ }
+ /*
+ * The encryption flag set in an index root just means to
+ * encrypt all files.
+ */
+ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+ ntfs_error(vi->i_sb, "Found mst protected attribute "
+ "but the attribute is encrypted. "
+ "Please report you saw this message "
+ "to linux-ntfs-dev@lists.sourceforge."
+ "net");
+ goto unm_err_out;
+ }
+ if (ni->type != AT_DATA) {
+ ntfs_error(vi->i_sb, "Found encrypted non-data "
+ "attribute.");
+ goto unm_err_out;
+ }
+ NInoSetEncrypted(ni);
+ }
if (!a->non_resident) {
/* Ensure the attribute name is placed before the value. */
if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
@@ -1220,11 +1344,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the attribute value.");
goto unm_err_out;
}
- if (NInoMstProtected(ni) || a->flags) {
+ if (NInoMstProtected(ni)) {
ntfs_error(vi->i_sb, "Found mst protected attribute "
- "or attribute with non-zero flags but "
- "the attribute is resident. Please "
- "report you saw this message to "
+ "but the attribute is resident. "
+ "Please report you saw this message to "
"linux-ntfs-dev@lists.sourceforge.net");
goto unm_err_out;
}
@@ -1250,52 +1373,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the mapping pairs array.");
goto unm_err_out;
}
- if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if ((ni->type != AT_DATA) || (ni->type ==
- AT_DATA && ni->name_len)) {
- ntfs_error(vi->i_sb, "Found compressed "
- "non-data or named "
- "data attribute. "
- "Please report you "
- "saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto unm_err_out;
- }
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found compressed "
- "attribute but "
- "compression is "
- "disabled due to "
- "cluster size (%i) > "
- "4kiB.",
- vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK) !=
- ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found unknown "
- "compression method.");
- goto unm_err_out;
- }
- }
- if (NInoMstProtected(ni)) {
- ntfs_error(vi->i_sb, "Found mst protected "
- "attribute but the attribute "
- "is %s. Please report you "
- "saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net",
- NInoCompressed(ni) ?
- "compressed" : "sparse");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
- if (a->data.non_resident.compression_unit != 4) {
- ntfs_error(vi->i_sb, "Found nonstandard "
+ if (NInoCompressed(ni) || NInoSparse(ni)) {
+ if (NInoCompressed(ni) && a->data.non_resident.
+ compression_unit != 4) {
+ ntfs_error(vi->i_sb, "Found non-standard "
"compression unit (%u instead "
"of 4). Cannot handle this.",
a->data.non_resident.
@@ -1303,33 +1384,25 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
err = -EOPNOTSUPP;
goto unm_err_out;
}
- ni->itype.compressed.block_clusters = 1U <<
- a->data.non_resident.compression_unit;
- ni->itype.compressed.block_size = 1U << (
- a->data.non_resident.compression_unit +
- vol->cluster_size_bits);
- ni->itype.compressed.block_size_bits = ffs(
- ni->itype.compressed.block_size) - 1;
+ if (a->data.non_resident.compression_unit) {
+ ni->itype.compressed.block_size = 1U <<
+ (a->data.non_resident.
+ compression_unit +
+ vol->cluster_size_bits);
+ ni->itype.compressed.block_size_bits =
+ ffs(ni->itype.compressed.
+ block_size) - 1;
+ ni->itype.compressed.block_clusters = 1U <<
+ a->data.non_resident.
+ compression_unit;
+ } else {
+ ni->itype.compressed.block_size = 0;
+ ni->itype.compressed.block_size_bits = 0;
+ ni->itype.compressed.block_clusters = 0;
+ }
ni->itype.compressed.size = sle64_to_cpu(
a->data.non_resident.compressed_size);
}
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Found encrypted and "
- "compressed data.");
- goto unm_err_out;
- }
- if (NInoMstProtected(ni)) {
- ntfs_error(vi->i_sb, "Found mst protected "
- "attribute but the attribute "
- "is encrypted. Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
if (a->data.non_resident.lowest_vcn) {
ntfs_error(vi->i_sb, "First extent of attribute has "
"non-zero lowest_vcn.");
@@ -1341,19 +1414,16 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
ni->allocated_size = sle64_to_cpu(
a->data.non_resident.allocated_size);
}
- /* Setup the operations for this attribute inode. */
- vi->i_op = NULL;
- vi->i_fop = NULL;
if (NInoMstProtected(ni))
vi->i_mapping->a_ops = &ntfs_mst_aops;
else
vi->i_mapping->a_ops = &ntfs_aops;
- if (NInoCompressed(ni) || NInoSparse(ni))
+ if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
vi->i_blocks = ni->itype.compressed.size >> 9;
else
vi->i_blocks = ni->allocated_size >> 9;
/*
- * Make sure the base inode doesn't go away and attach it to the
+ * Make sure the base inode does not go away and attach it to the
* attribute inode.
*/
igrab(base_vi);
@@ -1379,7 +1449,6 @@ err_out:
"Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
base_vi->i_ino);
make_bad_inode(vi);
- make_bad_inode(base_vi);
if (err != -ENOMEM)
NVolSetErrors(vol);
return err;
@@ -1412,7 +1481,7 @@ err_out:
* normal directory inodes.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
@@ -1436,11 +1505,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
ni = NTFS_I(vi);
base_ni = NTFS_I(base_vi);
/* Just mirror the values from the base inode. */
- vi->i_blksize = base_vi->i_blksize;
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
- vi->i_nlink = base_vi->i_nlink;
+ set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
vi->i_ctime = base_vi->i_ctime;
vi->i_atime = base_vi->i_atime;
@@ -1480,7 +1548,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
"after the attribute value.");
goto unm_err_out;
}
- /* Compressed/encrypted/sparse index root is not allowed. */
+ /*
+ * Compressed/encrypted/sparse index root is not allowed, except for
+ * directories of course but those are not dealt with here.
+ */
if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
ATTR_IS_SPARSE)) {
ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
@@ -1507,7 +1578,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
ntfs_debug("Index collation rule is 0x%x.",
le32_to_cpu(ir->collation_rule));
ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
- if (ni->itype.index.block_size & (ni->itype.index.block_size - 1)) {
+ if (!is_power_of_2(ni->itype.index.block_size)) {
ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
"two.", ni->itype.index.block_size);
goto unm_err_out;
@@ -1561,6 +1632,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
"$INDEX_ALLOCATION attribute.");
goto unm_err_out;
}
+ a = ctx->attr;
if (!a->non_resident) {
ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
"resident.");
@@ -1629,11 +1701,9 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
vi->i_size);
goto iput_unm_err_out;
}
- ni->itype.index.bmp_ino = bvi;
+ iput(bvi);
skip_large_index_stuff:
/* Setup the operations for this index inode. */
- vi->i_op = NULL;
- vi->i_fop = NULL;
vi->i_mapping->a_ops = &ntfs_mst_aops;
vi->i_blocks = ni->allocated_size >> 9;
/*
@@ -1646,7 +1716,6 @@ skip_large_index_stuff:
ntfs_debug("Done.");
return 0;
-
iput_unm_err_out:
iput(bvi);
unm_err_out:
@@ -1666,6 +1735,15 @@ err_out:
return err;
}
+/*
+ * The MFT inode has special locking, so teach the lock validator
+ * about this by splitting off the locking rules of the MFT from
+ * the locking rules of other inodes. The MFT inode can never be
+ * accessed from the VFS side (or even internally), only by the
+ * map_mft functions.
+ */
+static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
+
/**
* ntfs_read_inode_mount - special read_inode for mount time use only
* @vi: inode to read
@@ -1794,19 +1872,33 @@ int ntfs_read_inode_mount(struct inode *vi)
} else /* if (!err) */ {
ATTR_LIST_ENTRY *al_entry, *next_al_entry;
u8 *al_end;
+ static const char *es = " Not allowed. $MFT is corrupt. "
+ "You should run chkdsk.";
ntfs_debug("Attribute list attribute found in $MFT.");
NInoSetAttrList(ni);
a = ctx->attr;
- if (a->flags & ATTR_IS_ENCRYPTED ||
- a->flags & ATTR_COMPRESSION_MASK ||
- a->flags & ATTR_IS_SPARSE) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
ntfs_error(sb, "Attribute list attribute is "
- "compressed/encrypted/sparse. Not "
- "allowed. $MFT is corrupt. You should "
- "run chkdsk.");
+ "compressed.%s", es);
goto put_err_out;
}
+ if (a->flags & ATTR_IS_ENCRYPTED ||
+ a->flags & ATTR_IS_SPARSE) {
+ if (a->non_resident) {
+ ntfs_error(sb, "Non-resident attribute list "
+ "attribute is encrypted/"
+ "sparse.%s", es);
+ goto put_err_out;
+ }
+ ntfs_warning(sb, "Resident attribute list attribute "
+ "in $MFT system file is marked "
+ "encrypted/sparse which is not true. "
+ "However, Windows allows this and "
+ "chkdsk does not detect or correct it "
+ "so we will just ignore the invalid "
+ "flags and pretend they are not set.");
+ }
/* Now allocate memory for the attribute list. */
ni->attr_list_size = (u32)ntfs_attr_size(a);
ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
@@ -1889,8 +1981,7 @@ int ntfs_read_inode_mount(struct inode *vi)
goto em_put_err_out;
next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
le16_to_cpu(al_entry->length));
- if (le32_to_cpu(al_entry->type) >
- const_le32_to_cpu(AT_DATA))
+ if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
goto em_put_err_out;
if (AT_DATA != al_entry->type)
continue;
@@ -2032,7 +2123,8 @@ int ntfs_read_inode_mount(struct inode *vi)
* ntfs_read_inode() will have set up the default ones.
*/
/* Set uid and gid to root. */
- vi->i_uid = vi->i_gid = 0;
+ vi->i_uid = GLOBAL_ROOT_UID;
+ vi->i_gid = GLOBAL_ROOT_GID;
/* Regular file. No access for anyone. */
vi->i_mode = S_IFREG;
/* No VFS initiated operations allowed for $MFT. */
@@ -2078,6 +2170,14 @@ int ntfs_read_inode_mount(struct inode *vi)
ntfs_attr_put_search_ctx(ctx);
ntfs_debug("Done.");
ntfs_free(m);
+
+ /*
+ * Split the locking rules of the MFT inode from the
+ * locking rules of other inodes:
+ */
+ lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
+ lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
+
return 0;
em_put_err_out:
@@ -2092,37 +2192,6 @@ err_out:
return -1;
}
-/**
- * ntfs_put_inode - handler for when the inode reference count is decremented
- * @vi: vfs inode
- *
- * The VFS calls ntfs_put_inode() every time the inode reference count (i_count)
- * is about to be decremented (but before the decrement itself.
- *
- * If the inode @vi is a directory with two references, one of which is being
- * dropped, we need to put the attribute inode for the directory index bitmap,
- * if it is present, otherwise the directory inode would remain pinned for
- * ever.
- */
-void ntfs_put_inode(struct inode *vi)
-{
- if (S_ISDIR(vi->i_mode) && atomic_read(&vi->i_count) == 2) {
- ntfs_inode *ni = NTFS_I(vi);
- if (NInoIndexAllocPresent(ni)) {
- struct inode *bvi = NULL;
- down(&vi->i_sem);
- if (atomic_read(&vi->i_count) == 2) {
- bvi = ni->itype.index.bmp_ino;
- if (bvi)
- ni->itype.index.bmp_ino = NULL;
- }
- up(&vi->i_sem);
- if (bvi)
- iput(bvi);
- }
- }
-}
-
static void __ntfs_clear_inode(ntfs_inode *ni)
{
/* Free all alocated memory. */
@@ -2175,7 +2244,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
}
/**
- * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
+ * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
* @vi: vfs inode pending annihilation
*
* When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
@@ -2184,25 +2253,16 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
*
* If the MFT record is dirty, we commit it before doing anything else.
*/
-void ntfs_clear_big_inode(struct inode *vi)
+void ntfs_evict_big_inode(struct inode *vi)
{
ntfs_inode *ni = NTFS_I(vi);
- /*
- * If the inode @vi is an index inode we need to put the attribute
- * inode for the index bitmap, if it is present, otherwise the index
- * inode would disappear and the attribute inode for the index bitmap
- * would no longer be referenced from anywhere and thus it would remain
- * pinned for ever.
- */
- if (NInoAttr(ni) && (ni->type == AT_INDEX_ALLOCATION) &&
- NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) {
- iput(ni->itype.index.bmp_ino);
- ni->itype.index.bmp_ino = NULL;
- }
+ truncate_inode_pages_final(&vi->i_data);
+ clear_inode(vi);
+
#ifdef NTFS_RW
if (NInoDirty(ni)) {
- BOOL was_bad = (is_bad_inode(vi));
+ bool was_bad = (is_bad_inode(vi));
/* Committing the inode also commits all extent inodes. */
ntfs_commit_inode(vi);
@@ -2240,20 +2300,20 @@ void ntfs_clear_big_inode(struct inode *vi)
/**
* ntfs_show_options - show mount options in /proc/mounts
* @sf: seq_file in which to write our mount options
- * @mnt: vfs mount whose mount options to display
+ * @root: root of the mounted tree whose mount options to display
*
* Called by the VFS once for each mounted ntfs volume when someone reads
* /proc/mounts in order to display the NTFS specific mount options of each
- * mount. The mount options of the vfs mount @mnt are written to the seq file
+ * mount. The mount options of the fs specified by @root are written to the seq
+ * file @sf and success is returned.
*/
-int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
+int ntfs_show_options(struct seq_file *sf, struct dentry *root)
{
- ntfs_volume *vol = NTFS_SB(mnt->mnt_sb);
+ ntfs_volume *vol = NTFS_SB(root->d_sb);
int i;
- seq_printf(sf, ",uid=%i", vol->uid);
- seq_printf(sf, ",gid=%i", vol->gid);
+ seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
+ seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
if (vol->fmask == vol->dmask)
seq_printf(sf, ",umask=0%o", vol->fmask);
else {
@@ -2277,11 +2337,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
#ifdef NTFS_RW
+static const char *es = " Leaving inconsistent metadata. Unmount and run "
+ "chkdsk.";
+
/**
* ntfs_truncate - called when the i_size of an ntfs inode is changed
* @vi: inode for which the i_size was changed
*
- * We do not support i_size changes yet.
+ * We only support i_size changes for normal files at present, i.e. not
+ * compressed and not encrypted. This is enforced in ntfs_setattr(), see
+ * below.
*
* The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
* that the change is allowed.
@@ -2291,81 +2356,505 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
*
* Returns 0 on success or -errno on error.
*
- * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for
- * writing. The only case where ->i_alloc_sem is not held is
- * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset which means that it is a noop as far
- * as ntfs_truncate() is concerned.
+ * Called with ->i_mutex held.
*/
int ntfs_truncate(struct inode *vi)
{
- ntfs_inode *ni = NTFS_I(vi);
+ s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
+ VCN highest_vcn;
+ unsigned long flags;
+ ntfs_inode *base_ni, *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *m;
ATTR_RECORD *a;
const char *te = " Leaving file length out of sync with i_size.";
- int err;
+ int err, mp_size, size_change, alloc_change;
+ u32 attr_len;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
BUG_ON(NInoAttr(ni));
+ BUG_ON(S_ISDIR(vi->i_mode));
+ BUG_ON(NInoMstProtected(ni));
BUG_ON(ni->nr_extents < 0);
- m = map_mft_record(ni);
+retry_truncate:
+ /*
+ * Lock the runlist for writing and map the mft record to ensure it is
+ * safe to mess with the attribute runlist and sizes.
+ */
+ down_write(&ni->runlist.lock);
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ m = map_mft_record(base_ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
"(error code %d).%s", vi->i_ino, err, te);
ctx = NULL;
m = NULL;
- goto err_out;
+ goto old_bad_out;
}
- ctx = ntfs_attr_get_search_ctx(ni, m);
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
if (unlikely(!ctx)) {
ntfs_error(vi->i_sb, "Failed to allocate a search context for "
"inode 0x%lx (not enough memory).%s",
vi->i_ino, te);
err = -ENOMEM;
- goto err_out;
+ goto old_bad_out;
}
err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
CASE_SENSITIVE, 0, NULL, 0, ctx);
if (unlikely(err)) {
- if (err == -ENOENT)
+ if (err == -ENOENT) {
ntfs_error(vi->i_sb, "Open attribute is missing from "
"mft record. Inode 0x%lx is corrupt. "
- "Run chkdsk.", vi->i_ino);
- else
+ "Run chkdsk.%s", vi->i_ino, te);
+ err = -EIO;
+ } else
ntfs_error(vi->i_sb, "Failed to lookup attribute in "
- "inode 0x%lx (error code %d).",
- vi->i_ino, err);
- goto err_out;
+ "inode 0x%lx (error code %d).%s",
+ vi->i_ino, err, te);
+ goto old_bad_out;
}
+ m = ctx->mrec;
a = ctx->attr;
- /* If the size has not changed there is nothing to do. */
- if (ntfs_attr_size(a) == i_size_read(vi))
- goto done;
- // TODO: Implement the truncate...
- ntfs_error(vi->i_sb, "Inode size has changed but this is not "
- "implemented yet. Resetting inode size to old value. "
- " This is most likely a bug in the ntfs driver!");
- i_size_write(vi, ntfs_attr_size(a));
-done:
+ /*
+ * The i_size of the vfs inode is the new size for the attribute value.
+ */
+ new_size = i_size_read(vi);
+ /* The current size of the attribute value is the old size. */
+ old_size = ntfs_attr_size(a);
+ /* Calculate the new allocated size. */
+ if (NInoNonResident(ni))
+ new_alloc_size = (new_size + vol->cluster_size - 1) &
+ ~(s64)vol->cluster_size_mask;
+ else
+ new_alloc_size = (new_size + 7) & ~7;
+ /* The current allocated size is the old allocated size. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ old_alloc_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /*
+ * The change in the file size. This will be 0 if no change, >0 if the
+ * size is growing, and <0 if the size is shrinking.
+ */
+ size_change = -1;
+ if (new_size - old_size >= 0) {
+ size_change = 1;
+ if (new_size == old_size)
+ size_change = 0;
+ }
+ /* As above for the allocated size. */
+ alloc_change = -1;
+ if (new_alloc_size - old_alloc_size >= 0) {
+ alloc_change = 1;
+ if (new_alloc_size == old_alloc_size)
+ alloc_change = 0;
+ }
+ /*
+ * If neither the size nor the allocation are being changed there is
+ * nothing to do.
+ */
+ if (!size_change && !alloc_change)
+ goto unm_done;
+ /* If the size is changing, check if new size is allowed in $AttrDef. */
+ if (size_change) {
+ err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
+ if (unlikely(err)) {
+ if (err == -ERANGE) {
+ ntfs_error(vol->sb, "Truncate would cause the "
+ "inode 0x%lx to %simum size "
+ "for its attribute type "
+ "(0x%x). Aborting truncate.",
+ vi->i_ino,
+ new_size > old_size ? "exceed "
+ "the max" : "go under the min",
+ le32_to_cpu(ni->type));
+ err = -EFBIG;
+ } else {
+ ntfs_error(vol->sb, "Inode 0x%lx has unknown "
+ "attribute type 0x%x. "
+ "Aborting truncate.",
+ vi->i_ino,
+ le32_to_cpu(ni->type));
+ err = -EIO;
+ }
+ /* Reset the vfs inode size to the old size. */
+ i_size_write(vi, old_size);
+ goto err_out;
+ }
+ }
+ if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+ ntfs_warning(vi->i_sb, "Changes in inode size are not "
+ "supported yet for %s files, ignoring.",
+ NInoCompressed(ni) ? "compressed" :
+ "encrypted");
+ err = -EOPNOTSUPP;
+ goto bad_out;
+ }
+ if (a->non_resident)
+ goto do_non_resident_truncate;
+ BUG_ON(NInoNonResident(ni));
+ /* Resize the attribute record to best fit the new attribute size. */
+ if (new_size < vol->mft_record_size &&
+ !ntfs_resident_attr_value_resize(m, a, new_size)) {
+ /* The resize succeeded! */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ write_lock_irqsave(&ni->size_lock, flags);
+ /* Update the sizes in the ntfs inode and all is done. */
+ ni->allocated_size = le32_to_cpu(a->length) -
+ le16_to_cpu(a->data.resident.value_offset);
+ /*
+ * Note ntfs_resident_attr_value_resize() has already done any
+ * necessary data clearing in the attribute record. When the
+ * file is being shrunk vmtruncate() will already have cleared
+ * the top part of the last partial page, i.e. since this is
+ * the resident case this is the page with index 0. However,
+ * when the file is being expanded, the page cache page data
+ * between the old data_size, i.e. old_size, and the new_size
+ * has not been zeroed. Fortunately, we do not need to zero it
+ * either since on one hand it will either already be zero due
+ * to both readpage and writepage clearing partial page data
+ * beyond i_size in which case there is nothing to do or in the
+ * case of the file being mmap()ped at the same time, POSIX
+ * specifies that the behaviour is unspecified thus we do not
+ * have to do anything. This means that in our implementation
+ * in the rare case that the file is mmap()ped and a write
+ * occurred into the mmap()ped region just beyond the file size
+ * and writepage has not yet been called to write out the page
+ * (which would clear the area beyond the file size) and we now
+ * extend the file size to incorporate this dirty region
+ * outside the file size, a write of the page would result in
+ * this data being written to disk instead of being cleared.
+ * Given both POSIX and the Linux mmap(2) man page specify that
+ * this corner case is undefined, we choose to leave it like
+ * that as this is much simpler for us as we cannot lock the
+ * relevant page now since we are holding too many ntfs locks
+ * which would result in a lock reversal deadlock.
+ */
+ ni->initialized_size = new_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ goto unm_done;
+ }
+ /* If the above resize failed, this must be an attribute extension. */
+ BUG_ON(size_change < 0);
+ /*
+ * We have to drop all the locks so we can call
+ * ntfs_attr_make_non_resident(). This could be optimised by try-
+ * locking the first page cache page and only if that fails dropping
+ * the locks, locking the page, and redoing all the locking and
+ * lookups. While this would be a huge optimisation, it is not worth
+ * it as this is definitely a slow code path as it only ever can happen
+ * once for any given file.
+ */
ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- NInoClearTruncateFailed(ni);
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (err != -ENOMEM) {
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ /*
+ * Not enough space in the mft record, try to make the attribute
+ * non-resident and if successful restart the truncation process.
+ */
+ err = ntfs_attr_make_non_resident(ni, old_size);
+ if (likely(!err))
+ goto retry_truncate;
+ /*
+ * Could not make non-resident. If this is due to this not being
+ * permitted for this attribute type or there not being enough space,
+ * try to make other attributes non-resident. Otherwise fail.
+ */
+ if (unlikely(err != -EPERM && err != -ENOSPC)) {
+ ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
+ "type 0x%x, because the conversion from "
+ "resident to non-resident attribute failed "
+ "with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err != -ENOMEM)
+ err = -EIO;
+ goto conv_err_out;
+ }
+ /* TODO: Not implemented from here, abort. */
+ if (err == -ENOSPC)
+ ntfs_error(vol->sb, "Not enough space in the mft record/on "
+ "disk for the non-resident attribute value. "
+ "This case is not implemented yet.");
+ else /* if (err == -EPERM) */
+ ntfs_error(vol->sb, "This attribute type may not be "
+ "non-resident. This case is not implemented "
+ "yet.");
+ err = -EOPNOTSUPP;
+ goto conv_err_out;
+#if 0
+ // TODO: Attempt to make other attributes non-resident.
+ if (!err)
+ goto do_resident_extend;
+ /*
+ * Both the attribute list attribute and the standard information
+ * attribute must remain in the base inode. Thus, if this is one of
+ * these attributes, we have to try to move other attributes out into
+ * extent mft records instead.
+ */
+ if (ni->type == AT_ATTRIBUTE_LIST ||
+ ni->type == AT_STANDARD_INFORMATION) {
+ // TODO: Attempt to move other attributes into extent mft
+ // records.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ goto err_out;
+ }
+ // TODO: Attempt to move this attribute to an extent mft record, but
+ // only if it is not already the only attribute in an mft record in
+ // which case there would be nothing to gain.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ /* There is nothing we can do to make enough space. )-: */
+ goto err_out;
+#endif
+do_non_resident_truncate:
+ BUG_ON(!NInoNonResident(ni));
+ if (alloc_change < 0) {
+ highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+ if (highest_vcn > 0 &&
+ old_alloc_size >> vol->cluster_size_bits >
+ highest_vcn + 1) {
+ /*
+ * This attribute has multiple extents. Not yet
+ * supported.
+ */
+ ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
+ "attribute type 0x%x, because the "
+ "attribute is highly fragmented (it "
+ "consists of multiple extents) and "
+ "this case is not implemented yet.",
+ vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type));
+ err = -EOPNOTSUPP;
+ goto bad_out;
+ }
+ }
+ /*
+ * If the size is shrinking, need to reduce the initialized_size and
+ * the data_size before reducing the allocation.
+ */
+ if (size_change < 0) {
+ /*
+ * Make the valid size smaller (i_size is already up-to-date).
+ */
+ write_lock_irqsave(&ni->size_lock, flags);
+ if (new_size < ni->initialized_size) {
+ ni->initialized_size = new_size;
+ a->data.non_resident.initialized_size =
+ cpu_to_sle64(new_size);
+ }
+ a->data.non_resident.data_size = cpu_to_sle64(new_size);
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ /* If the allocated size is not changing, we are done. */
+ if (!alloc_change)
+ goto unm_done;
+ /*
+ * If the size is shrinking it makes no sense for the
+ * allocation to be growing.
+ */
+ BUG_ON(alloc_change > 0);
+ } else /* if (size_change >= 0) */ {
+ /*
+ * The file size is growing or staying the same but the
+ * allocation can be shrinking, growing or staying the same.
+ */
+ if (alloc_change > 0) {
+ /*
+ * We need to extend the allocation and possibly update
+ * the data size. If we are updating the data size,
+ * since we are not touching the initialized_size we do
+ * not need to worry about the actual data on disk.
+ * And as far as the page cache is concerned, there
+ * will be no pages beyond the old data size and any
+ * partial region in the last page between the old and
+ * new data size (or the end of the page if the new
+ * data size is outside the page) does not need to be
+ * modified as explained above for the resident
+ * attribute truncate case. To do this, we simply drop
+ * the locks we hold and leave all the work to our
+ * friendly helper ntfs_attr_extend_allocation().
+ */
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ err = ntfs_attr_extend_allocation(ni, new_size,
+ size_change > 0 ? new_size : -1, -1);
+ /*
+ * ntfs_attr_extend_allocation() will have done error
+ * output already.
+ */
+ goto done;
+ }
+ if (!alloc_change)
+ goto alloc_done;
+ }
+ /* alloc_change < 0 */
+ /* Free the clusters. */
+ nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
+ vol->cluster_size_bits, -1, ctx);
+ m = ctx->mrec;
+ a = ctx->attr;
+ if (unlikely(nr_freed < 0)) {
+ ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
+ "%lli). Unmount and run chkdsk to recover "
+ "the lost cluster(s).", (long long)nr_freed);
NVolSetErrors(vol);
- make_bad_inode(vi);
+ nr_freed = 0;
+ }
+ /* Truncate the runlist. */
+ err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
+ new_alloc_size >> vol->cluster_size_bits);
+ /*
+ * If the runlist truncation failed and/or the search context is no
+ * longer valid, we cannot resize the attribute record or build the
+ * mapping pairs array thus we mark the inode bad so that no access to
+ * the freed clusters can happen.
+ */
+ if (unlikely(err || IS_ERR(m))) {
+ ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
+ IS_ERR(m) ?
+ "restore attribute search context" :
+ "truncate attribute runlist",
+ IS_ERR(m) ? PTR_ERR(m) : err, es);
+ err = -EIO;
+ goto bad_out;
+ }
+ /* Get the size for the shrunk mapping pairs array for the runlist. */
+ mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
+ if (unlikely(mp_size <= 0)) {
+ ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+ "attribute type 0x%x, because determining the "
+ "size for the mapping pairs failed with error "
+ "code %i.%s", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), mp_size, es);
+ err = -EIO;
+ goto bad_out;
}
+ /*
+ * Shrink the attribute record for the new mapping pairs array. Note,
+ * this cannot fail since we are making the attribute smaller thus by
+ * definition there is enough space to do so.
+ */
+ attr_len = le32_to_cpu(a->length);
+ err = ntfs_attr_record_resize(m, a, mp_size +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+ BUG_ON(err);
+ /*
+ * Generate the mapping pairs array directly into the attribute record.
+ */
+ err = ntfs_mapping_pairs_build(vol, (u8*)a +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+ mp_size, ni->runlist.rl, 0, -1, NULL);
+ if (unlikely(err)) {
+ ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+ "attribute type 0x%x, because building the "
+ "mapping pairs failed with error code %i.%s",
+ vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+ err, es);
+ err = -EIO;
+ goto bad_out;
+ }
+ /* Update the allocated/compressed size as well as the highest vcn. */
+ a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+ vol->cluster_size_bits) - 1);
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->allocated_size = new_alloc_size;
+ a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ if (nr_freed) {
+ ni->itype.compressed.size -= nr_freed <<
+ vol->cluster_size_bits;
+ BUG_ON(ni->itype.compressed.size < 0);
+ a->data.non_resident.compressed_size = cpu_to_sle64(
+ ni->itype.compressed.size);
+ vi->i_blocks = ni->itype.compressed.size >> 9;
+ }
+ } else
+ vi->i_blocks = new_alloc_size >> 9;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ /*
+ * We have shrunk the allocation. If this is a shrinking truncate we
+ * have already dealt with the initialized_size and the data_size above
+ * and we are done. If the truncate is only changing the allocation
+ * and not the data_size, we are also done. If this is an extending
+ * truncate, need to extend the data_size now which is ensured by the
+ * fact that @size_change is positive.
+ */
+alloc_done:
+ /*
+ * If the size is growing, need to update it now. If it is shrinking,
+ * we have already updated it above (before the allocation change).
+ */
+ if (size_change > 0)
+ a->data.non_resident.data_size = cpu_to_sle64(new_size);
+ /* Ensure the modified mft record is written out. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+unm_done:
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+done:
+ /* Update the mtime and ctime on the base inode. */
+ /* Normally ->truncate should not update ctime or mtime,
+ * but ntfs did so before, hence it got a copy & paste version
+ * of file_update_time(). One day someone should fix this
+ * for real.
+ */
+ if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
+ struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb);
+ int sync_it = 0;
+
+ if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) ||
+ !timespec_equal(&VFS_I(base_ni)->i_ctime, &now))
+ sync_it = 1;
+ VFS_I(base_ni)->i_mtime = now;
+ VFS_I(base_ni)->i_ctime = now;
+
+ if (sync_it)
+ mark_inode_dirty_sync(VFS_I(base_ni));
+ }
+
+ if (likely(!err)) {
+ NInoClearTruncateFailed(ni);
+ ntfs_debug("Done.");
+ }
+ return err;
+old_bad_out:
+ old_size = -1;
+bad_out:
+ if (err != -ENOMEM && err != -EOPNOTSUPP)
+ NVolSetErrors(vol);
+ if (err != -EOPNOTSUPP)
+ NInoSetTruncateFailed(ni);
+ else if (old_size >= 0)
+ i_size_write(vi, old_size);
+err_out:
if (ctx)
ntfs_attr_put_search_ctx(ctx);
if (m)
- unmap_mft_record(ni);
- NInoSetTruncateFailed(ni);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+out:
+ ntfs_debug("Failed. Returning error code %i.", err);
return err;
+conv_err_out:
+ if (err != -ENOMEM && err != -EOPNOTSUPP)
+ NVolSetErrors(vol);
+ if (err != -EOPNOTSUPP)
+ NInoSetTruncateFailed(ni);
+ else
+ i_size_write(vi, old_size);
+ goto out;
}
/**
@@ -2376,9 +2865,11 @@ err_out:
*
* See ntfs_truncate() description above for details.
*/
+#ifdef NTFS_RW
void ntfs_truncate_vfs(struct inode *vi) {
ntfs_truncate(vi);
}
+#endif
/**
* ntfs_setattr - called from notify_change() when an attribute is being changed
@@ -2392,11 +2883,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
* We also abort all changes of user, group, and mode as we do not implement
* the NTFS ACLs yet.
*
- * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also
- * called with ->i_alloc_sem held for writing.
- *
- * Basically this is a copy of generic notify_change() and inode_setattr()
- * functionality, except we intercept and abort changes in i_size.
+ * Called with ->i_mutex held.
*/
int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
{
@@ -2406,8 +2893,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
err = inode_change_ok(vi, attr);
if (err)
- return err;
-
+ goto out;
/* We do not support NTFS ACLs yet. */
if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
@@ -2415,14 +2901,24 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
err = -EOPNOTSUPP;
goto out;
}
-
if (ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(vi)) {
- ntfs_warning(vi->i_sb, "Changes in inode size are not "
- "supported yet, ignoring.");
- err = -EOPNOTSUPP;
- // TODO: Implement...
- // err = vmtruncate(vi, attr->ia_size);
+ ntfs_inode *ni = NTFS_I(vi);
+ /*
+ * FIXME: For now we do not support resizing of
+ * compressed or encrypted files yet.
+ */
+ if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+ ntfs_warning(vi->i_sb, "Changes in inode size "
+ "are not supported yet for "
+ "%s files, ignoring.",
+ NInoCompressed(ni) ?
+ "compressed" : "encrypted");
+ err = -EOPNOTSUPP;
+ } else {
+ truncate_setsize(vi, attr->ia_size);
+ ntfs_truncate_vfs(vi);
+ }
if (err || ia_valid == ATTR_SIZE)
goto out;
} else {
@@ -2430,16 +2926,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
* We skipped the truncate but must still update
* timestamps.
*/
- ia_valid |= ATTR_MTIME|ATTR_CTIME;
+ ia_valid |= ATTR_MTIME | ATTR_CTIME;
}
}
-
if (ia_valid & ATTR_ATIME)
- vi->i_atime = attr->ia_atime;
+ vi->i_atime = timespec_trunc(attr->ia_atime,
+ vi->i_sb->s_time_gran);
if (ia_valid & ATTR_MTIME)
- vi->i_mtime = attr->ia_mtime;
+ vi->i_mtime = timespec_trunc(attr->ia_mtime,
+ vi->i_sb->s_time_gran);
if (ia_valid & ATTR_CTIME)
- vi->i_ctime = attr->ia_ctime;
+ vi->i_ctime = timespec_trunc(attr->ia_ctime,
+ vi->i_sb->s_time_gran);
mark_inode_dirty(vi);
out:
return err;
@@ -2463,7 +2961,7 @@ out:
*
* Return 0 on success and -errno on error.
*/
-int ntfs_write_inode(struct inode *vi, int sync)
+int __ntfs_write_inode(struct inode *vi, int sync)
{
sle64 nt;
ntfs_inode *ni = NTFS_I(vi);
@@ -2471,7 +2969,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
MFT_RECORD *m;
STANDARD_INFORMATION *si;
int err = 0;
- BOOL modified = FALSE;
+ bool modified = false;
ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
vi->i_ino);
@@ -2513,7 +3011,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
sle64_to_cpu(si->last_data_change_time),
(long long)sle64_to_cpu(nt));
si->last_data_change_time = nt;
- modified = TRUE;
+ modified = true;
}
nt = utc2ntfs(vi->i_ctime);
if (si->last_mft_change_time != nt) {
@@ -2522,7 +3020,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
sle64_to_cpu(si->last_mft_change_time),
(long long)sle64_to_cpu(nt));
si->last_mft_change_time = nt;
- modified = TRUE;
+ modified = true;
}
nt = utc2ntfs(vi->i_atime);
if (si->last_access_time != nt) {
@@ -2531,7 +3029,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
(long long)sle64_to_cpu(si->last_access_time),
(long long)sle64_to_cpu(nt));
si->last_access_time = nt;
- modified = TRUE;
+ modified = true;
}
/*
* If we just modified the standard information attribute we need to
@@ -2549,15 +3047,18 @@ int ntfs_write_inode(struct inode *vi, int sync)
* record will be cleaned and written out to disk below, i.e. before
* this function returns.
*/
- if (modified && !NInoTestSetDirty(ctx->ntfs_ino))
- mark_ntfs_record_dirty(ctx->ntfs_ino->page,
- ctx->ntfs_ino->page_ofs);
+ if (modified) {
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ if (!NInoTestSetDirty(ctx->ntfs_ino))
+ mark_ntfs_record_dirty(ctx->ntfs_ino->page,
+ ctx->ntfs_ino->page_ofs);
+ }
ntfs_attr_put_search_ctx(ctx);
/* Now the access times are updated, write the base mft record. */
if (NInoDirty(ni))
err = write_mft_record(ni, m, sync);
/* Write all attached extent mft records. */
- down(&ni->extent_lock);
+ mutex_lock(&ni->extent_lock);
if (ni->nr_extents > 0) {
ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
int i;
@@ -2584,7 +3085,7 @@ int ntfs_write_inode(struct inode *vi, int sync)
}
}
}
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
unmap_mft_record(ni);
if (unlikely(err))
goto err_out;
@@ -2599,9 +3100,7 @@ err_out:
"retries later.");
mark_inode_dirty(vi);
} else {
- ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode "
- "as bad. You should run chkdsk.", -err);
- make_bad_inode(vi);
+ ntfs_error(vi->i_sb, "Failed (error %i): Run chkdsk.", -err);
NVolSetErrors(ni->vol);
}
return err;
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 3de5c023196..76b6cfb579d 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -2,7 +2,7 @@
* inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
* the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2007 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -24,12 +24,13 @@
#ifndef _LINUX_NTFS_INODE_H
#define _LINUX_NTFS_INODE_H
-#include <linux/mm.h>
+#include <linux/atomic.h>
+
#include <linux/fs.h>
-#include <linux/seq_file.h>
#include <linux/list.h>
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
#include "layout.h"
#include "volume.h"
@@ -81,7 +82,7 @@ struct _ntfs_inode {
* The following fields are only valid for real inodes and extent
* inodes.
*/
- struct semaphore mrec_lock; /* Lock for serializing access to the
+ struct mutex mrec_lock; /* Lock for serializing access to the
mft record belonging to this inode. */
struct page *page; /* The page containing the mft record of the
inode. This should only be touched by the
@@ -100,8 +101,6 @@ struct _ntfs_inode {
runlist attr_list_rl; /* Run list for the attribute list value. */
union {
struct { /* It is a directory, $MFT, or an index inode. */
- struct inode *bmp_ino; /* Attribute inode for the
- index $BITMAP. */
u32 block_size; /* Size of an index block. */
u32 vcn_size; /* Size of a vcn in this
index. */
@@ -119,7 +118,7 @@ struct _ntfs_inode {
u8 block_clusters; /* Number of clusters per cb. */
} compressed;
} itype;
- struct semaphore extent_lock; /* Lock for accessing/modifying the
+ struct mutex extent_lock; /* Lock for accessing/modifying the
below . */
s32 nr_extents; /* For a base mft record, the number of attached extent
inodes (0 if none), for extent records and for fake
@@ -280,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
extern void ntfs_destroy_big_inode(struct inode *inode);
-extern void ntfs_clear_big_inode(struct inode *vi);
+extern void ntfs_evict_big_inode(struct inode *vi);
extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
@@ -299,9 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
extern int ntfs_read_inode_mount(struct inode *vi);
-extern void ntfs_put_inode(struct inode *vi);
-
-extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt);
+extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
#ifdef NTFS_RW
@@ -310,15 +307,19 @@ extern void ntfs_truncate_vfs(struct inode *vi);
extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
-extern int ntfs_write_inode(struct inode *vi, int sync);
+extern int __ntfs_write_inode(struct inode *vi, int sync);
static inline void ntfs_commit_inode(struct inode *vi)
{
if (!is_bad_inode(vi))
- ntfs_write_inode(vi, 1);
+ __ntfs_write_inode(vi, 1);
return;
}
+#else
+
+static inline void ntfs_truncate_vfs(struct inode *vi) {}
+
#endif /* NTFS_RW */
#endif /* _LINUX_NTFS_INODE_H */
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 609ad1728ce..809c0e6d8e0 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -31,19 +31,8 @@
#include "types.h"
-/*
- * Constant endianness conversion defines.
- */
-#define const_le16_to_cpu(x) __constant_le16_to_cpu(x)
-#define const_le32_to_cpu(x) __constant_le32_to_cpu(x)
-#define const_le64_to_cpu(x) __constant_le64_to_cpu(x)
-
-#define const_cpu_to_le16(x) __constant_cpu_to_le16(x)
-#define const_cpu_to_le32(x) __constant_cpu_to_le32(x)
-#define const_cpu_to_le64(x) __constant_cpu_to_le64(x)
-
/* The NTFS oem_id "NTFS " */
-#define magicNTFS const_cpu_to_le64(0x202020205346544eULL)
+#define magicNTFS cpu_to_le64(0x202020205346544eULL)
/*
* Location of bootsector on partition:
@@ -114,25 +103,25 @@ typedef struct {
*/
enum {
/* Found in $MFT/$DATA. */
- magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */
- magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */
- magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
+ magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */
+ magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */
+ magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
/* Found in $LogFile/$DATA. */
- magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */
- magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */
+ magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */
+ magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */
/* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
- magic_CHKD = const_cpu_to_le32(0x424b4843), /* Modified by chkdsk. */
+ magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
/* Found in all ntfs record containing records. */
- magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector
+ magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector
transfer was detected. */
/*
* Found in $LogFile/$DATA when a page is full of 0xff bytes and is
* thus not initialized. Page must be initialized before using it.
*/
- magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */
+ magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */
};
typedef le32 NTFS_RECORD_TYPE;
@@ -142,13 +131,13 @@ typedef le32 NTFS_RECORD_TYPE;
* operator! (-8
*/
-static inline BOOL __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r)
+static inline bool __ntfs_is_magic(le32 x, NTFS_RECORD_TYPE r)
{
return (x == r);
}
#define ntfs_is_magic(x, m) __ntfs_is_magic(x, magic_##m)
-static inline BOOL __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r)
+static inline bool __ntfs_is_magicp(le32 *p, NTFS_RECORD_TYPE r)
{
return (*p == r);
}
@@ -258,8 +247,8 @@ typedef enum {
* information about the mft record in which they are present.
*/
enum {
- MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001),
- MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002),
+ MFT_RECORD_IN_USE = cpu_to_le16(0x0001),
+ MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002),
} __attribute__ ((__packed__));
typedef le16 MFT_RECORD_FLAGS;
@@ -297,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS;
* fragmented. Volume free space includes the empty part of the mft zone and
* when the volume's free 88% are used up, the mft zone is shrunk by a factor
* of 2, thus making more space available for more files/data. This process is
- * repeated everytime there is no more free space except for the mft zone until
+ * repeated every time there is no more free space except for the mft zone until
* there really is no more free space.
*/
@@ -308,10 +297,8 @@ typedef le16 MFT_RECORD_FLAGS;
* The _LE versions are to be applied on little endian MFT_REFs.
* Note: The _LE versions will return a CPU endian formatted value!
*/
-typedef enum {
- MFT_REF_MASK_CPU = 0x0000ffffffffffffULL,
- MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL),
-} MFT_REF_CONSTS;
+#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
+#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU)
typedef u64 MFT_REF;
typedef le64 leMFT_REF;
@@ -325,7 +312,7 @@ typedef le64 leMFT_REF;
#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU))
#define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff))
-#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 1 : 0)
+#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? true : false)
#define ERR_MREF(x) ((u64)((s64)(x)))
#define MREF_ERR(x) ((int)((s64)(x)))
@@ -479,25 +466,25 @@ typedef struct {
* a revealing choice of symbol I do not know what is... (-;
*/
enum {
- AT_UNUSED = const_cpu_to_le32( 0),
- AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10),
- AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20),
- AT_FILE_NAME = const_cpu_to_le32( 0x30),
- AT_OBJECT_ID = const_cpu_to_le32( 0x40),
- AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50),
- AT_VOLUME_NAME = const_cpu_to_le32( 0x60),
- AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70),
- AT_DATA = const_cpu_to_le32( 0x80),
- AT_INDEX_ROOT = const_cpu_to_le32( 0x90),
- AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0),
- AT_BITMAP = const_cpu_to_le32( 0xb0),
- AT_REPARSE_POINT = const_cpu_to_le32( 0xc0),
- AT_EA_INFORMATION = const_cpu_to_le32( 0xd0),
- AT_EA = const_cpu_to_le32( 0xe0),
- AT_PROPERTY_SET = const_cpu_to_le32( 0xf0),
- AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100),
- AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000),
- AT_END = const_cpu_to_le32(0xffffffff)
+ AT_UNUSED = cpu_to_le32( 0),
+ AT_STANDARD_INFORMATION = cpu_to_le32( 0x10),
+ AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20),
+ AT_FILE_NAME = cpu_to_le32( 0x30),
+ AT_OBJECT_ID = cpu_to_le32( 0x40),
+ AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50),
+ AT_VOLUME_NAME = cpu_to_le32( 0x60),
+ AT_VOLUME_INFORMATION = cpu_to_le32( 0x70),
+ AT_DATA = cpu_to_le32( 0x80),
+ AT_INDEX_ROOT = cpu_to_le32( 0x90),
+ AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0),
+ AT_BITMAP = cpu_to_le32( 0xb0),
+ AT_REPARSE_POINT = cpu_to_le32( 0xc0),
+ AT_EA_INFORMATION = cpu_to_le32( 0xd0),
+ AT_EA = cpu_to_le32( 0xe0),
+ AT_PROPERTY_SET = cpu_to_le32( 0xf0),
+ AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100),
+ AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000),
+ AT_END = cpu_to_le32(0xffffffff)
};
typedef le32 ATTR_TYPE;
@@ -541,13 +528,13 @@ typedef le32 ATTR_TYPE;
* equal then the second le32 values would be compared, etc.
*/
enum {
- COLLATION_BINARY = const_cpu_to_le32(0x00),
- COLLATION_FILE_NAME = const_cpu_to_le32(0x01),
- COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02),
- COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10),
- COLLATION_NTOFS_SID = const_cpu_to_le32(0x11),
- COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12),
- COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13),
+ COLLATION_BINARY = cpu_to_le32(0x00),
+ COLLATION_FILE_NAME = cpu_to_le32(0x01),
+ COLLATION_UNICODE_STRING = cpu_to_le32(0x02),
+ COLLATION_NTOFS_ULONG = cpu_to_le32(0x10),
+ COLLATION_NTOFS_SID = cpu_to_le32(0x11),
+ COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12),
+ COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13),
};
typedef le32 COLLATION_RULE;
@@ -561,25 +548,25 @@ typedef le32 COLLATION_RULE;
* NT4.
*/
enum {
- ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be
+ ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be
indexed. */
- ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type
+ ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type
can be present multiple times in the
mft records of an inode. */
- ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value
+ ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value
must contain at least one non-zero
byte. */
- ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be
+ ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be
indexed and the attribute value must be
unique for the attribute type in all of
the mft records of an inode. */
- ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be
+ ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be
named and the name must be unique for
the attribute type in all of the mft
records of an inode. */
- ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be
+ ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be
resident. */
- ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log
+ ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log
modifications to this attribute,
regardless of whether it is resident or
non-resident. Without this, only log
@@ -616,12 +603,12 @@ typedef struct {
* Attribute flags (16-bit).
*/
enum {
- ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001),
- ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method
+ ATTR_IS_COMPRESSED = cpu_to_le16(0x0001),
+ ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method
mask. Also, first
illegal value. */
- ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000),
- ATTR_IS_SPARSE = const_cpu_to_le16(0x8000),
+ ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000),
+ ATTR_IS_SPARSE = cpu_to_le16(0x8000),
} __attribute__ ((__packed__));
typedef le16 ATTR_FLAGS;
@@ -771,7 +758,7 @@ typedef struct {
compressed. (This effectively limits the
compression unit size to be a power of two
clusters.) WinNT4 only uses a value of 4.
- Sparse files also have this set to 4. */
+ Sparse files have this set to 0 on XPSP2. */
/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */
/* The sizes below are only used when lowest_vcn is zero, as otherwise it would
be difficult to keep them up-to-date.*/
@@ -803,53 +790,56 @@ typedef struct {
typedef ATTR_RECORD ATTR_REC;
/*
- * File attribute flags (32-bit).
+ * File attribute flags (32-bit) appearing in the file_attributes fields of the
+ * STANDARD_INFORMATION attribute of MFT_RECORDs and the FILENAME_ATTR
+ * attributes of MFT_RECORDs and directory index entries.
+ *
+ * All of the below flags appear in the directory index entries but only some
+ * appear in the STANDARD_INFORMATION attribute whilst only some others appear
+ * in the FILENAME_ATTR attribute of MFT_RECORDs. Unless otherwise stated the
+ * flags appear in all of the above.
*/
enum {
- /*
- * The following flags are only present in the STANDARD_INFORMATION
- * attribute (in the field file_attributes).
- */
- FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001),
- FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002),
- FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004),
- /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */
+ FILE_ATTR_READONLY = cpu_to_le32(0x00000001),
+ FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002),
+ FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004),
+ /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */
- FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010),
+ FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010),
/* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is
reserved for the DOS SUBDIRECTORY flag. */
- FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020),
- FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040),
- FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080),
+ FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020),
+ FILE_ATTR_DEVICE = cpu_to_le32(0x00000040),
+ FILE_ATTR_NORMAL = cpu_to_le32(0x00000080),
- FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100),
- FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200),
- FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400),
- FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800),
+ FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100),
+ FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200),
+ FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400),
+ FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800),
- FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000),
- FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000),
- FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000),
+ FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000),
+ FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
+ FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000),
- FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7),
+ FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7),
/* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the
FILE_ATTR_DEVICE and preserves everything else. This mask is used
to obtain all flags that are valid for reading. */
- FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7),
+ FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7),
/* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
- is used to to obtain all flags that are valid for setting. */
-
+ is used to obtain all flags that are valid for setting. */
/*
- * The following flags are only present in the FILE_NAME attribute (in
- * the field file_attributes).
+ * The flag FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT is present in all
+ * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
+ * attribute of an mft record.
*/
- FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000),
+ FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000),
/* Note, this is a copy of the corresponding bit from the mft record,
telling us whether this is a directory or not, i.e. whether it has
an index root attribute or not. */
- FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000),
+ FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000),
/* Note, this is a copy of the corresponding bit from the mft record,
telling us whether this file has a view index present (eg. object id
index, quota index, one of the security indexes or the encrypting
@@ -889,7 +879,7 @@ typedef struct {
Windows this is only updated when
accessed if some time delta has
passed since the last update. Also,
- last access times updates can be
+ last access time updates can be
disabled altogether for speed. */
/* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
/* 36*/ union {
@@ -1023,10 +1013,17 @@ enum {
FILE_NAME_POSIX = 0x00,
/* This is the largest namespace. It is case sensitive and allows all
Unicode characters except for: '\0' and '/'. Beware that in
- WinNT/2k files which eg have the same name except for their case
- will not be distinguished by the standard utilities and thus a "del
- filename" will delete both "filename" and "fileName" without
- warning. */
+ WinNT/2k/2003 by default files which eg have the same name except
+ for their case will not be distinguished by the standard utilities
+ and thus a "del filename" will delete both "filename" and "fileName"
+ without warning. However if for example Services For Unix (SFU) are
+ installed and the case sensitive option was enabled at installation
+ time, then you can create/access/delete such files.
+ Note that even SFU places restrictions on the filenames beyond the
+ '\0' and '/' and in particular the following set of characters is
+ not allowed: '"', '/', '<', '>', '\'. All other characters,
+ including the ones not allowed in WIN32 namespace are allowed.
+ Tested with SFU 3.5 (this is now free) running on Windows XP. */
FILE_NAME_WIN32 = 0x01,
/* The standard WinNT/2k NTFS long filenames. Case insensitive. All
Unicode chars except: '\0', '"', '*', '/', ':', '<', '>', '?', '\',
@@ -1066,11 +1063,22 @@ typedef struct {
modified. */
/* 20*/ sle64 last_access_time; /* Time this mft record was last
accessed. */
-/* 28*/ sle64 allocated_size; /* Byte size of allocated space for the
- data attribute. NOTE: Is a multiple
- of the cluster size. */
-/* 30*/ sle64 data_size; /* Byte size of actual data in data
- attribute. */
+/* 28*/ sle64 allocated_size; /* Byte size of on-disk allocated space
+ for the unnamed data attribute. So
+ for normal $DATA, this is the
+ allocated_size from the unnamed
+ $DATA attribute and for compressed
+ and/or sparse $DATA, this is the
+ compressed_size from the unnamed
+ $DATA attribute. For a directory or
+ other inode without an unnamed $DATA
+ attribute, this is always 0. NOTE:
+ This is a multiple of the cluster
+ size. */
+/* 30*/ sle64 data_size; /* Byte size of actual data in unnamed
+ data attribute. For a directory or
+ other inode without an unnamed $DATA
+ attribute, this is always 0. */
/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
/* 3c*/ union {
/* 3c*/ struct {
@@ -1427,42 +1435,42 @@ enum {
/* Specific rights for files and directories are as follows: */
/* Right to read data from the file. (FILE) */
- FILE_READ_DATA = const_cpu_to_le32(0x00000001),
+ FILE_READ_DATA = cpu_to_le32(0x00000001),
/* Right to list contents of a directory. (DIRECTORY) */
- FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001),
+ FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001),
/* Right to write data to the file. (FILE) */
- FILE_WRITE_DATA = const_cpu_to_le32(0x00000002),
+ FILE_WRITE_DATA = cpu_to_le32(0x00000002),
/* Right to create a file in the directory. (DIRECTORY) */
- FILE_ADD_FILE = const_cpu_to_le32(0x00000002),
+ FILE_ADD_FILE = cpu_to_le32(0x00000002),
/* Right to append data to the file. (FILE) */
- FILE_APPEND_DATA = const_cpu_to_le32(0x00000004),
+ FILE_APPEND_DATA = cpu_to_le32(0x00000004),
/* Right to create a subdirectory. (DIRECTORY) */
- FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004),
+ FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004),
/* Right to read extended attributes. (FILE/DIRECTORY) */
- FILE_READ_EA = const_cpu_to_le32(0x00000008),
+ FILE_READ_EA = cpu_to_le32(0x00000008),
/* Right to write extended attributes. (FILE/DIRECTORY) */
- FILE_WRITE_EA = const_cpu_to_le32(0x00000010),
+ FILE_WRITE_EA = cpu_to_le32(0x00000010),
/* Right to execute a file. (FILE) */
- FILE_EXECUTE = const_cpu_to_le32(0x00000020),
+ FILE_EXECUTE = cpu_to_le32(0x00000020),
/* Right to traverse the directory. (DIRECTORY) */
- FILE_TRAVERSE = const_cpu_to_le32(0x00000020),
+ FILE_TRAVERSE = cpu_to_le32(0x00000020),
/*
* Right to delete a directory and all the files it contains (its
* children), even if the files are read-only. (DIRECTORY)
*/
- FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040),
+ FILE_DELETE_CHILD = cpu_to_le32(0x00000040),
/* Right to read file attributes. (FILE/DIRECTORY) */
- FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080),
+ FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080),
/* Right to change file attributes. (FILE/DIRECTORY) */
- FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100),
+ FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100),
/*
* The standard rights (bits 16 to 23). These are independent of the
@@ -1470,27 +1478,27 @@ enum {
*/
/* Right to delete the object. */
- DELETE = const_cpu_to_le32(0x00010000),
+ DELETE = cpu_to_le32(0x00010000),
/*
* Right to read the information in the object's security descriptor,
* not including the information in the SACL, i.e. right to read the
* security descriptor and owner.
*/
- READ_CONTROL = const_cpu_to_le32(0x00020000),
+ READ_CONTROL = cpu_to_le32(0x00020000),
/* Right to modify the DACL in the object's security descriptor. */
- WRITE_DAC = const_cpu_to_le32(0x00040000),
+ WRITE_DAC = cpu_to_le32(0x00040000),
/* Right to change the owner in the object's security descriptor. */
- WRITE_OWNER = const_cpu_to_le32(0x00080000),
+ WRITE_OWNER = cpu_to_le32(0x00080000),
/*
* Right to use the object for synchronization. Enables a process to
* wait until the object is in the signalled state. Some object types
* do not support this access right.
*/
- SYNCHRONIZE = const_cpu_to_le32(0x00100000),
+ SYNCHRONIZE = cpu_to_le32(0x00100000),
/*
* The following STANDARD_RIGHTS_* are combinations of the above for
@@ -1498,25 +1506,25 @@ enum {
*/
/* These are currently defined to READ_CONTROL. */
- STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000),
- STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000),
- STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000),
+ STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000),
+ STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000),
+ STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000),
/* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
- STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000),
+ STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000),
/*
* Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
* SYNCHRONIZE access.
*/
- STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000),
+ STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000),
/*
* The access system ACL and maximum allowed access types (bits 24 to
* 25, bits 26 to 27 are reserved).
*/
- ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000),
- MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000),
+ ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000),
+ MAXIMUM_ALLOWED = cpu_to_le32(0x02000000),
/*
* The generic rights (bits 28 to 31). These map onto the standard and
@@ -1524,10 +1532,10 @@ enum {
*/
/* Read, write, and execute access. */
- GENERIC_ALL = const_cpu_to_le32(0x10000000),
+ GENERIC_ALL = cpu_to_le32(0x10000000),
/* Execute access. */
- GENERIC_EXECUTE = const_cpu_to_le32(0x20000000),
+ GENERIC_EXECUTE = cpu_to_le32(0x20000000),
/*
* Write access. For files, this maps onto:
@@ -1536,7 +1544,7 @@ enum {
* For directories, the mapping has the same numerical value. See
* above for the descriptions of the rights granted.
*/
- GENERIC_WRITE = const_cpu_to_le32(0x40000000),
+ GENERIC_WRITE = cpu_to_le32(0x40000000),
/*
* Read access. For files, this maps onto:
@@ -1545,7 +1553,7 @@ enum {
* For directories, the mapping has the same numberical value. See
* above for the descriptions of the rights granted.
*/
- GENERIC_READ = const_cpu_to_le32(0x80000000),
+ GENERIC_READ = cpu_to_le32(0x80000000),
};
typedef le32 ACCESS_MASK;
@@ -1585,8 +1593,8 @@ typedef struct {
* The object ACE flags (32-bit).
*/
enum {
- ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1),
- ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2),
+ ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1),
+ ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2),
};
typedef le32 OBJECT_ACE_FLAGS;
@@ -1649,13 +1657,13 @@ typedef enum {
* pointed to by the Owner field was provided by a defaulting mechanism
* rather than explicitly provided by the original provider of the
* security descriptor. This may affect the treatment of the SID with
- * respect to inheritence of an owner.
+ * respect to inheritance of an owner.
*
* SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in
* the Group field was provided by a defaulting mechanism rather than
* explicitly provided by the original provider of the security
* descriptor. This may affect the treatment of the SID with respect to
- * inheritence of a primary group.
+ * inheritance of a primary group.
*
* SE_DACL_PRESENT - This boolean flag, when set, indicates that the security
* descriptor contains a discretionary ACL. If this flag is set and the
@@ -1666,7 +1674,7 @@ typedef enum {
* pointed to by the Dacl field was provided by a defaulting mechanism
* rather than explicitly provided by the original provider of the
* security descriptor. This may affect the treatment of the ACL with
- * respect to inheritence of an ACL. This flag is ignored if the
+ * respect to inheritance of an ACL. This flag is ignored if the
* DaclPresent flag is not set.
*
* SE_SACL_PRESENT - This boolean flag, when set, indicates that the security
@@ -1678,7 +1686,7 @@ typedef enum {
* pointed to by the Sacl field was provided by a defaulting mechanism
* rather than explicitly provided by the original provider of the
* security descriptor. This may affect the treatment of the ACL with
- * respect to inheritence of an ACL. This flag is ignored if the
+ * respect to inheritance of an ACL. This flag is ignored if the
* SaclPresent flag is not set.
*
* SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security
@@ -1687,23 +1695,23 @@ typedef enum {
* expressed as offsets from the beginning of the security descriptor.
*/
enum {
- SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001),
- SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002),
- SE_DACL_PRESENT = const_cpu_to_le16(0x0004),
- SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008),
-
- SE_SACL_PRESENT = const_cpu_to_le16(0x0010),
- SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020),
-
- SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100),
- SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200),
- SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400),
- SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800),
-
- SE_DACL_PROTECTED = const_cpu_to_le16(0x1000),
- SE_SACL_PROTECTED = const_cpu_to_le16(0x2000),
- SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000),
- SE_SELF_RELATIVE = const_cpu_to_le16(0x8000)
+ SE_OWNER_DEFAULTED = cpu_to_le16(0x0001),
+ SE_GROUP_DEFAULTED = cpu_to_le16(0x0002),
+ SE_DACL_PRESENT = cpu_to_le16(0x0004),
+ SE_DACL_DEFAULTED = cpu_to_le16(0x0008),
+
+ SE_SACL_PRESENT = cpu_to_le16(0x0010),
+ SE_SACL_DEFAULTED = cpu_to_le16(0x0020),
+
+ SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100),
+ SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200),
+ SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400),
+ SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800),
+
+ SE_DACL_PROTECTED = cpu_to_le16(0x1000),
+ SE_SACL_PROTECTED = cpu_to_le16(0x2000),
+ SE_RM_CONTROL_VALID = cpu_to_le16(0x4000),
+ SE_SELF_RELATIVE = cpu_to_le16(0x8000)
} __attribute__ ((__packed__));
typedef le16 SECURITY_DESCRIPTOR_CONTROL;
@@ -1891,20 +1899,21 @@ typedef struct {
* Possible flags for the volume (16-bit).
*/
enum {
- VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001),
- VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002),
- VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004),
- VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008),
+ VOLUME_IS_DIRTY = cpu_to_le16(0x0001),
+ VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002),
+ VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004),
+ VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008),
- VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010),
- VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020),
+ VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010),
+ VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020),
- VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000),
+ VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000),
+ VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000),
- VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f),
+ VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f),
/* To make our life easier when checking if we must mount read-only. */
- VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8027),
+ VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027),
} __attribute__ ((__packed__));
typedef le16 VOLUME_FLAGS;
@@ -1999,14 +2008,14 @@ typedef struct {
*
* When a directory is small enough to fit inside the index root then this
* is the only attribute describing the directory. When the directory is too
- * large to fit in the index root, on the other hand, two aditional attributes
+ * large to fit in the index root, on the other hand, two additional attributes
* are present: an index allocation attribute, containing sub-nodes of the B+
* directory tree (see below), and a bitmap attribute, describing which virtual
* cluster numbers (vcns) in the index allocation attribute are in use by an
* index block.
*
* NOTE: The root directory (FILE_root) contains an entry for itself. Other
- * dircetories do not contain entries for themselves, though.
+ * directories do not contain entries for themselves, though.
*/
typedef struct {
ATTR_TYPE type; /* Type of the indexed attribute. Is
@@ -2089,26 +2098,26 @@ typedef struct {
* The user quota flags. Names explain meaning.
*/
enum {
- QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001),
- QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002),
- QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004),
+ QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001),
+ QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002),
+ QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004),
- QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007),
+ QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007),
/* This is a bit mask for the user quota flags. */
/*
* These flags are only present in the quota defaults index entry, i.e.
* in the entry where owner_id = QUOTA_DEFAULTS_ID.
*/
- QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010),
- QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020),
- QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040),
- QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080),
-
- QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100),
- QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200),
- QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400),
- QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800),
+ QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010),
+ QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020),
+ QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040),
+ QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080),
+
+ QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100),
+ QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200),
+ QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400),
+ QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800),
};
typedef le32 QUOTA_FLAGS;
@@ -2152,9 +2161,9 @@ typedef struct {
* Predefined owner_id values (32-bit).
*/
enum {
- QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000),
- QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001),
- QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100),
+ QUOTA_INVALID_ID = cpu_to_le32(0x00000000),
+ QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001),
+ QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100),
};
/*
@@ -2169,14 +2178,14 @@ typedef enum {
* Index entry flags (16-bit).
*/
enum {
- INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a
+ INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a
sub-node, i.e. a reference to an index block in form of
a virtual cluster number (see below). */
- INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last
+ INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last
entry in an index block. The index entry does not
represent a file but it can point to a sub-node. */
- INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force
+ INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force
enum bit width to 16-bit. */
} __attribute__ ((__packed__));
@@ -2274,7 +2283,7 @@ typedef struct {
// the key_length is zero, then the vcn immediately
// follows the INDEX_ENTRY_HEADER. Regardless of
// key_length, the address of the 8-byte boundary
- // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
+ // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
// (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN),
// where sizeof(VCN) can be hardcoded as 8 if wanted. */
} __attribute__ ((__packed__)) INDEX_ENTRY;
@@ -2314,26 +2323,26 @@ typedef struct {
* These are the predefined reparse point tags:
*/
enum {
- IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000),
- IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000),
- IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000),
+ IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000),
+ IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000),
+ IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000),
- IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000),
- IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001),
- IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001),
+ IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000),
+ IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001),
+ IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001),
- IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005),
- IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006),
- IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007),
- IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008),
+ IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005),
+ IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006),
+ IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007),
+ IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008),
- IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003),
+ IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003),
- IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004),
+ IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004),
- IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000),
+ IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000),
- IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff),
+ IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff),
};
/*
@@ -2369,7 +2378,9 @@ typedef struct {
* Extended attribute flags (8-bit).
*/
enum {
- NEED_EA = 0x80
+ NEED_EA = 0x80 /* If set the file to which the EA belongs
+ cannot be interpreted without understanding
+ the associated extended attributes. */
} __attribute__ ((__packed__));
typedef u8 EA_FLAGS;
@@ -2377,20 +2388,20 @@ typedef u8 EA_FLAGS;
/*
* Attribute: Extended attribute (EA) (0xe0).
*
- * NOTE: Always non-resident. (Is this true?)
+ * NOTE: Can be resident or non-resident.
*
* Like the attribute list and the index buffer list, the EA attribute value is
* a sequence of EA_ATTR variable length records.
- *
- * FIXME: It appears weird that the EA name is not unicode. Is it true?
*/
typedef struct {
le32 next_entry_offset; /* Offset to the next EA_ATTR. */
EA_FLAGS flags; /* Flags describing the EA. */
- u8 ea_name_length; /* Length of the name of the EA in bytes. */
+ u8 ea_name_length; /* Length of the name of the EA in bytes
+ excluding the '\0' byte terminator. */
le16 ea_value_length; /* Byte size of the EA's value. */
- u8 ea_name[0]; /* Name of the EA. */
- u8 ea_value[0]; /* The value of the EA. Immediately follows
+ u8 ea_name[0]; /* Name of the EA. Note this is ASCII, not
+ Unicode and it is zero terminated. */
+ u8 ea_value[0]; /* The value of the EA. Immediately follows
the name. */
} __attribute__ ((__packed__)) EA_ATTR;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index a4bc07616e5..1711b710b64 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
int ret = 0;
ntfs_debug("Entering.");
+ if (!rl)
+ return 0;
for (; rl->length; rl++) {
int err;
@@ -74,6 +76,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
* @count: number of clusters to allocate
* @start_lcn: starting lcn at which to allocate the clusters (or -1 if none)
* @zone: zone from which to allocate the clusters
+ * @is_extension: if 'true', this is an attribute extension
*
* Allocate @count clusters preferably starting at cluster @start_lcn or at the
* current allocator position if @start_lcn is -1, on the mounted ntfs volume
@@ -84,6 +87,13 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
* @start_vcn specifies the vcn of the first allocated cluster. This makes
* merging the resulting runlist with the old runlist easier.
*
+ * If @is_extension is 'true', the caller is allocating clusters to extend an
+ * attribute and if it is 'false', the caller is allocating clusters to fill a
+ * hole in an attribute. Practically the difference is that if @is_extension
+ * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
+ * @is_extension is 'false' the runlist will be terminated with
+ * LCN_RL_NOT_MAPPED.
+ *
* You need to check the return value with IS_ERR(). If this is false, the
* function was successful and the return value is a runlist describing the
* allocated cluster(s). If IS_ERR() is true, the function failed and
@@ -135,7 +145,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
*/
runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
const s64 count, const LCN start_lcn,
- const NTFS_CLUSTER_ALLOCATION_ZONES zone)
+ const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+ const bool is_extension)
{
LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
@@ -163,17 +174,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
BUG_ON(zone < FIRST_ZONE);
BUG_ON(zone > LAST_ZONE);
- /* Return empty runlist if @count == 0 */
- // FIXME: Do we want to just return NULL instead? (AIA)
- if (!count) {
- rl = ntfs_malloc_nofs(PAGE_SIZE);
- if (!rl)
- return ERR_PTR(-ENOMEM);
- rl[0].vcn = start_vcn;
- rl[0].lcn = LCN_RL_NOT_MAPPED;
- rl[0].length = 0;
- return rl;
- }
+ /* Return NULL if @count is zero. */
+ if (!count)
+ return NULL;
/* Take the lcnbmp lock for writing. */
down_write(&vol->lcnbmp_lock);
/*
@@ -316,7 +319,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
continue;
}
bit = 1 << (lcn & 7);
- ntfs_debug("bit %i.", bit);
+ ntfs_debug("bit 0x%x.", bit);
/* If the bit is already set, go onto the next one. */
if (*byte & bit) {
lcn++;
@@ -735,7 +738,7 @@ out:
/* Add runlist terminator element. */
if (likely(rl)) {
rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
- rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+ rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
rl[rlpos].length = 0;
}
if (likely(page && !IS_ERR(page))) {
@@ -785,52 +788,78 @@ out:
/**
* __ntfs_cluster_free - free clusters on an ntfs volume
- * @vi: vfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
+ * @ni: ntfs inode whose runlist describes the clusters to free
+ * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
- * @is_rollback: if TRUE this is a rollback operation
+ * @ctx: active attribute search context if present or NULL if not
+ * @is_rollback: true if this is a rollback operation
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
+ * described by the ntfs inode @ni.
+ * described by the vfs inode @ni.
*
* If @count is -1, all clusters from @start_vcn to the end of the runlist are
* deallocated. Thus, to completely free all clusters in a runlist, use
* @start_vcn = 0 and @count = -1.
*
- * @is_rollback should always be FALSE, it is for internal use to rollback
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record. This is needed when __ntfs_cluster_free() encounters unmapped
+ * runlist fragments and allows their mapping. If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
+ * perform the necessary mapping and unmapping.
+ *
+ * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning. Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry. However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
+ * you will probably want to do:
+ * m = ctx->mrec;
+ * a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
+ * @is_rollback should always be 'false', it is for internal use to rollback
* errors. You probably want to use ntfs_cluster_free() instead.
*
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
+ * remove from the runlist or mark sparse the freed runs later.
*
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- * unlocked on return.
- * - This function takes the runlist lock of @vi for reading and
- * sometimes for writing and sometimes modifies the runlist.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
+ * is no longer valid, i.e. you need to either call
+ * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ * In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ * why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
+ * - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ * entry and it will be left unmapped on return.
+ * - If @ctx is not NULL, the base mft record must be mapped on entry
+ * and it will be left mapped on return.
*/
-s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
- const BOOL is_rollback)
+s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
+ ntfs_attr_search_ctx *ctx, const bool is_rollback)
{
s64 delta, to_free, total_freed, real_freed;
- ntfs_inode *ni;
ntfs_volume *vol;
struct inode *lcnbmp_vi;
runlist_element *rl;
int err;
- BUG_ON(!vi);
+ BUG_ON(!ni);
ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
- "0x%llx.%s", vi->i_ino, (unsigned long long)start_vcn,
+ "0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
(unsigned long long)count,
is_rollback ? " (rollback)" : "");
- ni = NTFS_I(vi);
vol = ni->vol;
lcnbmp_vi = vol->lcnbmp_ino;
BUG_ON(!lcnbmp_vi);
@@ -848,8 +877,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
total_freed = real_freed = 0;
- down_read(&ni->runlist.lock);
- rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
+ rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
if (IS_ERR(rl)) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find first runlist "
@@ -903,7 +931,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* Attempt to map runlist. */
vcn = rl->vcn;
- rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
+ rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
if (!is_rollback)
@@ -950,7 +978,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* Update the total done clusters. */
total_freed += to_free;
}
- up_read(&ni->runlist.lock);
if (likely(!is_rollback))
up_write(&vol->lcnbmp_lock);
@@ -960,7 +987,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
ntfs_debug("Done.");
return real_freed;
err_out:
- up_read(&ni->runlist.lock);
if (is_rollback)
return err;
/* If no real clusters were freed, no need to rollback. */
@@ -973,7 +999,7 @@ err_out:
* If rollback fails, set the volume errors flag, emit an error
* message, and return the error code.
*/
- delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE);
+ delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
if (delta < 0) {
ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
"inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index 4cac1c024af..2adb0431694 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -2,7 +2,7 @@
* lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the
* Linux-NTFS project.
*
- * Copyright (c) 2004 Anton Altaparmakov
+ * Copyright (c) 2004-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -27,7 +27,9 @@
#include <linux/fs.h>
+#include "attrib.h"
#include "types.h"
+#include "inode.h"
#include "runlist.h"
#include "volume.h"
@@ -40,43 +42,72 @@ typedef enum {
extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
const VCN start_vcn, const s64 count, const LCN start_lcn,
- const NTFS_CLUSTER_ALLOCATION_ZONES zone);
+ const NTFS_CLUSTER_ALLOCATION_ZONES zone,
+ const bool is_extension);
-extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count, const BOOL is_rollback);
+extern s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+ s64 count, ntfs_attr_search_ctx *ctx, const bool is_rollback);
/**
* ntfs_cluster_free - free clusters on an ntfs volume
- * @vi: vfs inode whose runlist describes the clusters to free
- * @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
+ * @ni: ntfs inode whose runlist describes the clusters to free
+ * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
+ * @ctx: active attribute search context if present or NULL if not
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
- * described by the vfs inode @vi.
+ * described by the ntfs inode @ni.
*
* If @count is -1, all clusters from @start_vcn to the end of the runlist are
* deallocated. Thus, to completely free all clusters in a runlist, use
* @start_vcn = 0 and @count = -1.
*
- * Note, ntfs_cluster_free() does not modify the runlist at all, so the caller
- * has to deal with it later.
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record. This is needed when ntfs_cluster_free() encounters unmapped runlist
+ * fragments and allows their mapping. If you do not have the mft record
+ * mapped, you can specify @ctx as NULL and ntfs_cluster_free() will perform
+ * the necessary mapping and unmapping.
+ *
+ * Note, ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning. Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry. However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to ntfs_cluster_free(),
+ * you will probably want to do:
+ * m = ctx->mrec;
+ * a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
+ * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
+ *
+ * Note, ntfs_cluster_free() does not modify the runlist, so you have to remove
+ * from the runlist or mark sparse the freed runs later.
*
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- * unlocked on return.
- * - This function takes the runlist lock of @vi for reading and
- * sometimes for writing and sometimes modifies the runlist.
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
+ * is no longer valid, i.e. you need to either call
+ * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ * In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ * why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
+ * - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ * entry and it will be left unmapped on return.
+ * - If @ctx is not NULL, the base mft record must be mapped on entry
+ * and it will be left mapped on return.
*/
-static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count)
+static inline s64 ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn,
+ s64 count, ntfs_attr_search_ctx *ctx)
{
- return __ntfs_cluster_free(vi, start_vcn, count, FALSE);
+ return __ntfs_cluster_free(ni, start_vcn, count, ctx, false);
}
extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
@@ -93,8 +124,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
*
* Return 0 on success and -errno on error.
*
- * Locking: This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
+ * Locking: - This function takes the volume lcn bitmap lock for writing and
+ * modifies the bitmap contents.
+ * - The caller must have locked the runlist @rl for reading or
+ * writing.
*/
static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
const runlist_element *rl)
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 8edb8e20fb0..c71de292c5a 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -1,7 +1,7 @@
/*
* logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2002-2005 Anton Altaparmakov
+ * Copyright (c) 2002-2007 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -26,6 +26,7 @@
#include <linux/highmem.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
+#include <linux/log2.h>
#include "attrib.h"
#include "aops.h"
@@ -41,17 +42,18 @@
* @rp: restart page header to check
* @pos: position in @vi at which the restart page header resides
*
- * Check the restart page header @rp for consistency and return TRUE if it is
- * consistent and FALSE otherwise.
+ * Check the restart page header @rp for consistency and return 'true' if it is
+ * consistent and 'false' otherwise.
*
* This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
* require the full restart page.
*/
-static BOOL ntfs_check_restart_page_header(struct inode *vi,
+static bool ntfs_check_restart_page_header(struct inode *vi,
RESTART_PAGE_HEADER *rp, s64 pos)
{
u32 logfile_system_page_size, logfile_log_page_size;
- u16 usa_count, usa_ofs, usa_end, ra_ofs;
+ u16 ra_ofs, usa_count, usa_ofs, usa_end = 0;
+ bool have_usa = true;
ntfs_debug("Entering.");
/*
@@ -64,9 +66,9 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
logfile_log_page_size < NTFS_BLOCK_SIZE ||
logfile_system_page_size &
(logfile_system_page_size - 1) ||
- logfile_log_page_size & (logfile_log_page_size - 1)) {
+ !is_power_of_2(logfile_log_page_size)) {
ntfs_error(vi->i_sb, "$LogFile uses unsupported page size.");
- return FALSE;
+ return false;
}
/*
* We must be either at !pos (1st restart page) or at pos = system page
@@ -75,7 +77,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
if (pos && pos != logfile_system_page_size) {
ntfs_error(vi->i_sb, "Found restart area in incorrect "
"position in $LogFile.");
- return FALSE;
+ return false;
}
/* We only know how to handle version 1.1. */
if (sle16_to_cpu(rp->major_ver) != 1 ||
@@ -84,14 +86,22 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
"supported. (This driver supports version "
"1.1 only.)", (int)sle16_to_cpu(rp->major_ver),
(int)sle16_to_cpu(rp->minor_ver));
- return FALSE;
+ return false;
+ }
+ /*
+ * If chkdsk has been run the restart page may not be protected by an
+ * update sequence array.
+ */
+ if (ntfs_is_chkd_record(rp->magic) && !le16_to_cpu(rp->usa_count)) {
+ have_usa = false;
+ goto skip_usa_checks;
}
/* Verify the size of the update sequence array. */
usa_count = 1 + (logfile_system_page_size >> NTFS_BLOCK_SIZE_BITS);
if (usa_count != le16_to_cpu(rp->usa_count)) {
ntfs_error(vi->i_sb, "$LogFile restart page specifies "
"inconsistent update sequence array count.");
- return FALSE;
+ return false;
}
/* Verify the position of the update sequence array. */
usa_ofs = le16_to_cpu(rp->usa_ofs);
@@ -100,8 +110,9 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
usa_end > NTFS_BLOCK_SIZE - sizeof(u16)) {
ntfs_error(vi->i_sb, "$LogFile restart page specifies "
"inconsistent update sequence array offset.");
- return FALSE;
+ return false;
}
+skip_usa_checks:
/*
* Verify the position of the restart area. It must be:
* - aligned to 8-byte boundary,
@@ -109,11 +120,12 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
* - within the system page size.
*/
ra_ofs = le16_to_cpu(rp->restart_area_offset);
- if (ra_ofs & 7 || ra_ofs < usa_end ||
+ if (ra_ofs & 7 || (have_usa ? ra_ofs < usa_end :
+ ra_ofs < sizeof(RESTART_PAGE_HEADER)) ||
ra_ofs > logfile_system_page_size) {
ntfs_error(vi->i_sb, "$LogFile restart page specifies "
"inconsistent restart area offset.");
- return FALSE;
+ return false;
}
/*
* Only restart pages modified by chkdsk are allowed to have chkdsk_lsn
@@ -121,11 +133,11 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
*/
if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
- "chkdsk but a chkdsk LSN is specified.");
- return FALSE;
+ "by chkdsk but a chkdsk LSN is specified.");
+ return false;
}
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
@@ -134,7 +146,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
* @rp: restart page whose restart area to check
*
* Check the restart area of the restart page @rp for consistency and return
- * TRUE if it is consistent and FALSE otherwise.
+ * 'true' if it is consistent and 'false' otherwise.
*
* This function assumes that the restart page header has already been
* consistency checked.
@@ -142,7 +154,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
* This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
* require the full restart page.
*/
-static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
+static bool ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
{
u64 file_size;
RESTART_AREA *ra;
@@ -161,7 +173,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
NTFS_BLOCK_SIZE - sizeof(u16)) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"inconsistent file offset.");
- return FALSE;
+ return false;
}
/*
* Now that we can access ra->client_array_offset, make sure everything
@@ -175,7 +187,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
ra_ofs + ca_ofs > NTFS_BLOCK_SIZE - sizeof(u16)) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"inconsistent client array offset.");
- return FALSE;
+ return false;
}
/*
* The restart area must end within the system page size both when
@@ -192,7 +204,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
"of the system page size specified by the "
"restart page header and/or the specified "
"restart area length is inconsistent.");
- return FALSE;
+ return false;
}
/*
* The ra->client_free_list and ra->client_in_use_list must be either
@@ -207,7 +219,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
le16_to_cpu(ra->log_clients))) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"overflowing client free and/or in use lists.");
- return FALSE;
+ return false;
}
/*
* Check ra->seq_number_bits against ra->file_size for consistency.
@@ -222,24 +234,24 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
if (le32_to_cpu(ra->seq_number_bits) != 67 - fs_bits) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"inconsistent sequence number bits.");
- return FALSE;
+ return false;
}
/* The log record header length must be a multiple of 8. */
if (((le16_to_cpu(ra->log_record_header_length) + 7) & ~7) !=
le16_to_cpu(ra->log_record_header_length)) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"inconsistent log record header length.");
- return FALSE;
+ return false;
}
/* Dito for the log page data offset. */
if (((le16_to_cpu(ra->log_page_data_offset) + 7) & ~7) !=
le16_to_cpu(ra->log_page_data_offset)) {
ntfs_error(vi->i_sb, "$LogFile restart area specifies "
"inconsistent log page data offset.");
- return FALSE;
+ return false;
}
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
@@ -248,7 +260,7 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
* @rp: restart page whose log client array to check
*
* Check the log client array of the restart page @rp for consistency and
- * return TRUE if it is consistent and FALSE otherwise.
+ * return 'true' if it is consistent and 'false' otherwise.
*
* This function assumes that the restart page header and the restart area have
* already been consistency checked.
@@ -257,13 +269,13 @@ static BOOL ntfs_check_restart_area(struct inode *vi, RESTART_PAGE_HEADER *rp)
* function needs @rp->system_page_size bytes in @rp, i.e. it requires the full
* restart page and the page must be multi sector transfer deprotected.
*/
-static BOOL ntfs_check_log_client_array(struct inode *vi,
+static bool ntfs_check_log_client_array(struct inode *vi,
RESTART_PAGE_HEADER *rp)
{
RESTART_AREA *ra;
LOG_CLIENT_RECORD *ca, *cr;
u16 nr_clients, idx;
- BOOL in_free_list, idx_is_first;
+ bool in_free_list, idx_is_first;
ntfs_debug("Entering.");
ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
@@ -279,9 +291,9 @@ static BOOL ntfs_check_log_client_array(struct inode *vi,
*/
nr_clients = le16_to_cpu(ra->log_clients);
idx = le16_to_cpu(ra->client_free_list);
- in_free_list = TRUE;
+ in_free_list = true;
check_list:
- for (idx_is_first = TRUE; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--,
+ for (idx_is_first = true; idx != LOGFILE_NO_CLIENT_CPU; nr_clients--,
idx = le16_to_cpu(cr->next_client)) {
if (!nr_clients || idx >= le16_to_cpu(ra->log_clients))
goto err_out;
@@ -291,20 +303,20 @@ check_list:
if (idx_is_first) {
if (cr->prev_client != LOGFILE_NO_CLIENT)
goto err_out;
- idx_is_first = FALSE;
+ idx_is_first = false;
}
}
/* Switch to and check the in use list if we just did the free list. */
if (in_free_list) {
- in_free_list = FALSE;
+ in_free_list = false;
idx = le16_to_cpu(ra->client_in_use_list);
goto check_list;
}
ntfs_debug("Done.");
- return TRUE;
+ return true;
err_out:
ntfs_error(vi->i_sb, "$LogFile log client array is corrupt.");
- return FALSE;
+ return false;
}
/**
@@ -312,10 +324,12 @@ err_out:
* @vi: $LogFile inode to which the restart page belongs
* @rp: restart page to check
* @pos: position in @vi at which the restart page resides
- * @wrp: copy of the multi sector transfer deprotected restart page
+ * @wrp: [OUT] copy of the multi sector transfer deprotected restart page
+ * @lsn: [OUT] set to the current logfile lsn on success
*
- * Check the restart page @rp for consistency and return TRUE if it is
- * consistent and FALSE otherwise.
+ * Check the restart page @rp for consistency and return 0 if it is consistent
+ * and -errno otherwise. The restart page may have been modified by chkdsk in
+ * which case its magic is CHKD instead of RSTR.
*
* This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
* require the full restart page.
@@ -323,25 +337,33 @@ err_out:
* If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
* copy of the complete multi sector transfer deprotected page. On failure,
* *@wrp is undefined.
+ *
+ * Similarly, if @lsn is not NULL, on success *@lsn will be set to the current
+ * logfile lsn according to this restart page. On failure, *@lsn is undefined.
+ *
+ * The following error codes are defined:
+ * -EINVAL - The restart page is inconsistent.
+ * -ENOMEM - Not enough memory to load the restart page.
+ * -EIO - Failed to read from $LogFile.
*/
-static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
- RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp)
+static int ntfs_check_and_load_restart_page(struct inode *vi,
+ RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
+ LSN *lsn)
{
RESTART_AREA *ra;
RESTART_PAGE_HEADER *trp;
- int size;
- BOOL ret;
+ int size, err;
ntfs_debug("Entering.");
/* Check the restart page header for consistency. */
if (!ntfs_check_restart_page_header(vi, rp, pos)) {
/* Error output already done inside the function. */
- return FALSE;
+ return -EINVAL;
}
/* Check the restart area for consistency. */
if (!ntfs_check_restart_area(vi, rp)) {
/* Error output already done inside the function. */
- return FALSE;
+ return -EINVAL;
}
ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
/*
@@ -352,7 +374,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
if (!trp) {
ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
"restart page buffer.");
- return FALSE;
+ return -ENOMEM;
}
/*
* Read the whole of the restart page into the buffer. If it fits
@@ -379,6 +401,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
if (IS_ERR(page)) {
ntfs_error(vi->i_sb, "Error mapping $LogFile "
"page (index %lu).", idx);
+ err = PTR_ERR(page);
+ if (err != -EIO && err != -ENOMEM)
+ err = -EIO;
goto err_out;
}
size = min_t(int, to_read, PAGE_CACHE_SIZE);
@@ -389,32 +414,64 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
idx++;
} while (to_read > 0);
}
- /* Perform the multi sector transfer deprotection on the buffer. */
- if (post_read_mst_fixup((NTFS_RECORD*)trp,
+ /*
+ * Perform the multi sector transfer deprotection on the buffer if the
+ * restart page is protected.
+ */
+ if ((!ntfs_is_chkd_record(trp->magic) || le16_to_cpu(trp->usa_count))
+ && post_read_mst_fixup((NTFS_RECORD*)trp,
le32_to_cpu(rp->system_page_size))) {
- ntfs_error(vi->i_sb, "Multi sector transfer error detected in "
- "$LogFile restart page.");
- goto err_out;
+ /*
+ * A multi sector transfer error was detected. We only need to
+ * abort if the restart page contents exceed the multi sector
+ * transfer fixup of the first sector.
+ */
+ if (le16_to_cpu(rp->restart_area_offset) +
+ le16_to_cpu(ra->restart_area_length) >
+ NTFS_BLOCK_SIZE - sizeof(u16)) {
+ ntfs_error(vi->i_sb, "Multi sector transfer error "
+ "detected in $LogFile restart page.");
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ /*
+ * If the restart page is modified by chkdsk or there are no active
+ * logfile clients, the logfile is consistent. Otherwise, need to
+ * check the log client records for consistency, too.
+ */
+ err = 0;
+ if (ntfs_is_rstr_record(rp->magic) &&
+ ra->client_in_use_list != LOGFILE_NO_CLIENT) {
+ if (!ntfs_check_log_client_array(vi, trp)) {
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ if (lsn) {
+ if (ntfs_is_rstr_record(rp->magic))
+ *lsn = sle64_to_cpu(ra->current_lsn);
+ else /* if (ntfs_is_chkd_record(rp->magic)) */
+ *lsn = sle64_to_cpu(rp->chkdsk_lsn);
}
- /* Check the log client records for consistency. */
- ret = ntfs_check_log_client_array(vi, trp);
- if (ret && wrp)
- *wrp = trp;
- else
- ntfs_free(trp);
ntfs_debug("Done.");
- return ret;
+ if (wrp)
+ *wrp = trp;
+ else {
err_out:
- ntfs_free(trp);
- return FALSE;
+ ntfs_free(trp);
+ }
+ return err;
}
/**
* ntfs_check_logfile - check the journal for consistency
* @log_vi: struct inode of loaded journal $LogFile to check
+ * @rp: [OUT] on success this is a copy of the current restart page
*
- * Check the $LogFile journal for consistency and return TRUE if it is
- * consistent and FALSE if not.
+ * Check the $LogFile journal for consistency and return 'true' if it is
+ * consistent and 'false' if not. On success, the current restart page is
+ * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it.
*
* At present we only check the two restart pages and ignore the log record
* pages.
@@ -424,19 +481,18 @@ err_out:
* if the $LogFile was created on a system with a different page size to ours
* yet and mst deprotection would fail if our page size is smaller.
*/
-BOOL ntfs_check_logfile(struct inode *log_vi)
+bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
{
- s64 size, pos, rstr1_pos, rstr2_pos;
+ s64 size, pos;
+ LSN rstr1_lsn, rstr2_lsn;
ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
struct address_space *mapping = log_vi->i_mapping;
struct page *page = NULL;
u8 *kaddr = NULL;
RESTART_PAGE_HEADER *rstr1_ph = NULL;
RESTART_PAGE_HEADER *rstr2_ph = NULL;
- int log_page_size, log_page_mask, ofs;
- BOOL logfile_is_empty = TRUE;
- BOOL rstr1_found = FALSE;
- BOOL rstr2_found = FALSE;
+ int log_page_size, log_page_mask, err;
+ bool logfile_is_empty = true;
u8 log_page_bits;
ntfs_debug("Entering.");
@@ -460,10 +516,10 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
log_page_size = PAGE_CACHE_SIZE;
log_page_mask = log_page_size - 1;
/*
- * Use generic_ffs() instead of ffs() to enable the compiler to
+ * Use ntfs_ffs() instead of ffs() to enable the compiler to
* optimize log_page_size and log_page_bits into constants.
*/
- log_page_bits = generic_ffs(log_page_size) - 1;
+ log_page_bits = ntfs_ffs(log_page_size) - 1;
size &= ~(s64)(log_page_size - 1);
/*
* Ensure the log file is big enough to store at least the two restart
@@ -472,7 +528,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
if (size < log_page_size * 2 || (size - log_page_size * 2) >>
log_page_bits < MinLogRecordPages) {
ntfs_error(vol->sb, "$LogFile is too small.");
- return FALSE;
+ return false;
}
/*
* Read through the file looking for a restart page. Since the restart
@@ -491,7 +547,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Error mapping $LogFile "
"page (index %lu).", idx);
- return FALSE;
+ goto err_out;
}
}
kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
@@ -501,7 +557,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
* means we are done.
*/
if (!ntfs_is_empty_recordp((le32*)kaddr))
- logfile_is_empty = FALSE;
+ logfile_is_empty = false;
else if (!logfile_is_empty)
break;
/*
@@ -510,145 +566,135 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
*/
if (ntfs_is_rcrd_recordp((le32*)kaddr))
break;
- /*
- * A modified by chkdsk restart page means we cannot handle
- * this log file.
- */
- if (ntfs_is_chkd_recordp((le32*)kaddr)) {
- ntfs_error(vol->sb, "$LogFile has been modified by "
- "chkdsk. Mount this volume in "
- "Windows.");
- goto err_out;
- }
- /* If not a restart page, continue. */
- if (!ntfs_is_rstr_recordp((le32*)kaddr)) {
- /* Skip to the minimum page size for the next one. */
+ /* If not a (modified by chkdsk) restart page, continue. */
+ if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
+ !ntfs_is_chkd_recordp((le32*)kaddr)) {
if (!pos)
pos = NTFS_BLOCK_SIZE >> 1;
continue;
}
- /* We now know we have a restart page. */
- if (!pos) {
- rstr1_found = TRUE;
- rstr1_pos = pos;
- } else {
- if (rstr2_found) {
- ntfs_error(vol->sb, "Found more than two "
- "restart pages in $LogFile.");
- goto err_out;
- }
- rstr2_found = TRUE;
- rstr2_pos = pos;
- }
/*
- * Check the restart page for consistency and get a copy of the
- * complete multi sector transfer deprotected restart page.
+ * Check the (modified by chkdsk) restart page for consistency
+ * and get a copy of the complete multi sector transfer
+ * deprotected restart page.
*/
- if (!ntfs_check_and_load_restart_page(log_vi,
+ err = ntfs_check_and_load_restart_page(log_vi,
(RESTART_PAGE_HEADER*)kaddr, pos,
- !pos ? &rstr1_ph : &rstr2_ph)) {
- /* Error output already done inside the function. */
- goto err_out;
+ !rstr1_ph ? &rstr1_ph : &rstr2_ph,
+ !rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
+ if (!err) {
+ /*
+ * If we have now found the first (modified by chkdsk)
+ * restart page, continue looking for the second one.
+ */
+ if (!pos) {
+ pos = NTFS_BLOCK_SIZE >> 1;
+ continue;
+ }
+ /*
+ * We have now found the second (modified by chkdsk)
+ * restart page, so we can stop looking.
+ */
+ break;
}
/*
- * We have a valid restart page. The next one must be after
- * a whole system page size as specified by the valid restart
- * page.
+ * Error output already done inside the function. Note, we do
+ * not abort if the restart page was invalid as we might still
+ * find a valid one further in the file.
*/
+ if (err != -EINVAL) {
+ ntfs_unmap_page(page);
+ goto err_out;
+ }
+ /* Continue looking. */
if (!pos)
- pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1;
+ pos = NTFS_BLOCK_SIZE >> 1;
}
- if (page) {
+ if (page)
ntfs_unmap_page(page);
- page = NULL;
- }
if (logfile_is_empty) {
NVolSetLogFileEmpty(vol);
is_empty:
ntfs_debug("Done. ($LogFile is empty.)");
- return TRUE;
+ return true;
}
- if (!rstr1_found || !rstr2_found) {
- ntfs_error(vol->sb, "Did not find two restart pages in "
- "$LogFile.");
- goto err_out;
+ if (!rstr1_ph) {
+ BUG_ON(rstr2_ph);
+ ntfs_error(vol->sb, "Did not find any restart pages in "
+ "$LogFile and it was not empty.");
+ return false;
+ }
+ /* If both restart pages were found, use the more recent one. */
+ if (rstr2_ph) {
+ /*
+ * If the second restart area is more recent, switch to it.
+ * Otherwise just throw it away.
+ */
+ if (rstr2_lsn > rstr1_lsn) {
+ ntfs_debug("Using second restart page as it is more "
+ "recent.");
+ ntfs_free(rstr1_ph);
+ rstr1_ph = rstr2_ph;
+ /* rstr1_lsn = rstr2_lsn; */
+ } else {
+ ntfs_debug("Using first restart page as it is more "
+ "recent.");
+ ntfs_free(rstr2_ph);
+ }
+ rstr2_ph = NULL;
}
- /*
- * The two restart areas must be identical except for the update
- * sequence number.
- */
- ofs = le16_to_cpu(rstr1_ph->usa_ofs);
- if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16),
- memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs,
- le32_to_cpu(rstr1_ph->system_page_size) - ofs))) {
- ntfs_error(vol->sb, "The two restart pages in $LogFile do not "
- "match.");
- goto err_out;
- }
- ntfs_free(rstr1_ph);
- ntfs_free(rstr2_ph);
/* All consistency checks passed. */
+ if (rp)
+ *rp = rstr1_ph;
+ else
+ ntfs_free(rstr1_ph);
ntfs_debug("Done.");
- return TRUE;
+ return true;
err_out:
- if (page)
- ntfs_unmap_page(page);
if (rstr1_ph)
ntfs_free(rstr1_ph);
- if (rstr2_ph)
- ntfs_free(rstr2_ph);
- return FALSE;
+ return false;
}
/**
* ntfs_is_logfile_clean - check in the journal if the volume is clean
* @log_vi: struct inode of loaded journal $LogFile to check
+ * @rp: copy of the current restart page
*
- * Analyze the $LogFile journal and return TRUE if it indicates the volume was
- * shutdown cleanly and FALSE if not.
+ * Analyze the $LogFile journal and return 'true' if it indicates the volume was
+ * shutdown cleanly and 'false' if not.
*
* At present we only look at the two restart pages and ignore the log record
* pages. This is a little bit crude in that there will be a very small number
* of cases where we think that a volume is dirty when in fact it is clean.
* This should only affect volumes that have not been shutdown cleanly but did
* not have any pending, non-check-pointed i/o, i.e. they were completely idle
- * at least for the five seconds preceeding the unclean shutdown.
+ * at least for the five seconds preceding the unclean shutdown.
*
* This function assumes that the $LogFile journal has already been consistency
* checked by a call to ntfs_check_logfile() and in particular if the $LogFile
* is empty this function requires that NVolLogFileEmpty() is true otherwise an
* empty volume will be reported as dirty.
*/
-BOOL ntfs_is_logfile_clean(struct inode *log_vi)
+bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
{
ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
- struct page *page;
- RESTART_PAGE_HEADER *rp;
RESTART_AREA *ra;
ntfs_debug("Entering.");
/* An empty $LogFile must have been clean before it got emptied. */
if (NVolLogFileEmpty(vol)) {
ntfs_debug("Done. ($LogFile is empty.)");
- return TRUE;
+ return true;
}
- /*
- * Read the first restart page. It will be possibly incomplete and
- * will not be multi sector transfer deprotected but we only need the
- * first NTFS_BLOCK_SIZE bytes so it does not matter.
- */
- page = ntfs_map_page(log_vi->i_mapping, 0);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Error mapping $LogFile page (index 0).");
- return FALSE;
- }
- rp = (RESTART_PAGE_HEADER*)page_address(page);
- if (!ntfs_is_rstr_record(rp->magic)) {
- ntfs_error(vol->sb, "No restart page found at offset zero in "
- "$LogFile. This is probably a bug in that "
- "the $LogFile should have been consistency "
- "checked before calling this function.");
- goto err_out;
+ BUG_ON(!rp);
+ if (!ntfs_is_rstr_record(rp->magic) &&
+ !ntfs_is_chkd_record(rp->magic)) {
+ ntfs_error(vol->sb, "Restart page buffer is invalid. This is "
+ "probably a bug in that the $LogFile should "
+ "have been consistency checked before calling "
+ "this function.");
+ return false;
}
ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
/*
@@ -659,48 +705,159 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
!(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
ntfs_debug("Done. $LogFile indicates a dirty shutdown.");
- goto err_out;
+ return false;
}
- ntfs_unmap_page(page);
/* $LogFile indicates a clean shutdown. */
ntfs_debug("Done. $LogFile indicates a clean shutdown.");
- return TRUE;
-err_out:
- ntfs_unmap_page(page);
- return FALSE;
+ return true;
}
/**
* ntfs_empty_logfile - empty the contents of the $LogFile journal
* @log_vi: struct inode of loaded journal $LogFile to empty
*
- * Empty the contents of the $LogFile journal @log_vi and return TRUE on
- * success and FALSE on error.
+ * Empty the contents of the $LogFile journal @log_vi and return 'true' on
+ * success and 'false' on error.
*
* This function assumes that the $LogFile journal has already been consistency
* checked by a call to ntfs_check_logfile() and that ntfs_is_logfile_clean()
* has been used to ensure that the $LogFile is clean.
*/
-BOOL ntfs_empty_logfile(struct inode *log_vi)
+bool ntfs_empty_logfile(struct inode *log_vi)
{
- ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
+ VCN vcn, end_vcn;
+ ntfs_inode *log_ni = NTFS_I(log_vi);
+ ntfs_volume *vol = log_ni->vol;
+ struct super_block *sb = vol->sb;
+ runlist_element *rl;
+ unsigned long flags;
+ unsigned block_size, block_size_bits;
+ int err;
+ bool should_wait = true;
ntfs_debug("Entering.");
- if (!NVolLogFileEmpty(vol)) {
- int err;
-
- err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
- 0xff);
- if (unlikely(err)) {
- ntfs_error(vol->sb, "Failed to fill $LogFile with "
- "0xff bytes (error code %i).", err);
- return FALSE;
- }
- /* Set the flag so we do not have to do it again on remount. */
- NVolSetLogFileEmpty(vol);
+ if (NVolLogFileEmpty(vol)) {
+ ntfs_debug("Done.");
+ return true;
}
+ /*
+ * We cannot use ntfs_attr_set() because we may be still in the middle
+ * of a mount operation. Thus we do the emptying by hand by first
+ * zapping the page cache pages for the $LogFile/$DATA attribute and
+ * then emptying each of the buffers in each of the clusters specified
+ * by the runlist by hand.
+ */
+ block_size = sb->s_blocksize;
+ block_size_bits = sb->s_blocksize_bits;
+ vcn = 0;
+ read_lock_irqsave(&log_ni->size_lock, flags);
+ end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >>
+ vol->cluster_size_bits;
+ read_unlock_irqrestore(&log_ni->size_lock, flags);
+ truncate_inode_pages(log_vi->i_mapping, 0);
+ down_write(&log_ni->runlist.lock);
+ rl = log_ni->runlist.rl;
+ if (unlikely(!rl || vcn < rl->vcn || !rl->length)) {
+map_vcn:
+ err = ntfs_map_runlist_nolock(log_ni, vcn, NULL);
+ if (err) {
+ ntfs_error(sb, "Failed to map runlist fragment (error "
+ "%d).", -err);
+ goto err;
+ }
+ rl = log_ni->runlist.rl;
+ BUG_ON(!rl || vcn < rl->vcn || !rl->length);
+ }
+ /* Seek to the runlist element containing @vcn. */
+ while (rl->length && vcn >= rl[1].vcn)
+ rl++;
+ do {
+ LCN lcn;
+ sector_t block, end_block;
+ s64 len;
+
+ /*
+ * If this run is not mapped map it now and start again as the
+ * runlist will have been updated.
+ */
+ lcn = rl->lcn;
+ if (unlikely(lcn == LCN_RL_NOT_MAPPED)) {
+ vcn = rl->vcn;
+ goto map_vcn;
+ }
+ /* If this run is not valid abort with an error. */
+ if (unlikely(!rl->length || lcn < LCN_HOLE))
+ goto rl_err;
+ /* Skip holes. */
+ if (lcn == LCN_HOLE)
+ continue;
+ block = lcn << vol->cluster_size_bits >> block_size_bits;
+ len = rl->length;
+ if (rl[1].vcn > end_vcn)
+ len = end_vcn - rl->vcn;
+ end_block = (lcn + len) << vol->cluster_size_bits >>
+ block_size_bits;
+ /* Iterate over the blocks in the run and empty them. */
+ do {
+ struct buffer_head *bh;
+
+ /* Obtain the buffer, possibly not uptodate. */
+ bh = sb_getblk(sb, block);
+ BUG_ON(!bh);
+ /* Setup buffer i/o submission. */
+ lock_buffer(bh);
+ bh->b_end_io = end_buffer_write_sync;
+ get_bh(bh);
+ /* Set the entire contents of the buffer to 0xff. */
+ memset(bh->b_data, -1, block_size);
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ if (buffer_dirty(bh))
+ clear_buffer_dirty(bh);
+ /*
+ * Submit the buffer and wait for i/o to complete but
+ * only for the first buffer so we do not miss really
+ * serious i/o errors. Once the first buffer has
+ * completed ignore errors afterwards as we can assume
+ * that if one buffer worked all of them will work.
+ */
+ submit_bh(WRITE, bh);
+ if (should_wait) {
+ should_wait = false;
+ wait_on_buffer(bh);
+ if (unlikely(!buffer_uptodate(bh)))
+ goto io_err;
+ }
+ brelse(bh);
+ } while (++block < end_block);
+ } while ((++rl)->vcn < end_vcn);
+ up_write(&log_ni->runlist.lock);
+ /*
+ * Zap the pages again just in case any got instantiated whilst we were
+ * emptying the blocks by hand. FIXME: We may not have completed
+ * writing to all the buffer heads yet so this may happen too early.
+ * We really should use a kernel thread to do the emptying
+ * asynchronously and then we can also set the volume dirty and output
+ * an error message if emptying should fail.
+ */
+ truncate_inode_pages(log_vi->i_mapping, 0);
+ /* Set the flag so we do not have to do it again on remount. */
+ NVolSetLogFileEmpty(vol);
ntfs_debug("Done.");
- return TRUE;
+ return true;
+io_err:
+ ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk.");
+ goto dirty_err;
+rl_err:
+ ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk.");
+dirty_err:
+ NVolSetErrors(vol);
+ err = -EIO;
+err:
+ up_write(&log_ni->runlist.lock);
+ ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).",
+ -err);
+ return false;
}
#endif /* NTFS_RW */
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 4ee4378de06..aa2b6ac3f0a 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -2,7 +2,7 @@
* logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of
* the Linux-NTFS project.
*
- * Copyright (c) 2000-2004 Anton Altaparmakov
+ * Copyright (c) 2000-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -104,7 +104,7 @@ typedef struct {
* in this particular client array. Also inside the client records themselves,
* this means that there are no client records preceding or following this one.
*/
-#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff)
+#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff)
#define LOGFILE_NO_CLIENT_CPU 0xffff
/*
@@ -112,8 +112,8 @@ typedef struct {
* information about the log file in which they are present.
*/
enum {
- RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002),
- RESTART_SPACE_FILLER = 0xffff, /* gcc: Force enum bit width to 16. */
+ RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002),
+ RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
} __attribute__ ((__packed__));
typedef le16 RESTART_AREA_FLAGS;
@@ -222,7 +222,7 @@ typedef struct {
/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the
restart_area_offset + the offset of the
file_size are > 510 then corruption has
- occured. This is the very first check when
+ occurred. This is the very first check when
starting with the restart_area as if it
fails it means that some of the above values
will be corrupted by the multi sector
@@ -296,11 +296,13 @@ typedef struct {
/* sizeof() = 160 (0xa0) bytes */
} __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
-extern BOOL ntfs_check_logfile(struct inode *log_vi);
+extern bool ntfs_check_logfile(struct inode *log_vi,
+ RESTART_PAGE_HEADER **rp);
-extern BOOL ntfs_is_logfile_clean(struct inode *log_vi);
+extern bool ntfs_is_logfile_clean(struct inode *log_vi,
+ const RESTART_PAGE_HEADER *rp);
-extern BOOL ntfs_empty_logfile(struct inode *log_vi);
+extern bool ntfs_empty_logfile(struct inode *log_vi);
#endif /* NTFS_RW */
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index fac5944df6d..a44b14cbcee 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
/*
* malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -27,31 +27,65 @@
#include <linux/highmem.h>
/**
- * ntfs_malloc_nofs - allocate memory in multiples of pages
- * @size number of bytes to allocate
+ * __ntfs_malloc - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ * @gfp_mask: extra flags for the allocator
+ *
+ * Internal function. You probably want ntfs_malloc_nofs()...
*
* Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
* returns a pointer to the allocated memory.
*
* If there was insufficient memory to complete the request, return NULL.
+ * Depending on @gfp_mask the allocation may be guaranteed to succeed.
*/
-static inline void *ntfs_malloc_nofs(unsigned long size)
+static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask)
{
if (likely(size <= PAGE_SIZE)) {
BUG_ON(!size);
/* kmalloc() has per-CPU caches so is faster for now. */
- return kmalloc(PAGE_SIZE, GFP_NOFS);
- /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */
+ return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
+ /* return (void *)__get_free_page(gfp_mask); */
}
- if (likely(size >> PAGE_SHIFT < num_physpages))
- return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
+ if (likely((size >> PAGE_SHIFT) < totalram_pages))
+ return __vmalloc(size, gfp_mask, PAGE_KERNEL);
return NULL;
}
+/**
+ * ntfs_malloc_nofs - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * If there was insufficient memory to complete the request, return NULL.
+ */
+static inline void *ntfs_malloc_nofs(unsigned long size)
+{
+ return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
+}
+
+/**
+ * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * This function guarantees that the allocation will succeed. It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * NULL is only returned if @size amounts to at least totalram_pages pages.
+ */
+static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
+{
+ return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
+}
+
static inline void ntfs_free(void *addr)
{
- if (likely(((unsigned long)addr < VMALLOC_START) ||
- ((unsigned long)addr >= VMALLOC_END ))) {
+ if (!is_vmalloc_addr(addr)) {
kfree(addr);
/* free_page((unsigned long)addr); */
return;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 317f7c679fd..3014a36a255 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
/**
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -21,6 +21,7 @@
*/
#include <linux/buffer_head.h>
+#include <linux/slab.h>
#include <linux/swap.h>
#include "attrib.h"
@@ -49,7 +50,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
ntfs_volume *vol = ni->vol;
struct inode *mft_vi = vol->mft_ino;
struct page *page;
- unsigned long index, ofs, end_index;
+ unsigned long index, end_index;
+ unsigned ofs;
BUG_ON(ni->page);
/*
@@ -58,7 +60,8 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
* overflowing the unsigned long, but I don't think we would ever get
* here if the volume was that big...
*/
- index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
+ index = (u64)ni->mft_no << vol->mft_record_size_bits >>
+ PAGE_CACHE_SHIFT;
ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
i_size = i_size_read(mft_vi);
@@ -70,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
vol->mft_record_size) {
page = ERR_PTR(-ENOENT);
- ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, "
+ ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
"which is beyond the end of the mft. "
"This is probably a bug in the ntfs "
"driver.", ni->mft_no);
@@ -91,6 +94,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
"Run chkdsk.", ni->mft_no);
ntfs_unmap_page(page);
page = ERR_PTR(-EIO);
+ NVolSetErrors(vol);
}
err_out:
ni->page = NULL;
@@ -102,8 +106,8 @@ err_out:
* map_mft_record - map, pin and lock an mft record
* @ni: ntfs inode whose MFT record to map
*
- * First, take the mrec_lock semaphore. We might now be sleeping, while waiting
- * for the semaphore if it was already locked by someone else.
+ * First, take the mrec_lock mutex. We might now be sleeping, while waiting
+ * for the mutex if it was already locked by someone else.
*
* The page of the record is mapped using map_mft_record_page() before being
* returned to the caller.
@@ -133,9 +137,9 @@ err_out:
* So that code will end up having to own the mrec_lock of all mft
* records/inodes present in the page before I/O can proceed. In that case we
* wouldn't need to bother with PG_locked and PG_uptodate as nobody will be
- * accessing anything without owning the mrec_lock semaphore. But we do need
- * to use them because of the read_cache_page() invocation and the code becomes
- * so much simpler this way that it is well worth it.
+ * accessing anything without owning the mrec_lock mutex. But we do need to
+ * use them because of the read_cache_page() invocation and the code becomes so
+ * much simpler this way that it is well worth it.
*
* The mft record is now ours and we return a pointer to it. You need to check
* the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return
@@ -158,13 +162,13 @@ MFT_RECORD *map_mft_record(ntfs_inode *ni)
atomic_inc(&ni->count);
/* Serialize access to this mft record. */
- down(&ni->mrec_lock);
+ mutex_lock(&ni->mrec_lock);
m = map_mft_record_page(ni);
if (likely(!IS_ERR(m)))
return m;
- up(&ni->mrec_lock);
+ mutex_unlock(&ni->mrec_lock);
atomic_dec(&ni->count);
ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m));
return m;
@@ -215,7 +219,7 @@ void unmap_mft_record(ntfs_inode *ni)
ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no);
unmap_mft_record_page(ni);
- up(&ni->mrec_lock);
+ mutex_unlock(&ni->mrec_lock);
atomic_dec(&ni->count);
/*
* If pure ntfs_inode, i.e. no vfs inode attached, we leave it to
@@ -248,7 +252,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
int i;
unsigned long mft_no = MREF(mref);
u16 seq_no = MSEQNO(mref);
- BOOL destroy_ni = FALSE;
+ bool destroy_ni = false;
ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).",
mft_no, base_ni->mft_no);
@@ -259,7 +263,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
* in which case just return it. If not found, add it to the base
* inode before returning it.
*/
- down(&base_ni->extent_lock);
+ mutex_lock(&base_ni->extent_lock);
if (base_ni->nr_extents > 0) {
extent_nis = base_ni->ext.extent_ntfs_inos;
for (i = 0; i < base_ni->nr_extents; i++) {
@@ -272,7 +276,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
}
}
if (likely(ni != NULL)) {
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
/* We found the record; just have to map and return it. */
m = map_mft_record(ni);
@@ -299,7 +303,7 @@ map_err_out:
/* Record wasn't there. Get a new ntfs inode and initialize it. */
ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no);
if (unlikely(!ni)) {
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
return ERR_PTR(-ENOMEM);
}
@@ -310,7 +314,7 @@ map_err_out:
/* Now map the record. */
m = map_mft_record(ni);
if (IS_ERR(m)) {
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
ntfs_clear_extent_inode(ni);
goto map_err_out;
@@ -319,7 +323,7 @@ map_err_out:
if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
ntfs_error(base_ni->vol->sb, "Found stale extent mft "
"reference! Corrupt filesystem. Run chkdsk.");
- destroy_ni = TRUE;
+ destroy_ni = true;
m = ERR_PTR(-EIO);
goto unm_err_out;
}
@@ -328,11 +332,11 @@ map_err_out:
ntfs_inode **tmp;
int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
- tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS);
+ tmp = kmalloc(new_size, GFP_NOFS);
if (unlikely(!tmp)) {
ntfs_error(base_ni->vol->sb, "Failed to allocate "
"internal buffer.");
- destroy_ni = TRUE;
+ destroy_ni = true;
m = ERR_PTR(-ENOMEM);
goto unm_err_out;
}
@@ -345,14 +349,14 @@ map_err_out:
base_ni->ext.extent_ntfs_inos = tmp;
}
base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni;
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
ntfs_debug("Done 2.");
*ntfs_ino = ni;
return m;
unm_err_out:
unmap_mft_record(ni);
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
atomic_dec(&base_ni->count);
/*
* If the extent inode was not attached to the base inode we need to
@@ -381,13 +385,12 @@ unm_err_out:
* it is dirty in the inode meta data rather than the data page cache of the
* inode, and thus there are no data pages that need writing out. Therefore, a
* full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
- * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
- * ensure ->write_inode is called from generic_osync_inode() and this needs to
- * happen or the file data would not necessarily hit the device synchronously,
- * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC
- * simply "feels" better than just I_DIRTY_SYNC, since the file data has not
- * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
- * would suggest.
+ * other hand, is not sufficient, because ->write_inode needs to be called even
+ * in case of fdatasync. This needs to happen or the file data would not
+ * necessarily hit the device synchronously, even though the vfs inode has the
+ * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
+ * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
+ * which is not what I_DIRTY_SYNC on its own would suggest.
*/
void __mark_mft_record_dirty(ntfs_inode *ni)
{
@@ -397,12 +400,12 @@ void __mark_mft_record_dirty(ntfs_inode *ni)
BUG_ON(NInoAttr(ni));
mark_ntfs_record_dirty(ni->page, ni->page_ofs);
/* Determine the base vfs inode and mark it dirty, too. */
- down(&ni->extent_lock);
+ mutex_lock(&ni->extent_lock);
if (likely(ni->nr_extents >= 0))
base_ni = ni;
else
base_ni = ni->ext.base_ntfs_ino;
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
__mark_inode_dirty(VFS_I(base_ni), I_DIRTY_SYNC | I_DIRTY_DATASYNC);
}
@@ -471,7 +474,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
runlist_element *rl;
unsigned int block_start, block_end, m_start, m_end, page_ofs;
int i_bhs, nr_bhs, err = 0;
- unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits;
+ unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
ntfs_debug("Entering for inode 0x%lx.", mft_no);
BUG_ON(!max_bhs);
@@ -511,7 +514,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
} while (bh);
tail->b_this_page = head;
attach_page_buffers(page, head);
- BUG_ON(!page_has_buffers(page));
}
bh = head = page_buffers(page);
BUG_ON(!bh);
@@ -584,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
@@ -649,10 +651,7 @@ err_out:
* fs/ntfs/aops.c::mark_ntfs_record_dirty().
*
* On success, clean the mft record and return 0. On error, leave the mft
- * record dirty and return -errno. The caller should call make_bad_inode() on
- * the base inode to ensure no more access happens to this inode. We do not do
- * it here as the caller may want to finish writing other extent mft records
- * first to minimize on-disk metadata inconsistencies.
+ * record dirty and return -errno.
*
* NOTE: We always perform synchronous i/o and ignore the @sync parameter.
* However, if the mft record has a counterpart in the mft mirror and @sync is
@@ -671,8 +670,8 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
ntfs_volume *vol = ni->vol;
struct page *page = ni->page;
- unsigned char blocksize_bits = vol->mft_ino->i_blkbits;
- unsigned int blocksize = 1 << blocksize_bits;
+ unsigned int blocksize = vol->sb->s_blocksize;
+ unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
int max_bhs = vol->mft_record_size / blocksize;
struct buffer_head *bhs[max_bhs];
struct buffer_head *bh, *head;
@@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
*/
if (!NInoTestClearDirty(ni))
goto done;
- BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
rl = NULL;
@@ -781,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
struct buffer_head *tbh = bhs[i_bhs];
- if (unlikely(test_set_buffer_locked(tbh)))
+ if (!trylock_buffer(tbh))
BUG();
BUG_ON(!buffer_uptodate(tbh));
clear_buffer_dirty(tbh);
@@ -859,7 +857,7 @@ err_out:
* caller is responsible for unlocking the ntfs inode and unpinning the base
* vfs inode.
*
- * Return TRUE if the mft record may be written out and FALSE if not.
+ * Return 'true' if the mft record may be written out and 'false' if not.
*
* The caller has locked the page and cleared the uptodate flag on it which
* means that we can safely write out any dirty mft records that do not have
@@ -870,7 +868,7 @@ err_out:
* Here is a description of the tests we perform:
*
* If the inode is found in icache we know the mft record must be a base mft
- * record. If it is dirty, we do not write it and return FALSE as the vfs
+ * record. If it is dirty, we do not write it and return 'false' as the vfs
* inode write paths will result in the access times being updated which would
* cause the base mft record to be redirtied and written out again. (We know
* the access time update will modify the base mft record because Windows
@@ -879,11 +877,11 @@ err_out:
*
* If the inode is in icache and not dirty, we attempt to lock the mft record
* and if we find the lock was already taken, it is not safe to write the mft
- * record and we return FALSE.
+ * record and we return 'false'.
*
* If we manage to obtain the lock we have exclusive access to the mft record,
* which also allows us safe writeout of the mft record. We then set
- * @locked_ni to the locked ntfs inode and return TRUE.
+ * @locked_ni to the locked ntfs inode and return 'true'.
*
* Note we cannot just lock the mft record and sleep while waiting for the lock
* because this would deadlock due to lock reversal (normally the mft record is
@@ -893,24 +891,24 @@ err_out:
* If the inode is not in icache we need to perform further checks.
*
* If the mft record is not a FILE record or it is a base mft record, we can
- * safely write it and return TRUE.
+ * safely write it and return 'true'.
*
* We now know the mft record is an extent mft record. We check if the inode
* corresponding to its base mft record is in icache and obtain a reference to
- * it if it is. If it is not, we can safely write it and return TRUE.
+ * it if it is. If it is not, we can safely write it and return 'true'.
*
* We now have the base inode for the extent mft record. We check if it has an
* ntfs inode for the extent mft record attached and if not it is safe to write
- * the extent mft record and we return TRUE.
+ * the extent mft record and we return 'true'.
*
* The ntfs inode for the extent mft record is attached to the base inode so we
* attempt to lock the extent mft record and if we find the lock was already
- * taken, it is not safe to write the extent mft record and we return FALSE.
+ * taken, it is not safe to write the extent mft record and we return 'false'.
*
* If we manage to obtain the lock we have exclusive access to the extent mft
* record, which also allows us safe writeout of the extent mft record. We
* set the ntfs inode of the extent mft record clean and then set @locked_ni to
- * the now locked ntfs inode and return TRUE.
+ * the now locked ntfs inode and return 'true'.
*
* Note, the reason for actually writing dirty mft records here and not just
* relying on the vfs inode dirty code paths is that we can have mft records
@@ -924,7 +922,7 @@ err_out:
* appear if the mft record is reused for a new inode before it got written
* out.
*/
-BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
+bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
const MFT_RECORD *m, ntfs_inode **locked_ni)
{
struct super_block *sb = vol->sb;
@@ -979,16 +977,16 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
mft_no);
atomic_dec(&ni->count);
iput(vi);
- return FALSE;
+ return false;
}
ntfs_debug("Inode 0x%lx is not dirty.", mft_no);
/* The inode is not dirty, try to take the mft record lock. */
- if (unlikely(down_trylock(&ni->mrec_lock))) {
+ if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
ntfs_debug("Mft record 0x%lx is already locked, do "
"not write it.", mft_no);
atomic_dec(&ni->count);
iput(vi);
- return FALSE;
+ return false;
}
ntfs_debug("Managed to lock mft record 0x%lx, write it.",
mft_no);
@@ -997,7 +995,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
* return the locked ntfs inode.
*/
*locked_ni = ni;
- return TRUE;
+ return true;
}
ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
/* The inode is not in icache. */
@@ -1005,13 +1003,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
if (!ntfs_is_mft_record(m->magic)) {
ntfs_debug("Mft record 0x%lx is not a FILE record, write it.",
mft_no);
- return TRUE;
+ return true;
}
/* Write the mft record if it is a base inode. */
if (!m->base_mft_record) {
ntfs_debug("Mft record 0x%lx is a base record, write it.",
mft_no);
- return TRUE;
+ return true;
}
/*
* This is an extent mft record. Check if the inode corresponding to
@@ -1035,7 +1033,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
*/
ntfs_debug("Base inode 0x%lx is not in icache, write the "
"extent record.", na.mft_no);
- return TRUE;
+ return true;
}
ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
/*
@@ -1043,17 +1041,17 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
* corresponding to this extent mft record attached.
*/
ni = NTFS_I(vi);
- down(&ni->extent_lock);
+ mutex_lock(&ni->extent_lock);
if (ni->nr_extents <= 0) {
/*
* The base inode has no attached extent inodes, write this
* extent mft record.
*/
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
iput(vi);
ntfs_debug("Base inode 0x%lx has no attached extent inodes, "
"write the extent record.", na.mft_no);
- return TRUE;
+ return true;
}
/* Iterate over the attached extent inodes. */
extent_nis = ni->ext.extent_ntfs_inos;
@@ -1072,28 +1070,28 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
* extent mft record.
*/
if (!eni) {
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
iput(vi);
ntfs_debug("Extent inode 0x%lx is not attached to its base "
"inode 0x%lx, write the extent record.",
mft_no, na.mft_no);
- return TRUE;
+ return true;
}
ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.",
mft_no, na.mft_no);
/* Take a reference to the extent ntfs inode. */
atomic_inc(&eni->count);
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
/*
* Found the extent inode corresponding to this extent mft record.
* Try to take the mft record lock.
*/
- if (unlikely(down_trylock(&eni->mrec_lock))) {
+ if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
atomic_dec(&eni->count);
iput(vi);
ntfs_debug("Extent mft record 0x%lx is already locked, do "
"not write it.", mft_no);
- return FALSE;
+ return false;
}
ntfs_debug("Managed to lock extent mft record 0x%lx, write it.",
mft_no);
@@ -1105,7 +1103,7 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
* the locked extent ntfs inode.
*/
*locked_ni = eni;
- return TRUE;
+ return true;
}
static const char *es = " Leaving inconsistent metadata. Unmount and run "
@@ -1193,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
if (size) {
page = ntfs_map_page(mftbmp_mapping,
ofs >> PAGE_CACHE_SHIFT);
- if (unlikely(IS_ERR(page))) {
+ if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read mft "
"bitmap, aborting.");
return PTR_ERR(page);
@@ -1309,7 +1307,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
ll = mftbmp_ni->allocated_size;
read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
- (ll - 1) >> vol->cluster_size_bits, TRUE);
+ (ll - 1) >> vol->cluster_size_bits, NULL);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
up_write(&mftbmp_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1355,7 +1353,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
up_write(&vol->lcnbmp_lock);
ntfs_unmap_page(page);
/* Allocate a cluster from the DATA_ZONE. */
- rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
+ rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE,
+ true);
if (IS_ERR(rl2)) {
up_write(&mftbmp_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to allocate a cluster for "
@@ -1368,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
ntfs_error(vol->sb, "Failed to merge runlists for mft "
"bitmap.");
if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to dealocate "
+ ntfs_error(vol->sb, "Failed to deallocate "
"allocated cluster.%s", es);
NVolSetErrors(vol);
}
@@ -1443,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
// Note: It will need to be a special mft record and if none of
// those are available it gets rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
- "accomodate extended mft bitmap attribute "
+ "accommodate extended mft bitmap attribute "
"extent. Cannot handle this yet.");
ret = -EOPNOTSUPP;
goto undo_alloc;
@@ -1725,7 +1724,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
ATTR_RECORD *a = NULL;
int ret, mp_size;
u32 old_alen = 0;
- BOOL mp_rebuilt = FALSE;
+ bool mp_rebuilt = false;
ntfs_debug("Extending mft data allocation.");
mft_ni = NTFS_I(vol->mft_ino);
@@ -1739,7 +1738,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
ll = mft_ni->allocated_size;
read_unlock_irqrestore(&mft_ni->size_lock, flags);
rl = ntfs_attr_find_vcn_nolock(mft_ni,
- (ll - 1) >> vol->cluster_size_bits, TRUE);
+ (ll - 1) >> vol->cluster_size_bits, NULL);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
up_write(&mft_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
@@ -1780,7 +1779,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
nr > min_nr ? "default" : "minimal", (long long)nr);
old_last_vcn = rl[1].vcn;
do {
- rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
+ rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE,
+ true);
if (likely(!IS_ERR(rl2)))
break;
if (PTR_ERR(rl2) != -ENOSPC || nr == min_nr) {
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
ntfs_error(vol->sb, "Failed to merge runlists for mft data "
"attribute.");
if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to dealocate clusters "
+ ntfs_error(vol->sb, "Failed to deallocate clusters "
"from the mft data attribute.%s", es);
NVolSetErrors(vol);
}
@@ -1879,12 +1879,12 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
// and we would then need to update all references to this mft
// record appropriately. This is rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
- "accomodate extended mft data attribute "
+ "accommodate extended mft data attribute "
"extent. Cannot handle this yet.");
ret = -EOPNOTSUPP;
goto undo_alloc;
}
- mp_rebuilt = TRUE;
+ mp_rebuilt = true;
/* Generate the mapping pairs array directly into the attr record. */
ret = ntfs_mapping_pairs_build(vol, (u8*)a +
le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
@@ -1952,20 +1952,21 @@ restore_undo_alloc:
NVolSetErrors(vol);
return ret;
}
- a = ctx->attr;
- a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
+ ctx->attr->data.non_resident.highest_vcn =
+ cpu_to_sle64(old_last_vcn - 1);
undo_alloc:
- if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) {
+ if (ntfs_cluster_free(mft_ni, old_last_vcn, -1, ctx) < 0) {
ntfs_error(vol->sb, "Failed to free clusters from mft data "
"attribute.%s", es);
NVolSetErrors(vol);
}
+ a = ctx->attr;
if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
ntfs_error(vol->sb, "Failed to truncate mft data attribute "
"runlist.%s", es);
NVolSetErrors(vol);
}
- if (mp_rebuilt) {
+ if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
a->data.non_resident.mapping_pairs_offset),
old_alen - le16_to_cpu(
@@ -1982,6 +1983,10 @@ undo_alloc:
}
flush_dcache_mft_record_page(ctx->ntfs_ino);
mark_mft_record_dirty(ctx->ntfs_ino);
+ } else if (IS_ERR(ctx->mrec)) {
+ ntfs_error(vol->sb, "Failed to restore attribute search "
+ "context.%s", es);
+ NVolSetErrors(vol);
}
if (ctx)
ntfs_attr_put_search_ctx(ctx);
@@ -2113,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
}
/* Read, map, and pin the page containing the mft record. */
page = ntfs_map_page(mft_vi->i_mapping, index);
- if (unlikely(IS_ERR(page))) {
+ if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to map page containing mft record "
"to format 0x%llx.", (long long)mft_no);
return PTR_ERR(page);
@@ -2250,7 +2255,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
unsigned int ofs;
int err;
le16 seq_no, usn;
- BOOL record_formatted = FALSE;
+ bool record_formatted = false;
if (base_ni) {
ntfs_debug("Entering (allocating an extent mft record for "
@@ -2352,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
}
#ifdef DEBUG
read_lock_irqsave(&mftbmp_ni->size_lock, flags);
- ntfs_debug("Status of mftbmp after initialized extention: "
+ ntfs_debug("Status of mftbmp after initialized extension: "
"allocated_size 0x%llx, data_size 0x%llx, "
"initialized_size 0x%llx.",
(long long)mftbmp_ni->allocated_size,
@@ -2449,7 +2454,7 @@ have_alloc_rec:
mft_ni->initialized_size = new_initialized_size;
}
write_unlock_irqrestore(&mft_ni->size_lock, flags);
- record_formatted = TRUE;
+ record_formatted = true;
/* Update the mft data attribute record to reflect the new sizes. */
m = map_mft_record(mft_ni);
if (IS_ERR(m)) {
@@ -2514,7 +2519,7 @@ mft_rec_already_initialized:
ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
/* Read, map, and pin the page containing the mft record. */
page = ntfs_map_page(vol->mft_ino->i_mapping, index);
- if (unlikely(IS_ERR(page))) {
+ if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to map page containing allocated "
"mft record 0x%llx.", (long long)bit);
err = PTR_ERR(page);
@@ -2571,6 +2576,8 @@ mft_rec_already_initialized:
flush_dcache_page(page);
SetPageUptodate(page);
if (base_ni) {
+ MFT_RECORD *m_tmp;
+
/*
* Setup the base mft record in the extent mft record. This
* completes initialization of the allocated extent mft record
@@ -2583,11 +2590,11 @@ mft_rec_already_initialized:
* attach it to the base inode @base_ni and map, pin, and lock
* its, i.e. the allocated, mft record.
*/
- m = map_extent_mft_record(base_ni, bit, &ni);
- if (IS_ERR(m)) {
+ m_tmp = map_extent_mft_record(base_ni, bit, &ni);
+ if (IS_ERR(m_tmp)) {
ntfs_error(vol->sb, "Failed to map allocated extent "
"mft record 0x%llx.", (long long)bit);
- err = PTR_ERR(m);
+ err = PTR_ERR(m_tmp);
/* Set the mft record itself not in use. */
m->flags &= cpu_to_le16(
~le16_to_cpu(MFT_RECORD_IN_USE));
@@ -2598,6 +2605,7 @@ mft_rec_already_initialized:
ntfs_unmap_page(page);
goto undo_mftbmp_alloc;
}
+ BUG_ON(m != m_tmp);
/*
* Make sure the allocated mft record is written out to disk.
* No need to set the inode dirty because the caller is going
@@ -2633,11 +2641,6 @@ mft_rec_already_initialized:
}
vi->i_ino = bit;
/*
- * This is the optimal IO size (for stat), not the fs block
- * size.
- */
- vi->i_blksize = PAGE_CACHE_SIZE;
- /*
* This is for checking whether an inode has changed w.r.t. a
* file so that the file can be updated if necessary (compare
* with f_version).
@@ -2665,7 +2668,7 @@ mft_rec_already_initialized:
ni->name_len = 4;
ni->itype.index.block_size = 4096;
- ni->itype.index.block_size_bits = generic_ffs(4096) - 1;
+ ni->itype.index.block_size_bits = ntfs_ffs(4096) - 1;
ni->itype.index.collation_rule = COLLATION_FILE_NAME;
if (vol->cluster_size <= ni->itype.index.block_size) {
ni->itype.index.vcn_size = vol->cluster_size;
@@ -2704,7 +2707,7 @@ mft_rec_already_initialized:
* have its page mapped and it is very easy to do.
*/
atomic_inc(&ni->count);
- down(&ni->mrec_lock);
+ mutex_lock(&ni->mrec_lock);
ni->page = page;
ni->page_ofs = ofs;
/*
@@ -2791,22 +2794,22 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
BUG_ON(NInoAttr(ni));
BUG_ON(ni->nr_extents != -1);
- down(&ni->extent_lock);
+ mutex_lock(&ni->extent_lock);
base_ni = ni->ext.base_ntfs_ino;
- up(&ni->extent_lock);
+ mutex_unlock(&ni->extent_lock);
BUG_ON(base_ni->nr_extents <= 0);
ntfs_debug("Entering for extent inode 0x%lx, base inode 0x%lx.\n",
mft_no, base_ni->mft_no);
- down(&base_ni->extent_lock);
+ mutex_lock(&base_ni->extent_lock);
/* Make sure we are holding the only reference to the extent inode. */
if (atomic_read(&ni->count) > 2) {
ntfs_error(vol->sb, "Tried to free busy extent inode 0x%lx, "
"not freeing.", base_ni->mft_no);
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
return -EBUSY;
}
@@ -2824,7 +2827,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
break;
}
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
if (unlikely(err)) {
ntfs_error(vol->sb, "Extent inode 0x%lx is not attached to "
@@ -2839,7 +2842,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
*/
/* Mark the mft record as not in use. */
- m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE));
+ m->flags &= ~MFT_RECORD_IN_USE;
/* Increment the sequence number, skipping zero, if it is not zero. */
old_seq_no = m->sequence_number;
@@ -2883,16 +2886,16 @@ rollback_error:
return 0;
rollback:
/* Rollback what we did... */
- down(&base_ni->extent_lock);
+ mutex_lock(&base_ni->extent_lock);
extent_nis = base_ni->ext.extent_ntfs_inos;
if (!(base_ni->nr_extents & 3)) {
int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode*);
- extent_nis = (ntfs_inode**)kmalloc(new_size, GFP_NOFS);
+ extent_nis = kmalloc(new_size, GFP_NOFS);
if (unlikely(!extent_nis)) {
ntfs_error(vol->sb, "Failed to allocate internal "
"buffer during rollback.%s", es);
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
NVolSetErrors(vol);
goto rollback_error;
}
@@ -2907,7 +2910,7 @@ rollback:
m->flags |= MFT_RECORD_IN_USE;
m->sequence_number = old_seq_no;
extent_nis[base_ni->nr_extents++] = ni;
- up(&base_ni->extent_lock);
+ mutex_unlock(&base_ni->extent_lock);
mark_mft_record_dirty(ni);
return err;
}
diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h
index 407de2cef1d..b52bf87b99d 100644
--- a/fs/ntfs/mft.h
+++ b/fs/ntfs/mft.h
@@ -97,10 +97,7 @@ extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync);
* uptodate.
*
* On success, clean the mft record and return 0. On error, leave the mft
- * record dirty and return -errno. The caller should call make_bad_inode() on
- * the base inode to ensure no more access happens to this inode. We do not do
- * it here as the caller may want to finish writing other extent mft records
- * first to minimize on-disk metadata inconsistencies.
+ * record dirty and return -errno.
*/
static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync)
{
@@ -114,7 +111,7 @@ static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync)
return err;
}
-extern BOOL ntfs_may_write_mft_record(ntfs_volume *vol,
+extern bool ntfs_may_write_mft_record(ntfs_volume *vol,
const unsigned long mft_no, const MFT_RECORD *m,
ntfs_inode **locked_ni);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 351dbc3b6e4..436f36037e0 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -2,7 +2,7 @@
* namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS
* project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -21,7 +21,9 @@
*/
#include <linux/dcache.h>
+#include <linux/exportfs.h>
#include <linux/security.h>
+#include <linux/slab.h>
#include "attrib.h"
#include "debug.h"
@@ -96,10 +98,10 @@
* name. We then convert the name to the current NLS code page, and proceed
* searching for a dentry with this name, etc, as in case 2), above.
*
- * Locking: Caller must hold i_sem on the directory.
+ * Locking: Caller must hold i_mutex on the directory.
*/
static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
- struct nameidata *nd)
+ unsigned int flags)
{
ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
struct inode *dent_inode;
@@ -115,7 +117,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
&uname);
if (uname_len < 0) {
- ntfs_error(vol->sb, "Failed to convert name to Unicode.");
+ if (uname_len != -ENAMETOOLONG)
+ ntfs_error(vol->sb, "Failed to convert name to "
+ "Unicode.");
return ERR_PTR(uname_len);
}
mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len,
@@ -157,7 +161,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
/* Return the error code. */
return (struct dentry *)dent_inode;
}
- /* It is guaranteed that name is no longer allocated at this point. */
+ /* It is guaranteed that @name is no longer allocated at this point. */
if (MREF_ERR(mref) == -ENOENT) {
ntfs_debug("Entry was not found, adding negative dentry.");
/* The dcache will handle negative entries. */
@@ -168,11 +172,9 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
ntfs_error(vol->sb, "ntfs_lookup_ino_by_name() failed with error "
"code %i.", -MREF_ERR(mref));
return ERR_PTR(MREF_ERR(mref));
-
// TODO: Consider moving this lot to a separate function! (AIA)
handle_name:
{
- struct dentry *real_dent, *new_dent;
MFT_RECORD *m;
ntfs_attr_search_ctx *ctx;
ntfs_inode *ni = NTFS_I(dent_inode);
@@ -253,93 +255,9 @@ handle_name:
}
nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
- /*
- * Note: No need for dent->d_lock lock as i_sem is held on the
- * parent inode.
- */
-
- /* Does a dentry matching the nls_name exist already? */
- real_dent = d_lookup(dent->d_parent, &nls_name);
- /* If not, create it now. */
- if (!real_dent) {
- real_dent = d_alloc(dent->d_parent, &nls_name);
- kfree(nls_name.name);
- if (!real_dent) {
- err = -ENOMEM;
- goto err_out;
- }
- new_dent = d_splice_alias(dent_inode, real_dent);
- if (new_dent)
- dput(real_dent);
- else
- new_dent = real_dent;
- ntfs_debug("Done. (Created new dentry.)");
- return new_dent;
- }
+ dent = d_add_ci(dent, dent_inode, &nls_name);
kfree(nls_name.name);
- /* Matching dentry exists, check if it is negative. */
- if (real_dent->d_inode) {
- if (unlikely(real_dent->d_inode != dent_inode)) {
- /* This can happen because bad inodes are unhashed. */
- BUG_ON(!is_bad_inode(dent_inode));
- BUG_ON(!is_bad_inode(real_dent->d_inode));
- }
- /*
- * Already have the inode and the dentry attached, decrement
- * the reference count to balance the ntfs_iget() we did
- * earlier on. We found the dentry using d_lookup() so it
- * cannot be disconnected and thus we do not need to worry
- * about any NFS/disconnectedness issues here.
- */
- iput(dent_inode);
- ntfs_debug("Done. (Already had inode and dentry.)");
- return real_dent;
- }
- /*
- * Negative dentry: instantiate it unless the inode is a directory and
- * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
- * in which case d_move() that in place of the found dentry.
- */
- if (!S_ISDIR(dent_inode->i_mode)) {
- /* Not a directory; everything is easy. */
- d_instantiate(real_dent, dent_inode);
- ntfs_debug("Done. (Already had negative file dentry.)");
- return real_dent;
- }
- spin_lock(&dcache_lock);
- if (list_empty(&dent_inode->i_dentry)) {
- /*
- * Directory without a 'disconnected' dentry; we need to do
- * d_instantiate() by hand because it takes dcache_lock which
- * we already hold.
- */
- list_add(&real_dent->d_alias, &dent_inode->i_dentry);
- real_dent->d_inode = dent_inode;
- spin_unlock(&dcache_lock);
- security_d_instantiate(real_dent, dent_inode);
- ntfs_debug("Done. (Already had negative directory dentry.)");
- return real_dent;
- }
- /*
- * Directory with a 'disconnected' dentry; get a reference to the
- * 'disconnected' dentry.
- */
- new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
- d_alias);
- dget_locked(new_dent);
- spin_unlock(&dcache_lock);
- /* Do security vodoo. */
- security_d_instantiate(real_dent, dent_inode);
- /* Move new_dent in place of real_dent. */
- d_move(new_dent, real_dent);
- /* Balance the ntfs_iget() we did above. */
- iput(dent_inode);
- /* Throw away real_dent. */
- dput(real_dent);
- /* Use new_dent as the actual dentry. */
- ntfs_debug("Done. (Already had negative, disconnected directory "
- "dentry.)");
- return new_dent;
+ return dent;
eio_err_out:
ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
@@ -358,7 +276,7 @@ err_out:
/**
* Inode operations for directories.
*/
-struct inode_operations ntfs_dir_inode_ops = {
+const struct inode_operations ntfs_dir_inode_ops = {
.lookup = ntfs_lookup, /* VFS: Lookup directory. */
};
@@ -374,7 +292,7 @@ struct inode_operations ntfs_dir_inode_ops = {
* The code is based on the ext3 ->get_parent() implementation found in
* fs/ext3/namei.c::ext3_get_parent().
*
- * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down.
+ * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down.
*
* Return the dentry of the parent directory on success or the error code on
* error (IS_ERR() is true).
@@ -387,8 +305,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
ntfs_attr_search_ctx *ctx;
ATTR_RECORD *attr;
FILE_NAME_ATTR *fn;
- struct inode *parent_vi;
- struct dentry *parent_dent;
unsigned long parent_ino;
int err;
@@ -428,78 +344,44 @@ try_next:
/* Release the search context and the mft record of the child. */
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(ni);
- /* Get the inode of the parent directory. */
- parent_vi = ntfs_iget(vi->i_sb, parent_ino);
- if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) {
- if (!IS_ERR(parent_vi))
- iput(parent_vi);
- ntfs_error(vi->i_sb, "Failed to get parent directory inode "
- "0x%lx of child inode 0x%lx.", parent_ino,
- vi->i_ino);
- return ERR_PTR(-EACCES);
- }
- /* Finally get a dentry for the parent directory and return it. */
- parent_dent = d_alloc_anon(parent_vi);
- if (unlikely(!parent_dent)) {
- iput(parent_vi);
- return ERR_PTR(-ENOMEM);
- }
- ntfs_debug("Done for inode 0x%lx.", vi->i_ino);
- return parent_dent;
+
+ return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino));
}
-/**
- * ntfs_get_dentry - find a dentry for the inode from a file handle sub-fragment
- * @sb: super block identifying the mounted ntfs volume
- * @fh: the file handle sub-fragment
- *
- * Find a dentry for the inode given a file handle sub-fragment. This function
- * is called from fs/exportfs/expfs.c::find_exported_dentry() which in turn is
- * called from the default ->decode_fh() which is export_decode_fh() in the
- * same file. The code is closely based on the default ->get_dentry() helper
- * fs/exportfs/expfs.c::get_object().
- *
- * The @fh contains two 32-bit unsigned values, the first one is the inode
- * number and the second one is the inode generation.
- *
- * Return the dentry on success or the error code on error (IS_ERR() is true).
- */
-static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
+static struct inode *ntfs_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
{
- struct inode *vi;
- struct dentry *dent;
- unsigned long ino = ((u32 *)fh)[0];
- u32 gen = ((u32 *)fh)[1];
+ struct inode *inode;
- ntfs_debug("Entering for inode 0x%lx, generation 0x%x.", ino, gen);
- vi = ntfs_iget(sb, ino);
- if (IS_ERR(vi)) {
- ntfs_error(sb, "Failed to get inode 0x%lx.", ino);
- return (struct dentry *)vi;
- }
- if (unlikely(is_bad_inode(vi) || vi->i_generation != gen)) {
- /* We didn't find the right inode. */
- ntfs_error(sb, "Inode 0x%lx, bad count: %d %d or version 0x%x "
- "0x%x.", vi->i_ino, vi->i_nlink,
- atomic_read(&vi->i_count), vi->i_generation,
- gen);
- iput(vi);
- return ERR_PTR(-ESTALE);
- }
- /* Now find a dentry. If possible, get a well-connected one. */
- dent = d_alloc_anon(vi);
- if (unlikely(!dent)) {
- iput(vi);
- return ERR_PTR(-ENOMEM);
+ inode = ntfs_iget(sb, ino);
+ if (!IS_ERR(inode)) {
+ if (is_bad_inode(inode) || inode->i_generation != generation) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
+ }
}
- ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen);
- return dent;
+
+ return inode;
+}
+
+static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ ntfs_nfs_get_inode);
+}
+
+static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ ntfs_nfs_get_inode);
}
/**
* Export operations allowing NFS exporting of mounted NTFS partitions.
*
- * We use the default ->decode_fh() and ->encode_fh() for now. Note that they
+ * We use the default ->encode_fh() for now. Note that they
* use 32 bits to store the inode number which is an unsigned long so on 64-bit
* architectures is usually 64 bits so it would all fail horribly on huge
* volumes. I guess we need to define our own encode and decode fh functions
@@ -515,10 +397,9 @@ static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
* allowing the inode number 0 which is used in NTFS for the system file $MFT
* and due to using iget() whereas NTFS needs ntfs_iget().
*/
-struct export_operations ntfs_export_ops = {
+const struct export_operations ntfs_export_ops = {
.get_parent = ntfs_get_parent, /* Find the parent of a given
directory. */
- .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode
- given a file handle
- sub-fragment. */
+ .fh_to_dentry = ntfs_fh_to_dentry,
+ .fh_to_parent = ntfs_fh_to_parent,
};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 446b5014115..d6a340bf80f 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -50,26 +50,26 @@ typedef enum {
/* Global variables. */
/* Slab caches (from super.c). */
-extern kmem_cache_t *ntfs_name_cache;
-extern kmem_cache_t *ntfs_inode_cache;
-extern kmem_cache_t *ntfs_big_inode_cache;
-extern kmem_cache_t *ntfs_attr_ctx_cache;
-extern kmem_cache_t *ntfs_index_ctx_cache;
+extern struct kmem_cache *ntfs_name_cache;
+extern struct kmem_cache *ntfs_inode_cache;
+extern struct kmem_cache *ntfs_big_inode_cache;
+extern struct kmem_cache *ntfs_attr_ctx_cache;
+extern struct kmem_cache *ntfs_index_ctx_cache;
/* The various operations structs defined throughout the driver files. */
-extern struct address_space_operations ntfs_aops;
-extern struct address_space_operations ntfs_mst_aops;
+extern const struct address_space_operations ntfs_aops;
+extern const struct address_space_operations ntfs_mst_aops;
-extern struct file_operations ntfs_file_ops;
-extern struct inode_operations ntfs_file_inode_ops;
+extern const struct file_operations ntfs_file_ops;
+extern const struct inode_operations ntfs_file_inode_ops;
-extern struct file_operations ntfs_dir_ops;
-extern struct inode_operations ntfs_dir_inode_ops;
+extern const struct file_operations ntfs_dir_ops;
+extern const struct inode_operations ntfs_dir_inode_ops;
-extern struct file_operations ntfs_empty_file_ops;
-extern struct inode_operations ntfs_empty_inode_ops;
+extern const struct file_operations ntfs_empty_file_ops;
+extern const struct inode_operations ntfs_empty_inode_ops;
-extern struct export_operations ntfs_export_ops;
+extern const struct export_operations ntfs_export_ops;
/**
* NTFS_SB - return the ntfs volume given a vfs super block
@@ -91,7 +91,7 @@ extern void free_compression_buffers(void);
/* From fs/ntfs/super.c */
#define default_upcase_len 0x10000
-extern struct semaphore ntfs_lock;
+extern struct mutex ntfs_lock;
typedef struct {
int val;
@@ -105,7 +105,7 @@ extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size);
extern void post_write_mst_fixup(NTFS_RECORD *b);
/* From fs/ntfs/unistr.c */
-extern BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
+extern bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
const ntfschar *s2, size_t s2_len,
const IGNORE_CASE_BOOL ic,
const ntfschar *upcase, const u32 upcase_size);
@@ -132,4 +132,33 @@ extern int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
/* From fs/ntfs/upcase.c */
extern ntfschar *generate_default_upcase(void);
+static inline int ntfs_ffs(int x)
+{
+ int r = 1;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff)) {
+ x >>= 16;
+ r += 16;
+ }
+ if (!(x & 0xff)) {
+ x >>= 8;
+ r += 8;
+ }
+ if (!(x & 0xf)) {
+ x >>= 4;
+ r += 4;
+ }
+ if (!(x & 3)) {
+ x >>= 2;
+ r += 2;
+ }
+ if (!(x & 1)) {
+ x >>= 1;
+ r += 1;
+ }
+ return r;
+}
+
#endif /* _LINUX_NTFS_H */
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index 833df2a4e9f..d80e3315cab 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -31,10 +31,10 @@
* ntfs_mark_quotas_out_of_date - mark the quotas out of date on an ntfs volume
* @vol: ntfs volume on which to mark the quotas out of date
*
- * Mark the quotas out of date on the ntfs volume @vol and return TRUE on
- * success and FALSE on error.
+ * Mark the quotas out of date on the ntfs volume @vol and return 'true' on
+ * success and 'false' on error.
*/
-BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
+bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
{
ntfs_index_context *ictx;
QUOTA_CONTROL_ENTRY *qce;
@@ -46,9 +46,9 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
goto done;
if (!vol->quota_ino || !vol->quota_q_ino) {
ntfs_error(vol->sb, "Quota inodes are not open.");
- return FALSE;
+ return false;
}
- down(&vol->quota_q_ino->i_sem);
+ mutex_lock(&vol->quota_q_ino->i_mutex);
ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
if (!ictx) {
ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
ntfs_index_entry_mark_dirty(ictx);
set_done:
ntfs_index_ctx_put(ictx);
- up(&vol->quota_q_ino->i_sem);
+ mutex_unlock(&vol->quota_q_ino->i_mutex);
/*
* We set the flag so we do not try to mark the quotas out of date
* again on remount.
@@ -106,12 +106,12 @@ set_done:
NVolSetQuotaOutOfDate(vol);
done:
ntfs_debug("Done.");
- return TRUE;
+ return true;
err_out:
if (ictx)
ntfs_index_ctx_put(ictx);
- up(&vol->quota_q_ino->i_sem);
- return FALSE;
+ mutex_unlock(&vol->quota_q_ino->i_mutex);
+ return false;
}
#endif /* NTFS_RW */
diff --git a/fs/ntfs/quota.h b/fs/ntfs/quota.h
index 40e4763aa22..4cbe5594c0b 100644
--- a/fs/ntfs/quota.h
+++ b/fs/ntfs/quota.h
@@ -28,7 +28,7 @@
#include "types.h"
#include "volume.h"
-extern BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol);
+extern bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol);
#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 758855b0414..eac7d6788a1 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1,8 +1,8 @@
/**
* runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2002-2005 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
int size)
{
if (likely((dst != src) && (size > 0)))
- memmove(base + dst, base + src, size * sizeof (*base));
+ memmove(base + dst, base + src, size * sizeof(*base));
}
/**
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
}
/**
+ * ntfs_rl_realloc_nofail - Reallocate memory for runlists
+ * @rl: original runlist
+ * @old_size: number of runlist elements in the original runlist @rl
+ * @new_size: number of runlist elements we need space for
+ *
+ * As the runlists grow, more memory will be required. To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * This function guarantees that the allocation will succeed. It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * N.B. If the new allocation doesn't require a different number of pages in
+ * memory, the function will return the original pointer.
+ *
+ * Return a pointer to the newly allocated, or recycled, memory. Unlike
+ * ntfs_rl_realloc(), this function never returns an error pointer: the
+ * allocation is done with ntfs_malloc_nofs_nofail() and we BUG() if that
+ * ever returns NULL, so callers need not check the result.
+ */
+static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
+ int old_size, int new_size)
+{
+ runlist_element *new_rl;
+
+ old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+ new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+ if (old_size == new_size)
+ return rl;
+
+ new_rl = ntfs_malloc_nofs_nofail(new_size);
+ BUG_ON(!new_rl);
+
+ if (likely(rl != NULL)) {
+ if (unlikely(old_size > new_size))
+ old_size = new_size;
+ memcpy(new_rl, rl, old_size);
+ ntfs_free(rl);
+ }
+ return new_rl;
+}
+
+/**
* ntfs_are_rl_mergeable - test if two runlists can be joined together
* @dst: original runlist
* @src: new runlist to test for mergeability with @dst
@@ -104,26 +149,30 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
*
* It is up to the caller to serialize access to the runlists @dst and @src.
*
- * Return: TRUE Success, the runlists can be merged.
- * FALSE Failure, the runlists cannot be merged.
+ * Return: true Success, the runlists can be merged.
+ * false Failure, the runlists cannot be merged.
*/
-static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
+static inline bool ntfs_are_rl_mergeable(runlist_element *dst,
runlist_element *src)
{
BUG_ON(!dst);
BUG_ON(!src);
- if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */
- if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
- return TRUE;
- return FALSE;
- }
- if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
- return FALSE;
- if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
- return FALSE;
-
- return TRUE;
+ /* We can merge unmapped regions even if they are misaligned. */
+ if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
+ return true;
+ /* If the runs are misaligned, we cannot merge them. */
+ if ((dst->vcn + dst->length) != src->vcn)
+ return false;
+ /* If both runs are non-sparse and contiguous, we can merge them. */
+ if ((dst->lcn >= 0) && (src->lcn >= 0) &&
+ ((dst->lcn + dst->length) == src->lcn))
+ return true;
+ /* If we are merging two holes, we can merge them. */
+ if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
+ return true;
+ /* Cannot merge. */
+ return false;
}
/**
@@ -169,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
static inline runlist_element *ntfs_rl_append(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL right;
- int magic;
+ bool right = false; /* Right end of @src needs merging. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
/* First, check if the right hand end needs merging. */
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
/* Space required: @dst size + @src size, less one if we merged. */
dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
@@ -191,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
if (right)
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
- magic = loc + ssize;
+ /* First run after the @src runs that have been inserted. */
+ marker = loc + ssize + 1;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+ ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
/* Adjust the size of the preceding hole. */
dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
/* We may have changed the length of the file, so fix the end marker */
- if (dst[magic + 1].lcn == LCN_ENOENT)
- dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+ if (dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}
@@ -234,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL disc = FALSE; /* Discontinuity */
- BOOL hole = FALSE; /* Following a hole */
- int magic;
+ bool left = false; /* Left end of @src needs merging. */
+ bool disc = false; /* Discontinuity between @dst and @src. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
- /* disc => Discontinuity between the end of @dst and the start of @src.
- * This means we might need to insert a hole.
- * hole => @dst ends with a hole or an unmapped region which we can
- * extend to match the discontinuity. */
+ /*
+ * disc => Discontinuity between the end of @dst and the start of @src.
+ * This means we might need to insert a "not mapped" run.
+ */
if (loc == 0)
disc = (src[0].vcn > 0);
else {
@@ -258,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
merged_length += src->length;
disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
- if (disc)
- hole = (dst[loc - 1].lcn == LCN_HOLE);
}
-
- /* Space required: @dst size + @src size, less one if we merged, plus
- * one if there was a discontinuity, less one for a trailing hole. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+ /*
+ * Space required: @dst size + @src size, less one if we merged, plus
+ * one if there was a discontinuity.
+ */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
if (IS_ERR(dst))
return dst;
/*
* We are guaranteed to succeed from here so can start modifying the
* original runlist.
*/
-
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- magic = loc + ssize - left + disc - hole;
+ /*
+ * First run after the @src runs that have been inserted.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst. And if @disc, then @dst and @src do
+ * not meet and we need an extra run to fill the gap.
+ */
+ marker = loc + ssize - left + disc;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc, dsize - loc);
- ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+ ntfs_rl_mm(dst, marker, loc, dsize - loc);
+ ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
- /* Adjust the VCN of the last run ... */
- if (dst[magic].lcn <= LCN_HOLE)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* Adjust the VCN of the first run after the insertion... */
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
/* ... and the length. */
- if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
- dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+ if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
+ dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
- /* Writing beyond the end of the file and there's a discontinuity. */
+ /* Writing beyond the end of the file and there is a discontinuity. */
if (disc) {
- if (hole)
- dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
- else {
- if (loc > 0) {
- dst[loc].vcn = dst[loc - 1].vcn +
- dst[loc - 1].length;
- dst[loc].length = dst[loc + 1].vcn -
- dst[loc].vcn;
- } else {
- dst[loc].vcn = 0;
- dst[loc].length = dst[loc + 1].vcn;
- }
- dst[loc].lcn = LCN_RL_NOT_MAPPED;
+ if (loc > 0) {
+ dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+ } else {
+ dst[loc].vcn = 0;
+ dst[loc].length = dst[loc + 1].vcn;
}
-
- magic += hole;
-
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn +
- dst[magic - 1].length;
+ dst[loc].lcn = LCN_RL_NOT_MAPPED;
}
return dst;
}
@@ -340,42 +381,65 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
int dsize, runlist_element *src, int ssize, int loc)
{
- BOOL left = FALSE;
- BOOL right;
- int magic;
+ signed delta;
+ bool left = false; /* Left end of @src needs merging. */
+ bool right = false; /* Right end of @src needs merging. */
+ int tail; /* Start of tail of @dst. */
+ int marker; /* End of the inserted runs. */
BUG_ON(!dst);
BUG_ON(!src);
- /* First, merge the left and right ends, if necessary. */
- right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+ /* First, see if the left and right ends need merging. */
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
if (loc > 0)
left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
- /* Allocate some space. We'll need less if the left, right, or both
- * ends were merged. */
- dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
- if (IS_ERR(dst))
- return dst;
+ /*
+ * Allocate some space. We will need less if the left, right, or both
+ * ends get merged. The -1 accounts for the run being replaced.
+ */
+ delta = ssize - 1 - left - right;
+ if (delta > 0) {
+ dst = ntfs_rl_realloc(dst, dsize, dsize + delta);
+ if (IS_ERR(dst))
+ return dst;
+ }
/*
* We are guaranteed to succeed from here so can start modifying the
* original runlists.
*/
+
+ /* First, merge the left and right ends, if necessary. */
if (right)
__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
if (left)
__ntfs_rl_merge(dst + loc - 1, src);
-
- /* FIXME: What does this mean? (AIA) */
- magic = loc + ssize - left;
+ /*
+ * Offset of the tail of @dst, i.e. the index of the first run of the
+ * tail of @dst. The tail needs to be moved out of the way to make
+ * space for the runs to be copied from @src.
+ * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+ * replaced run. However, if @right, then one of @dst's runs is
+ * already merged into @src.
+ */
+ tail = loc + right + 1;
+ /*
+ * First run after the @src runs that have been inserted, i.e. where
+ * the tail of @dst needs to be moved to.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst.
+ */
+ marker = loc + ssize - left;
/* Move the tail of @dst out of the way, then copy in @src. */
- ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+ ntfs_rl_mm(dst, marker, tail, dsize - tail);
ntfs_rl_mc(dst, loc, src, left, ssize - left);
- /* We may have changed the length of the file, so fix the end marker */
- if (dst[magic].lcn == LCN_ENOENT)
- dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+ /* We may have changed the length of the file, so fix the end marker. */
+ if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
return dst;
}
@@ -497,6 +561,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
/* Scan to the end of the source runlist. */
for (dend = 0; likely(drl[dend].length); dend++)
;
+ dend++;
drl = ntfs_rl_realloc(drl, dend, dend + 1);
if (IS_ERR(drl))
return drl;
@@ -555,8 +620,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
;
{
- BOOL start;
- BOOL finish;
+ bool start;
+ bool finish;
int ds = dend + 1; /* Number of elements in drl & srl */
int ss = sfinal - sstart + 1;
@@ -566,11 +631,11 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
((drl[dins].vcn + drl[dins].length) <= /* End of hole */
(srl[send - 1].vcn + srl[send - 1].length)));
- /* Or we'll lose an end marker */
- if (start && finish && (drl[dins].length == 0))
+ /* Or we will lose an end marker. */
+ if (finish && !drl[dins].length)
ss++;
if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
- finish = FALSE;
+ finish = false;
#if 0
ntfs_debug("dfinal = %i, dend = %i", dfinal, dend);
ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send);
@@ -621,11 +686,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
/* Add an unmapped runlist element. */
if (!slots) {
- /* FIXME/TODO: We need to have the
- * extra memory already! (AIA) */
- drl = ntfs_rl_realloc(drl, ds, ds + 2);
- if (!drl)
- goto critical_error;
+ drl = ntfs_rl_realloc_nofail(drl, ds,
+ ds + 2);
slots = 2;
}
ds++;
@@ -640,13 +702,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
drl[ds].length = marker_vcn - drl[ds].vcn;
/* Finally add the ENOENT terminator. */
ds++;
- if (!slots) {
- /* FIXME/TODO: We need to have the extra
- * memory already! (AIA) */
- drl = ntfs_rl_realloc(drl, ds, ds + 1);
- if (!drl)
- goto critical_error;
- }
+ if (!slots)
+ drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
drl[ds].vcn = marker_vcn;
drl[ds].lcn = LCN_ENOENT;
drl[ds].length = (s64)0;
@@ -659,11 +716,6 @@ finished:
ntfs_debug("Merged runlist:");
ntfs_debug_dump_runlist(drl);
return drl;
-
-critical_error:
- /* Critical error! We cannot afford to fail here. */
- ntfs_error(NULL, "Critical error! Not enough memory.");
- panic("NTFS: Cannot continue.");
}
/**
@@ -727,6 +779,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
ntfs_error(vol->sb, "Corrupt attribute.");
return ERR_PTR(-EIO);
}
+ /* If the mapping pairs array is valid but empty, nothing to do. */
+ if (!vcn && !*buf)
+ return old_rl;
/* Current position in runlist array. */
rlpos = 0;
/* Allocate first page and set current runlist size to one page. */
@@ -1079,7 +1134,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
{
LCN prev_lcn;
int rls;
- BOOL the_end = FALSE;
+ bool the_end = false;
BUG_ON(first_vcn < 0);
BUG_ON(last_vcn < -1);
@@ -1113,7 +1168,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
s64 s1 = last_vcn + 1;
if (unlikely(rl[1].vcn > s1))
length = s1 - rl->vcn;
- the_end = TRUE;
+ the_end = true;
}
delta = first_vcn - rl->vcn;
/* Header byte + length. */
@@ -1149,7 +1204,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
s64 s1 = last_vcn + 1;
if (unlikely(rl[1].vcn > s1))
length = s1 - rl->vcn;
- the_end = TRUE;
+ the_end = true;
}
/* Header byte + length. */
rls += 1 + ntfs_get_nr_significant_bytes(length);
@@ -1188,7 +1243,7 @@ err_out:
* write.
*
* This is used when building the mapping pairs array of a runlist to compress
- * a given logical cluster number (lcn) or a specific run length to the minumum
+ * a given logical cluster number (lcn) or a specific run length to the minimum
* size possible.
*
* Return the number of bytes written on success. On error, i.e. the
@@ -1272,7 +1327,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
LCN prev_lcn;
s8 *dst_max, *dst_next;
int err = -ENOSPC;
- BOOL the_end = FALSE;
+ bool the_end = false;
s8 len_len, lcn_len;
BUG_ON(first_vcn < 0);
@@ -1315,7 +1370,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
s64 s1 = last_vcn + 1;
if (unlikely(rl[1].vcn > s1))
length = s1 - rl->vcn;
- the_end = TRUE;
+ the_end = true;
}
delta = first_vcn - rl->vcn;
/* Write length. */
@@ -1367,7 +1422,7 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
s64 s1 = last_vcn + 1;
if (unlikely(rl[1].vcn > s1))
length = s1 - rl->vcn;
- the_end = TRUE;
+ the_end = true;
}
/* Write length. */
len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
@@ -1419,6 +1474,7 @@ err_out:
/**
* ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
+ * @vol: ntfs volume (needed for error output)
* @runlist: runlist to truncate
* @new_length: the new length of the runlist in VCNs
*
@@ -1426,12 +1482,16 @@ err_out:
* holding the runlist elements to a length of @new_length VCNs.
*
* If @new_length lies within the runlist, the runlist elements with VCNs of
- * @new_length and above are discarded.
+ * @new_length and above are discarded. As a special case if @new_length is
+ * zero, the runlist is discarded and set to NULL.
*
* If @new_length lies beyond the runlist, a sparse runlist element is added to
* the end of the runlist @runlist or if the last runlist element is a sparse
* one already, this is extended.
*
+ * Note, no checking is done for unmapped runlist elements. It is assumed that
+ * the caller has mapped any elements that need to be mapped already.
+ *
* Return 0 on success and -errno on error.
*
* Locking: The caller must hold @runlist->lock for writing.
@@ -1446,6 +1506,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
BUG_ON(!runlist);
BUG_ON(new_length < 0);
rl = runlist->rl;
+ if (!new_length) {
+ ntfs_debug("Freeing runlist.");
+ runlist->rl = NULL;
+ if (rl)
+ ntfs_free(rl);
+ return 0;
+ }
if (unlikely(!rl)) {
/*
* Create a runlist consisting of a sparse runlist element of
@@ -1474,7 +1541,7 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
*/
if (rl->length) {
runlist_element *trl;
- BOOL is_end;
+ bool is_end;
ntfs_debug("Shrinking runlist.");
/* Determine the runlist size. */
@@ -1488,11 +1555,11 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
* If a run was partially truncated, make the following runlist
* element a terminator.
*/
- is_end = FALSE;
+ is_end = false;
if (rl->length) {
rl++;
if (!rl->length)
- is_end = TRUE;
+ is_end = true;
rl->vcn = new_length;
rl->length = 0;
}
@@ -1553,4 +1620,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
return 0;
}
+/**
+ * ntfs_rl_punch_nolock - punch a hole into a runlist
+ * @vol: ntfs volume (needed for error output)
+ * @runlist: runlist to punch a hole into
+ * @start: starting VCN of the hole to be created
+ * @length: size of the hole to be created in units of clusters
+ *
+ * Punch a hole into the runlist @runlist starting at VCN @start and of size
+ * @length clusters.
+ *
+ * Return 0 on success and -errno on error, in which case @runlist has not been
+ * modified.
+ *
+ * If @start and/or @start + @length are outside the runlist, return error code
+ * -ENOENT.
+ *
+ * If the runlist contains unmapped or error elements between @start and @start
+ * + @length, return error code -EINVAL.
+ *
+ * Locking: The caller must hold @runlist->lock for writing.
+ */
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+ const VCN start, const s64 length)
+{
+ const VCN end = start + length;
+ s64 delta;
+ runlist_element *rl, *rl_end, *rl_real_end, *trl;
+ int old_size;
+ bool lcn_fixup = false;
+
+ ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
+ (long long)start, (long long)length);
+ BUG_ON(!runlist);
+ BUG_ON(start < 0);
+ BUG_ON(length < 0);
+ BUG_ON(end < 0);
+ rl = runlist->rl;
+ if (unlikely(!rl)) {
+ if (likely(!start && !length))
+ return 0;
+ return -ENOENT;
+ }
+ /* Find @start in the runlist. */
+ while (likely(rl->length && start >= rl[1].vcn))
+ rl++;
+ rl_end = rl;
+ /* Find @end in the runlist. */
+ while (likely(rl_end->length && end >= rl_end[1].vcn)) {
+ /* Verify there are no unmapped or error elements. */
+ if (unlikely(rl_end->lcn < LCN_HOLE))
+ return -EINVAL;
+ rl_end++;
+ }
+ /* Check the last element. */
+ if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
+ return -EINVAL;
+ /* This covers @start being out of bounds, too. */
+ if (!rl_end->length && end > rl_end->vcn)
+ return -ENOENT;
+ if (!length)
+ return 0;
+ if (!rl->length)
+ return -ENOENT;
+ rl_real_end = rl_end;
+ /* Determine the runlist size. */
+ while (likely(rl_real_end->length))
+ rl_real_end++;
+ old_size = rl_real_end - runlist->rl + 1;
+ /* If @start is in a hole simply extend the hole. */
+ if (rl->lcn == LCN_HOLE) {
+ /*
+ * If both @start and @end are in the same sparse run, we are
+ * done.
+ */
+ if (end <= rl[1].vcn) {
+ ntfs_debug("Done (requested hole is already sparse).");
+ return 0;
+ }
+extend_hole:
+ /* Extend the hole. */
+ rl->length = end - rl->vcn;
+ /* If @end is in a hole, merge it with the current one. */
+ if (rl_end->lcn == LCN_HOLE) {
+ rl_end++;
+ rl->length = rl_end->vcn - rl->vcn;
+ }
+ /* We have done the hole. Now deal with the remaining tail. */
+ rl++;
+ /* Cut out all runlist elements up to @end. */
+ if (rl < rl_end)
+ memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+ sizeof(*rl));
+ /* Adjust the beginning of the tail if necessary. */
+ if (end > rl->vcn) {
+ delta = end - rl->vcn;
+ rl->vcn = end;
+ rl->length -= delta;
+ /* Only adjust the lcn if it is real. */
+ if (rl->lcn >= 0)
+ rl->lcn += delta;
+ }
+shrink_allocation:
+ /* Reallocate memory if the allocation changed. */
+ if (rl < rl_end) {
+ rl = ntfs_rl_realloc(runlist->rl, old_size,
+ old_size - (rl_end - rl));
+ if (IS_ERR(rl))
+ ntfs_warning(vol->sb, "Failed to shrink "
+ "runlist buffer. This just "
+ "wastes a bit of memory "
+ "temporarily so we ignore it "
+ "and return success.");
+ else
+ runlist->rl = rl;
+ }
+ ntfs_debug("Done (extend hole).");
+ return 0;
+ }
+ /*
+ * If @start is at the beginning of a run things are easier as there is
+ * no need to split the first run.
+ */
+ if (start == rl->vcn) {
+ /*
+ * @start is at the beginning of a run.
+ *
+ * If the previous run is sparse, extend its hole.
+ *
+ * If @end is not in the same run, switch the run to be sparse
+ * and extend the newly created hole.
+ *
+ * Thus both of these cases reduce the problem to the above
+ * case of "@start is in a hole".
+ */
+ if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
+ rl--;
+ goto extend_hole;
+ }
+ if (end >= rl[1].vcn) {
+ rl->lcn = LCN_HOLE;
+ goto extend_hole;
+ }
+ /*
+ * The final case is when @end is in the same run as @start.
+ * For this we need to split the run into two. One run for the
+ * sparse region between the beginning of the old run, i.e.
+ * @start, and @end and one for the remaining non-sparse
+ * region, i.e. between @end and the end of the old run.
+ */
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size++;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+split_end:
+ /* Shift all the runs up by one. */
+ memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+ /* Finally, setup the two split runs. */
+ rl->lcn = LCN_HOLE;
+ rl->length = length;
+ rl++;
+ rl->vcn += length;
+ /* Only adjust the lcn if it is real. */
+ if (rl->lcn >= 0 || lcn_fixup)
+ rl->lcn += length;
+ rl->length -= length;
+ ntfs_debug("Done (split one).");
+ return 0;
+ }
+ /*
+ * @start is neither in a hole nor at the beginning of a run.
+ *
+ * If @end is in a hole, things are easier: all that is needed is to
+ * truncate the run @start is in to end at @start - 1, delete all runs
+ * after that up to @end, and finally extend the beginning of the run
+ * @end is in to be @start.
+ */
+ if (rl_end->lcn == LCN_HOLE) {
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ /* Cut out all runlist elements up to @end. */
+ if (rl < rl_end)
+ memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+ sizeof(*rl));
+ /* Extend the beginning of the run @end is in to be @start. */
+ rl->vcn = start;
+ rl->length = rl[1].vcn - start;
+ goto shrink_allocation;
+ }
+ /*
+ * If @end is not in a hole there are still two cases to distinguish.
+ * Either @end is or is not in the same run as @start.
+ *
+ * The second case is easier as it can be reduced to an already solved
+ * problem by truncating the run @start is in to end at @start - 1.
+ * Then, if @end is in the next run, we need to split that run into a
+ * sparse run followed by a non-sparse run (already covered above), and
+ * if @end is not in the next run, switching it to be sparse reduces the
+ * problem to the already covered case of "@start is in a hole".
+ */
+ if (end >= rl[1].vcn) {
+ /*
+ * If @end is not in the next run, reduce the problem to the
+ * case of "@start is in a hole".
+ */
+ if (rl[1].length && end >= rl[2].vcn) {
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ rl->vcn = start;
+ rl->lcn = LCN_HOLE;
+ goto extend_hole;
+ }
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size++;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ /*
+ * @end is in the next run, reduce the problem to the case
+ * where "@start is at the beginning of a run and @end is in
+ * the same run as @start".
+ */
+ delta = rl->vcn - start;
+ rl->vcn = start;
+ if (rl->lcn >= 0) {
+ rl->lcn -= delta;
+ /* Need this in case the lcn just became negative. */
+ lcn_fixup = true;
+ }
+ rl->length += delta;
+ goto split_end;
+ }
+ /*
+ * The first case from above, i.e. @end is in the same run as @start.
+ * We need to split the run into three. One run for the non-sparse
+ * region between the beginning of the old run and @start, one for the
+ * sparse region between @start and @end, and one for the remaining
+ * non-sparse region, i.e. between @end and the end of the old run.
+ */
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size += 2;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+ /* Shift all the runs up by two. */
+ memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+ /* Finally, setup the three split runs. */
+ rl->length = start - rl->vcn;
+ rl++;
+ rl->vcn = start;
+ rl->lcn = LCN_HOLE;
+ rl->length = length;
+ rl++;
+ delta = end - rl->vcn;
+ rl->vcn = end;
+ rl->lcn += delta;
+ rl->length -= delta;
+ ntfs_debug("Done (split both).");
+ return 0;
+enomem_out:
+ ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
+ return -ENOMEM;
+}
+
#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index aa0ee6540e7..47728fbb610 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
runlist *const runlist, const s64 new_length);
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+ const VCN start, const s64 length);
+
#endif /* NTFS_RW */
#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 41aa8eb6755..6c3296e546c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
/*
* super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2001,2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -19,17 +19,19 @@
* distribution in the file COPYING); if not, write to the Free Software
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/stddef.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/spinlock.h>
-#include <linux/blkdev.h> /* For bdev_hardsect_size(). */
+#include <linux/blkdev.h> /* For bdev_logical_block_size(). */
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/moduleparam.h>
-#include <linux/smp_lock.h>
+#include <linux/bitmap.h>
#include "sysctl.h"
#include "logfile.h"
@@ -38,6 +40,7 @@
#include "dir.h"
#include "debug.h"
#include "index.h"
+#include "inode.h"
#include "aops.h"
#include "layout.h"
#include "malloc.h"
@@ -47,8 +50,8 @@
static unsigned long ntfs_nr_compression_users;
/* A global default upcase table and a corresponding reference count. */
-static ntfschar *default_upcase = NULL;
-static unsigned long ntfs_nr_upcase_users = 0;
+static ntfschar *default_upcase;
+static unsigned long ntfs_nr_upcase_users;
/* Error constants/strings used in inode.c::ntfs_show_options(). */
typedef enum {
@@ -73,18 +76,18 @@ const option_t on_errors_arr[] = {
*
* Copied from old ntfs driver (which copied from vfat driver).
*/
-static int simple_getbool(char *s, BOOL *setval)
+static int simple_getbool(char *s, bool *setval)
{
if (s) {
if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true"))
- *setval = TRUE;
+ *setval = true;
else if (!strcmp(s, "0") || !strcmp(s, "no") ||
!strcmp(s, "false"))
- *setval = FALSE;
+ *setval = false;
else
return 0;
} else
- *setval = TRUE;
+ *setval = true;
return 1;
}
@@ -95,14 +98,14 @@ static int simple_getbool(char *s, BOOL *setval)
*
* Parse the recognized options in @opt for the ntfs volume described by @vol.
*/
-static BOOL parse_options(ntfs_volume *vol, char *opt)
+static bool parse_options(ntfs_volume *vol, char *opt)
{
char *p, *v, *ov;
static char *utf8 = "utf8";
int errors = 0, sloppy = 0;
- uid_t uid = (uid_t)-1;
- gid_t gid = (gid_t)-1;
- mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
+ kuid_t uid = INVALID_UID;
+ kgid_t gid = INVALID_GID;
+ umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
int mft_zone_multiplier = -1, on_errors = -1;
int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
struct nls_table *nls_map = NULL, *old_nls;
@@ -126,9 +129,41 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
if (*v) \
goto needs_val; \
}
+#define NTFS_GETOPT_UID(option, variable) \
+ if (!strcmp(p, option)) { \
+ uid_t uid_value; \
+ if (!v || !*v) \
+ goto needs_arg; \
+ uid_value = simple_strtoul(ov = v, &v, 0); \
+ if (*v) \
+ goto needs_val; \
+ variable = make_kuid(current_user_ns(), uid_value); \
+ if (!uid_valid(variable)) \
+ goto needs_val; \
+ }
+#define NTFS_GETOPT_GID(option, variable) \
+ if (!strcmp(p, option)) { \
+ gid_t gid_value; \
+ if (!v || !*v) \
+ goto needs_arg; \
+ gid_value = simple_strtoul(ov = v, &v, 0); \
+ if (*v) \
+ goto needs_val; \
+ variable = make_kgid(current_user_ns(), gid_value); \
+ if (!gid_valid(variable)) \
+ goto needs_val; \
+ }
+#define NTFS_GETOPT_OCTAL(option, variable) \
+ if (!strcmp(p, option)) { \
+ if (!v || !*v) \
+ goto needs_arg; \
+ variable = simple_strtoul(ov = v, &v, 8); \
+ if (*v) \
+ goto needs_val; \
+ }
#define NTFS_GETOPT_BOOL(option, variable) \
if (!strcmp(p, option)) { \
- BOOL val; \
+ bool val; \
if (!simple_getbool(v, &val)) \
goto needs_bool; \
variable = val; \
@@ -155,13 +190,13 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
while ((p = strsep(&opt, ","))) {
if ((v = strchr(p, '=')))
*v++ = 0;
- NTFS_GETOPT("uid", uid)
- else NTFS_GETOPT("gid", gid)
- else NTFS_GETOPT("umask", fmask = dmask)
- else NTFS_GETOPT("fmask", fmask)
- else NTFS_GETOPT("dmask", dmask)
+ NTFS_GETOPT_UID("uid", uid)
+ else NTFS_GETOPT_GID("gid", gid)
+ else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
+ else NTFS_GETOPT_OCTAL("fmask", fmask)
+ else NTFS_GETOPT_OCTAL("dmask", dmask)
else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier)
- else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
+ else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, true)
else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive)
else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse)
@@ -185,25 +220,24 @@ use_utf8:
if (!old_nls) {
ntfs_error(vol->sb, "NLS character set "
"%s not found.", v);
- return FALSE;
+ return false;
}
ntfs_error(vol->sb, "NLS character set %s not "
"found. Using previous one %s.",
v, old_nls->charset);
nls_map = old_nls;
} else /* nls_map */ {
- if (old_nls)
- unload_nls(old_nls);
+ unload_nls(old_nls);
}
} else if (!strcmp(p, "utf8")) {
- BOOL val = FALSE;
+ bool val = false;
ntfs_warning(vol->sb, "Option utf8 is no longer "
"supported, using option nls=utf8. Please "
"use option nls=utf8 in the future and "
"make sure utf8 is compiled either as a "
"module or into the kernel.");
if (!v || !*v)
- val = TRUE;
+ val = true;
else if (!simple_getbool(v, &val))
goto needs_bool;
if (val) {
@@ -222,7 +256,7 @@ use_utf8:
}
no_mount_options:
if (errors && !sloppy)
- return FALSE;
+ return false;
if (sloppy)
ntfs_warning(vol->sb, "Sloppy option given. Ignoring "
"unrecognized mount option(s) and continuing.");
@@ -231,14 +265,14 @@ no_mount_options:
if (!on_errors) {
ntfs_error(vol->sb, "Invalid errors option argument "
"or bug in options parser.");
- return FALSE;
+ return false;
}
}
if (nls_map) {
if (vol->nls_map && vol->nls_map != nls_map) {
ntfs_error(vol->sb, "Cannot change NLS character set "
"on remount.");
- return FALSE;
+ return false;
} /* else (!vol->nls_map) */
ntfs_debug("Using NLS character set %s.", nls_map->charset);
vol->nls_map = nls_map;
@@ -248,7 +282,7 @@ no_mount_options:
if (!vol->nls_map) {
ntfs_error(vol->sb, "Failed to load default "
"NLS character set.");
- return FALSE;
+ return false;
}
ntfs_debug("Using default NLS character set (%s).",
vol->nls_map->charset);
@@ -259,7 +293,7 @@ no_mount_options:
mft_zone_multiplier) {
ntfs_error(vol->sb, "Cannot change mft_zone_multiplier "
"on remount.");
- return FALSE;
+ return false;
}
if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) {
ntfs_error(vol->sb, "Invalid mft_zone_multiplier. "
@@ -274,13 +308,13 @@ no_mount_options:
vol->on_errors = on_errors;
if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER)
vol->on_errors |= ON_ERRORS_CONTINUE;
- if (uid != (uid_t)-1)
+ if (uid_valid(uid))
vol->uid = uid;
- if (gid != (gid_t)-1)
+ if (gid_valid(gid))
vol->gid = gid;
- if (fmask != (mode_t)-1)
+ if (fmask != (umode_t)-1)
vol->fmask = fmask;
- if (dmask != (mode_t)-1)
+ if (dmask != (umode_t)-1)
vol->dmask = dmask;
if (show_sys_files != -1) {
if (show_sys_files)
@@ -309,16 +343,16 @@ no_mount_options:
NVolSetSparseEnabled(vol);
}
}
- return TRUE;
+ return true;
needs_arg:
ntfs_error(vol->sb, "The %s option requires an argument.", p);
- return FALSE;
+ return false;
needs_bool:
ntfs_error(vol->sb, "The %s option requires a boolean argument.", p);
- return FALSE;
+ return false;
needs_val:
ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov);
- return FALSE;
+ return false;
}
#ifdef NTFS_RW
@@ -434,9 +468,12 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
ntfs_volume *vol = NTFS_SB(sb);
ntfs_debug("Entering with remount options string: %s", opt);
+
+ sync_filesystem(sb);
+
#ifndef NTFS_RW
- /* For read-only compiled driver, enforce all read-only flags. */
- *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ /* For read-only compiled driver, enforce read-only flag. */
+ *flags |= MS_RDONLY;
#else /* NTFS_RW */
/*
* For the read-write compiled driver, if we are remounting read-write,
@@ -448,7 +485,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
* the volume on boot and updates them.
*
* When remounting read-only, mark the volume clean if no volume errors
- * have occured.
+ * have occurred.
*/
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
static const char *es = ". Cannot remount read-write.";
@@ -463,9 +500,16 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
ntfs_error(sb, "Volume is dirty and read-only%s", es);
return -EROFS;
}
+ if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
+ ntfs_error(sb, "Volume has been modified by chkdsk "
+ "and is read-only%s", es);
+ return -EROFS;
+ }
if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
- ntfs_error(sb, "Volume has unsupported flags set and "
- "is read-only%s", es);
+ ntfs_error(sb, "Volume has unsupported flags set "
+ "(0x%x) and is read-only%s",
+ (unsigned)le16_to_cpu(vol->vol_flags),
+ es);
return -EROFS;
}
if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
@@ -519,6 +563,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
if (!parse_options(vol, opt))
return -EINVAL;
+
ntfs_debug("Done.");
return 0;
}
@@ -527,16 +572,16 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
* is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector
* @sb: Super block of the device to which @b belongs.
* @b: Boot sector of device @sb to check.
- * @silent: If TRUE, all output will be silenced.
+ * @silent: If 'true', all output will be silenced.
*
* is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot
- * sector. Returns TRUE if it is valid and FALSE if not.
+ * sector. Returns 'true' if it is valid and 'false' if not.
*
* @sb is only needed for warning/error output, i.e. it can be NULL when silent
- * is TRUE.
+ * is 'true'.
*/
-static BOOL is_boot_sector_ntfs(const struct super_block *sb,
- const NTFS_BOOT_SECTOR *b, const BOOL silent)
+static bool is_boot_sector_ntfs(const struct super_block *sb,
+ const NTFS_BOOT_SECTOR *b, const bool silent)
{
/*
* Check that checksum == sum of u32 values from b to the checksum
@@ -602,11 +647,11 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
* many BIOSes will refuse to boot from a bootsector if the magic is
* incorrect, so we emit a warning.
*/
- if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
+ if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
ntfs_warning(sb, "Invalid end of sector marker.");
- return TRUE;
+ return true;
not_ntfs:
- return FALSE;
+ return false;
}
/**
@@ -633,7 +678,7 @@ static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb,
{
const char *read_err_str = "Unable to read %s boot sector.";
struct buffer_head *bh_primary, *bh_backup;
- long nr_blocks = NTFS_SB(sb)->nr_blocks;
+ sector_t nr_blocks = NTFS_SB(sb)->nr_blocks;
/* Try to read primary boot sector. */
if ((bh_primary = sb_bread(sb, 0))) {
@@ -680,13 +725,18 @@ hotfix_primary_boot_sector:
/*
* If we managed to read sector zero and the volume is not
* read-only, copy the found, valid backup boot sector to the
- * primary boot sector.
+ * primary boot sector. Note we only copy the actual boot
+ * sector structure, not the actual whole device sector as that
+ * may be bigger and would potentially damage the $Boot system
+ * file (FIXME: Would be nice to know if the backup boot sector
+ * on a large sector device contains the whole boot loader or
+ * just the first 512 bytes).
*/
if (!(sb->s_flags & MS_RDONLY)) {
ntfs_warning(sb, "Hot-fix: Recovering invalid primary "
"boot sector from backup copy.");
memcpy(bh_primary->b_data, bh_backup->b_data,
- sb->s_blocksize);
+ NTFS_BLOCK_SIZE);
mark_buffer_dirty(bh_primary);
sync_dirty_buffer(bh_primary);
if (buffer_uptodate(bh_primary)) {
@@ -711,9 +761,9 @@ hotfix_primary_boot_sector:
* @b: boot sector to parse
*
* Parse the ntfs boot sector @b and store all imporant information therein in
- * the ntfs super block @vol. Return TRUE on success and FALSE on error.
+ * the ntfs super block @vol. Return 'true' on success and 'false' on error.
*/
-static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
+static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
{
unsigned int sectors_per_cluster_bits, nr_hidden_sects;
int clusters_per_mft_record, clusters_per_index_record;
@@ -725,9 +775,13 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
vol->sector_size);
ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits,
vol->sector_size_bits);
- if (vol->sector_size != vol->sb->s_blocksize)
- ntfs_warning(vol->sb, "The boot sector indicates a sector size "
- "different from the device sector size.");
+ if (vol->sector_size < vol->sb->s_blocksize) {
+ ntfs_error(vol->sb, "Sector size (%i) is smaller than the "
+ "device block size (%lu). This is not "
+ "supported. Sorry.", vol->sector_size,
+ vol->sb->s_blocksize);
+ return false;
+ }
ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster);
sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1;
ntfs_debug("sectors_per_cluster_bits = 0x%x",
@@ -740,17 +794,12 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size,
vol->cluster_size);
ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
- ntfs_debug("vol->cluster_size_bits = %i (0x%x)",
- vol->cluster_size_bits, vol->cluster_size_bits);
- if (vol->sector_size > vol->cluster_size) {
- ntfs_error(vol->sb, "Sector sizes above the cluster size are "
- "not supported. Sorry.");
- return FALSE;
- }
- if (vol->sb->s_blocksize > vol->cluster_size) {
- ntfs_error(vol->sb, "Cluster sizes smaller than the device "
- "sector size are not supported. Sorry.");
- return FALSE;
+ ntfs_debug("vol->cluster_size_bits = %i", vol->cluster_size_bits);
+ if (vol->cluster_size < vol->sector_size) {
+ ntfs_error(vol->sb, "Cluster size (%i) is smaller than the "
+ "sector size (%i). This is not supported. "
+ "Sorry.", vol->cluster_size, vol->sector_size);
+ return false;
}
clusters_per_mft_record = b->clusters_per_mft_record;
ntfs_debug("clusters_per_mft_record = %i (0x%x)",
@@ -778,12 +827,19 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
* we store $MFT/$DATA, the table of mft records in the page cache.
*/
if (vol->mft_record_size > PAGE_CACHE_SIZE) {
- ntfs_error(vol->sb, "Mft record size %i (0x%x) exceeds the "
- "page cache size on your system %lu (0x%lx). "
+ ntfs_error(vol->sb, "Mft record size (%i) exceeds the "
+ "PAGE_CACHE_SIZE on your system (%lu). "
"This is not supported. Sorry.",
- vol->mft_record_size, vol->mft_record_size,
- PAGE_CACHE_SIZE, PAGE_CACHE_SIZE);
- return FALSE;
+ vol->mft_record_size, PAGE_CACHE_SIZE);
+ return false;
+ }
+ /* We cannot support mft record sizes below the sector size. */
+ if (vol->mft_record_size < vol->sector_size) {
+ ntfs_error(vol->sb, "Mft record size (%i) is smaller than the "
+ "sector size (%i). This is not supported. "
+ "Sorry.", vol->mft_record_size,
+ vol->sector_size);
+ return false;
}
clusters_per_index_record = b->clusters_per_index_record;
ntfs_debug("clusters_per_index_record = %i (0x%x)",
@@ -808,6 +864,14 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
ntfs_debug("vol->index_record_size_bits = %i (0x%x)",
vol->index_record_size_bits,
vol->index_record_size_bits);
+ /* We cannot support index record sizes below the sector size. */
+ if (vol->index_record_size < vol->sector_size) {
+ ntfs_error(vol->sb, "Index record size (%i) is smaller than "
+ "the sector size (%i). This is not "
+ "supported. Sorry.", vol->index_record_size,
+ vol->sector_size);
+ return false;
+ }
/*
* Get the size of the volume in clusters and check for 64-bit-ness.
* Windows currently only uses 32 bits to save the clusters so we do
@@ -816,7 +880,7 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits;
if ((u64)ll >= 1ULL << 32) {
ntfs_error(vol->sb, "Cannot handle 64-bit clusters. Sorry.");
- return FALSE;
+ return false;
}
vol->nr_clusters = ll;
ntfs_debug("vol->nr_clusters = 0x%llx", (long long)vol->nr_clusters);
@@ -832,21 +896,24 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
"Maximum supported is 2TiB. Sorry.",
(unsigned long long)ll >> (40 -
vol->cluster_size_bits));
- return FALSE;
+ return false;
}
}
ll = sle64_to_cpu(b->mft_lcn);
if (ll >= vol->nr_clusters) {
- ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird.");
- return FALSE;
+ ntfs_error(vol->sb, "MFT LCN (%lli, 0x%llx) is beyond end of "
+ "volume. Weird.", (unsigned long long)ll,
+ (unsigned long long)ll);
+ return false;
}
vol->mft_lcn = ll;
ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn);
ll = sle64_to_cpu(b->mftmirr_lcn);
if (ll >= vol->nr_clusters) {
- ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. "
- "Weird.");
- return FALSE;
+ ntfs_error(vol->sb, "MFTMirr LCN (%lli, 0x%llx) is beyond end "
+ "of volume. Weird.", (unsigned long long)ll,
+ (unsigned long long)ll);
+ return false;
}
vol->mftmirr_lcn = ll;
ntfs_debug("vol->mftmirr_lcn = 0x%llx", (long long)vol->mftmirr_lcn);
@@ -869,7 +936,7 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
vol->serial_no = le64_to_cpu(b->volume_serial_number);
ntfs_debug("vol->serial_no = 0x%llx",
(unsigned long long)vol->serial_no);
- return TRUE;
+ return true;
}
/**
@@ -962,9 +1029,9 @@ static void ntfs_setup_allocators(ntfs_volume *vol)
* load_and_init_mft_mirror - load and setup the mft mirror inode for a volume
* @vol: ntfs super block describing device whose mft mirror to load
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*/
-static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
+static bool load_and_init_mft_mirror(ntfs_volume *vol)
{
struct inode *tmp_ino;
ntfs_inode *tmp_ni;
@@ -976,14 +1043,15 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
if (!IS_ERR(tmp_ino))
iput(tmp_ino);
/* Caller will display error message. */
- return FALSE;
+ return false;
}
/*
* Re-initialize some specifics about $MFTMirr's inode as
* ntfs_read_inode() will have set up the default ones.
*/
/* Set uid and gid to root. */
- tmp_ino->i_uid = tmp_ino->i_gid = 0;
+ tmp_ino->i_uid = GLOBAL_ROOT_UID;
+ tmp_ino->i_gid = GLOBAL_ROOT_GID;
/* Regular file. No access for anyone. */
tmp_ino->i_mode = S_IFREG;
/* No VFS initiated operations allowed for $MFTMirr. */
@@ -1003,20 +1071,20 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
tmp_ni->itype.index.block_size_bits = vol->mft_record_size_bits;
vol->mftmirr_ino = tmp_ino;
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
* check_mft_mirror - compare contents of the mft mirror with the mft
* @vol: ntfs super block describing device whose mft mirror to check
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*
* Note, this function also results in the mft mirror runlist being completely
* mapped into memory. The mft mirror write code requires this and will BUG()
* should it find an unmapped runlist element.
*/
-static BOOL check_mft_mirror(ntfs_volume *vol)
+static bool check_mft_mirror(ntfs_volume *vol)
{
struct super_block *sb = vol->sb;
ntfs_inode *mirr_ni;
@@ -1048,7 +1116,7 @@ static BOOL check_mft_mirror(ntfs_volume *vol)
index);
if (IS_ERR(mft_page)) {
ntfs_error(sb, "Failed to read $MFT.");
- return FALSE;
+ return false;
}
kmft = page_address(mft_page);
/* Get the $MFTMirr page. */
@@ -1061,26 +1129,38 @@ static BOOL check_mft_mirror(ntfs_volume *vol)
kmirr = page_address(mirr_page);
++index;
}
- /* Make sure the record is ok. */
- if (ntfs_is_baad_recordp((le32*)kmft)) {
- ntfs_error(sb, "Incomplete multi sector transfer "
- "detected in mft record %i.", i);
+ /* Do not check the record if it is not in use. */
+ if (((MFT_RECORD*)kmft)->flags & MFT_RECORD_IN_USE) {
+ /* Make sure the record is ok. */
+ if (ntfs_is_baad_recordp((le32*)kmft)) {
+ ntfs_error(sb, "Incomplete multi sector "
+ "transfer detected in mft "
+ "record %i.", i);
mm_unmap_out:
- ntfs_unmap_page(mirr_page);
+ ntfs_unmap_page(mirr_page);
mft_unmap_out:
- ntfs_unmap_page(mft_page);
- return FALSE;
+ ntfs_unmap_page(mft_page);
+ return false;
+ }
}
- if (ntfs_is_baad_recordp((le32*)kmirr)) {
- ntfs_error(sb, "Incomplete multi sector transfer "
- "detected in mft mirror record %i.", i);
- goto mm_unmap_out;
+ /* Do not check the mirror record if it is not in use. */
+ if (((MFT_RECORD*)kmirr)->flags & MFT_RECORD_IN_USE) {
+ if (ntfs_is_baad_recordp((le32*)kmirr)) {
+ ntfs_error(sb, "Incomplete multi sector "
+ "transfer detected in mft "
+ "mirror record %i.", i);
+ goto mm_unmap_out;
+ }
}
/* Get the amount of data in the current record. */
bytes = le32_to_cpu(((MFT_RECORD*)kmft)->bytes_in_use);
- if (!bytes || bytes > vol->mft_record_size) {
+ if (bytes < sizeof(MFT_RECORD_OLD) ||
+ bytes > vol->mft_record_size ||
+ ntfs_is_baad_recordp((le32*)kmft)) {
bytes = le32_to_cpu(((MFT_RECORD*)kmirr)->bytes_in_use);
- if (!bytes || bytes > vol->mft_record_size)
+ if (bytes < sizeof(MFT_RECORD_OLD) ||
+ bytes > vol->mft_record_size ||
+ ntfs_is_baad_recordp((le32*)kmirr))
bytes = vol->mft_record_size;
}
/* Compare the two records. */
@@ -1119,21 +1199,22 @@ mft_unmap_out:
ntfs_error(sb, "$MFTMirr location mismatch. "
"Run chkdsk.");
up_read(&mirr_ni->runlist.lock);
- return FALSE;
+ return false;
}
} while (rl2[i++].length);
up_read(&mirr_ni->runlist.lock);
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
* load_and_check_logfile - load and check the logfile inode for a volume
* @vol: ntfs super block describing device whose logfile to load
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*/
-static BOOL load_and_check_logfile(ntfs_volume *vol)
+static bool load_and_check_logfile(ntfs_volume *vol,
+ RESTART_PAGE_HEADER **rp)
{
struct inode *tmp_ino;
@@ -1143,17 +1224,17 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
if (!IS_ERR(tmp_ino))
iput(tmp_ino);
/* Caller will display error message. */
- return FALSE;
+ return false;
}
- if (!ntfs_check_logfile(tmp_ino)) {
+ if (!ntfs_check_logfile(tmp_ino, rp)) {
iput(tmp_ino);
/* ntfs_check_logfile() will have displayed error output. */
- return FALSE;
+ return false;
}
NInoSetSparseDisabled(NTFS_I(tmp_ino));
vol->logfile_ino = tmp_ino;
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
#define NTFS_HIBERFIL_HEADER_SIZE 4096
@@ -1186,28 +1267,27 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
{
MFT_REF mref;
struct inode *vi;
- ntfs_inode *ni;
struct page *page;
u32 *kaddr, *kend;
ntfs_name *name = NULL;
int ret = 1;
- static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
- const_cpu_to_le16('i'), const_cpu_to_le16('b'),
- const_cpu_to_le16('e'), const_cpu_to_le16('r'),
- const_cpu_to_le16('f'), const_cpu_to_le16('i'),
- const_cpu_to_le16('l'), const_cpu_to_le16('.'),
- const_cpu_to_le16('s'), const_cpu_to_le16('y'),
- const_cpu_to_le16('s'), 0 };
+ static const ntfschar hiberfil[13] = { cpu_to_le16('h'),
+ cpu_to_le16('i'), cpu_to_le16('b'),
+ cpu_to_le16('e'), cpu_to_le16('r'),
+ cpu_to_le16('f'), cpu_to_le16('i'),
+ cpu_to_le16('l'), cpu_to_le16('.'),
+ cpu_to_le16('s'), cpu_to_le16('y'),
+ cpu_to_le16('s'), 0 };
ntfs_debug("Entering.");
/*
* Find the inode number for the hibernation file by looking up the
* filename hiberfil.sys in the root directory.
*/
- down(&vol->root_ino->i_sem);
+ mutex_lock(&vol->root_ino->i_mutex);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
&name);
- up(&vol->root_ino->i_sem);
+ mutex_unlock(&vol->root_ino->i_mutex);
if (IS_ERR_MREF(mref)) {
ret = MREF_ERR(mref);
/* If the file does not exist, Windows is not hibernated. */
@@ -1216,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
"hibernated on the volume.");
return 0;
}
- /* A real error occured. */
+ /* A real error occurred. */
ntfs_error(vol->sb, "Failed to find inode number for "
"hiberfil.sys.");
return ret;
@@ -1237,7 +1317,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
"is not the system volume.", i_size_read(vi));
goto iput_out;
}
- ni = NTFS_I(vi);
page = ntfs_map_page(vi->i_mapping, 0);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
@@ -1245,7 +1324,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
goto iput_out;
}
kaddr = (u32*)page_address(page);
- if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
+ if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
"hibernated on the volume. This is the "
"system volume.");
@@ -1278,30 +1357,30 @@ iput_out:
* load_and_init_quota - load and setup the quota file for a volume if present
* @vol: ntfs super block describing device whose quota file to load
*
- * Return TRUE on success or FALSE on error. If $Quota is not present, we
+ * Return 'true' on success or 'false' on error. If $Quota is not present, we
* leave vol->quota_ino as NULL and return success.
*/
-static BOOL load_and_init_quota(ntfs_volume *vol)
+static bool load_and_init_quota(ntfs_volume *vol)
{
MFT_REF mref;
struct inode *tmp_ino;
ntfs_name *name = NULL;
- static const ntfschar Quota[7] = { const_cpu_to_le16('$'),
- const_cpu_to_le16('Q'), const_cpu_to_le16('u'),
- const_cpu_to_le16('o'), const_cpu_to_le16('t'),
- const_cpu_to_le16('a'), 0 };
- static ntfschar Q[3] = { const_cpu_to_le16('$'),
- const_cpu_to_le16('Q'), 0 };
+ static const ntfschar Quota[7] = { cpu_to_le16('$'),
+ cpu_to_le16('Q'), cpu_to_le16('u'),
+ cpu_to_le16('o'), cpu_to_le16('t'),
+ cpu_to_le16('a'), 0 };
+ static ntfschar Q[3] = { cpu_to_le16('$'),
+ cpu_to_le16('Q'), 0 };
ntfs_debug("Entering.");
/*
* Find the inode number for the quota file by looking up the filename
* $Quota in the extended system files directory $Extend.
*/
- down(&vol->extend_ino->i_sem);
+ mutex_lock(&vol->extend_ino->i_mutex);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
&name);
- up(&vol->extend_ino->i_sem);
+ mutex_unlock(&vol->extend_ino->i_mutex);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, quotas are disabled and have
@@ -1315,11 +1394,11 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
* not enabled.
*/
NVolSetQuotaOutOfDate(vol);
- return TRUE;
+ return true;
}
- /* A real error occured. */
+ /* A real error occurred. */
ntfs_error(vol->sb, "Failed to find inode number for $Quota.");
- return FALSE;
+ return false;
}
/* We do not care for the type of match that was found. */
kfree(name);
@@ -1329,25 +1408,25 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
if (!IS_ERR(tmp_ino))
iput(tmp_ino);
ntfs_error(vol->sb, "Failed to load $Quota.");
- return FALSE;
+ return false;
}
vol->quota_ino = tmp_ino;
/* Get the $Q index allocation attribute. */
tmp_ino = ntfs_index_iget(vol->quota_ino, Q, 2);
if (IS_ERR(tmp_ino)) {
ntfs_error(vol->sb, "Failed to load $Quota/$Q index.");
- return FALSE;
+ return false;
}
vol->quota_q_ino = tmp_ino;
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
* load_and_init_usnjrnl - load and setup the transaction log if present
* @vol: ntfs super block describing device whose usnjrnl file to load
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*
* If $UsnJrnl is not present or in the process of being disabled, we set
* NVolUsnJrnlStamped() and return success.
@@ -1357,7 +1436,7 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
* stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
* and return success.
*/
-static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
+static bool load_and_init_usnjrnl(ntfs_volume *vol)
{
MFT_REF mref;
struct inode *tmp_ino;
@@ -1365,26 +1444,26 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
struct page *page;
ntfs_name *name = NULL;
USN_HEADER *uh;
- static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
- const_cpu_to_le16('U'), const_cpu_to_le16('s'),
- const_cpu_to_le16('n'), const_cpu_to_le16('J'),
- const_cpu_to_le16('r'), const_cpu_to_le16('n'),
- const_cpu_to_le16('l'), 0 };
- static ntfschar Max[5] = { const_cpu_to_le16('$'),
- const_cpu_to_le16('M'), const_cpu_to_le16('a'),
- const_cpu_to_le16('x'), 0 };
- static ntfschar J[3] = { const_cpu_to_le16('$'),
- const_cpu_to_le16('J'), 0 };
+ static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'),
+ cpu_to_le16('U'), cpu_to_le16('s'),
+ cpu_to_le16('n'), cpu_to_le16('J'),
+ cpu_to_le16('r'), cpu_to_le16('n'),
+ cpu_to_le16('l'), 0 };
+ static ntfschar Max[5] = { cpu_to_le16('$'),
+ cpu_to_le16('M'), cpu_to_le16('a'),
+ cpu_to_le16('x'), 0 };
+ static ntfschar J[3] = { cpu_to_le16('$'),
+ cpu_to_le16('J'), 0 };
ntfs_debug("Entering.");
/*
* Find the inode number for the transaction log file by looking up the
* filename $UsnJrnl in the extended system files directory $Extend.
*/
- down(&vol->extend_ino->i_sem);
+ mutex_lock(&vol->extend_ino->i_mutex);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
&name);
- up(&vol->extend_ino->i_sem);
+ mutex_unlock(&vol->extend_ino->i_mutex);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, transaction logging is disabled,
@@ -1399,12 +1478,12 @@ not_enabled:
* transaction logging is not enabled.
*/
NVolSetUsnJrnlStamped(vol);
- return TRUE;
+ return true;
}
- /* A real error occured. */
+ /* A real error occurred. */
ntfs_error(vol->sb, "Failed to find inode number for "
"$UsnJrnl.");
- return FALSE;
+ return false;
}
/* We do not care for the type of match that was found. */
kfree(name);
@@ -1414,7 +1493,7 @@ not_enabled:
if (!IS_ERR(tmp_ino))
iput(tmp_ino);
ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
- return FALSE;
+ return false;
}
vol->usnjrnl_ino = tmp_ino;
/*
@@ -1432,22 +1511,22 @@ not_enabled:
if (IS_ERR(tmp_ino)) {
ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max "
"attribute.");
- return FALSE;
+ return false;
}
vol->usnjrnl_max_ino = tmp_ino;
if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
"attribute (size is 0x%llx but should be at "
- "least 0x%x bytes).", i_size_read(tmp_ino),
+ "least 0x%zx bytes).", i_size_read(tmp_ino),
sizeof(USN_HEADER));
- return FALSE;
+ return false;
}
/* Get the $DATA/$J attribute. */
tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2);
if (IS_ERR(tmp_ino)) {
ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J "
"attribute.");
- return FALSE;
+ return false;
}
vol->usnjrnl_j_ino = tmp_ino;
/* Verify $J is non-resident and sparse. */
@@ -1455,14 +1534,14 @@ not_enabled:
if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) {
ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident "
"and/or not sparse.");
- return FALSE;
+ return false;
}
/* Read the USN_HEADER from $DATA/$Max. */
page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max "
"attribute.");
- return FALSE;
+ return false;
}
uh = (USN_HEADER*)page_address(page);
/* Sanity check the $Max. */
@@ -1473,7 +1552,7 @@ not_enabled:
(long long)sle64_to_cpu(uh->allocation_delta),
(long long)sle64_to_cpu(uh->maximum_size));
ntfs_unmap_page(page);
- return FALSE;
+ return false;
}
/*
* If the transaction log has been stamped and nothing has been written
@@ -1497,20 +1576,20 @@ not_enabled:
(long long)sle64_to_cpu(uh->lowest_valid_usn),
i_size_read(vol->usnjrnl_j_ino));
ntfs_unmap_page(page);
- return FALSE;
+ return false;
}
ntfs_unmap_page(page);
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
/**
* load_and_init_attrdef - load the attribute definitions table for a volume
* @vol: ntfs super block describing device whose attrdef to load
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*/
-static BOOL load_and_init_attrdef(ntfs_volume *vol)
+static bool load_and_init_attrdef(ntfs_volume *vol)
{
loff_t i_size;
struct super_block *sb = vol->sb;
@@ -1556,7 +1635,7 @@ read_partial_attrdef_page:
vol->attrdef_size = i_size;
ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
iput(ino);
- return TRUE;
+ return true;
free_iput_failed:
ntfs_free(vol->attrdef);
vol->attrdef = NULL;
@@ -1564,7 +1643,7 @@ iput_failed:
iput(ino);
failed:
ntfs_error(sb, "Failed to initialize attribute definition table.");
- return FALSE;
+ return false;
}
#endif /* NTFS_RW */
@@ -1573,9 +1652,9 @@ failed:
* load_and_init_upcase - load the upcase table for an ntfs volume
* @vol: ntfs super block describing device whose upcase to load
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*/
-static BOOL load_and_init_upcase(ntfs_volume *vol)
+static bool load_and_init_upcase(ntfs_volume *vol)
{
loff_t i_size;
struct super_block *sb = vol->sb;
@@ -1626,12 +1705,12 @@ read_partial_upcase_page:
ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
i_size, 64 * 1024 * sizeof(ntfschar));
iput(ino);
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (!default_upcase) {
ntfs_debug("Using volume specified $UpCase since default is "
"not present.");
- up(&ntfs_lock);
- return TRUE;
+ mutex_unlock(&ntfs_lock);
+ return true;
}
max = default_upcase_len;
if (max > vol->upcase_len)
@@ -1644,35 +1723,43 @@ read_partial_upcase_page:
vol->upcase = default_upcase;
vol->upcase_len = max;
ntfs_nr_upcase_users++;
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
ntfs_debug("Volume specified $UpCase matches default. Using "
"default.");
- return TRUE;
+ return true;
}
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
ntfs_debug("Using volume specified $UpCase since it does not match "
"the default.");
- return TRUE;
+ return true;
iput_upcase_failed:
iput(ino);
ntfs_free(vol->upcase);
vol->upcase = NULL;
upcase_failed:
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (default_upcase) {
vol->upcase = default_upcase;
vol->upcase_len = default_upcase_len;
ntfs_nr_upcase_users++;
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
ntfs_error(sb, "Failed to load $UpCase from the volume. Using "
"default.");
- return TRUE;
+ return true;
}
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
ntfs_error(sb, "Failed to initialize upcase table.");
- return FALSE;
+ return false;
}
+/*
+ * The lcn and mft bitmap inodes are NTFS-internal inodes with
+ * their own special locking rules:
+ */
+static struct lock_class_key
+ lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
+ mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
+
/**
* load_system_files - open the system files using normal functions
* @vol: ntfs super block describing device whose system files to load
@@ -1680,15 +1767,16 @@ upcase_failed:
* Open the system files with normal access functions and complete setting up
* the ntfs super block @vol.
*
- * Return TRUE on success or FALSE on error.
+ * Return 'true' on success or 'false' on error.
*/
-static BOOL load_system_files(ntfs_volume *vol)
+static bool load_system_files(ntfs_volume *vol)
{
struct super_block *sb = vol->sb;
MFT_RECORD *m;
VOLUME_INFORMATION *vi;
ntfs_attr_search_ctx *ctx;
#ifdef NTFS_RW
+ RESTART_PAGE_HEADER *rp;
int err;
#endif /* NTFS_RW */
@@ -1711,7 +1799,7 @@ static BOOL load_system_files(ntfs_volume *vol)
es3);
goto iput_mirr_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s",
!vol->mftmirr_ino ? es1 : es2, es3);
} else
@@ -1728,6 +1816,10 @@ static BOOL load_system_files(ntfs_volume *vol)
ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
goto iput_mirr_err_out;
}
+ lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
+ &mftbmp_runlist_lock_key);
+ lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
+ &mftbmp_mrec_lock_key);
/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
if (!load_and_init_upcase(vol))
goto iput_mftbmp_err_out;
@@ -1750,6 +1842,11 @@ static BOOL load_system_files(ntfs_volume *vol)
iput(vol->lcnbmp_ino);
goto bitmap_failed;
}
+ lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
+ &lcnbmp_runlist_lock_key);
+ lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
+ &lcnbmp_mrec_lock_key);
+
NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
iput(vol->lcnbmp_ino);
@@ -1800,7 +1897,7 @@ get_ctx_vol_failed:
vol->minor_ver = vi->minor_ver;
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(NTFS_I(vol->vol_ino));
- printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
+ pr_info("volume version %i.%i.\n", vol->major_ver,
vol->minor_ver);
if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
@@ -1812,11 +1909,24 @@ get_ctx_vol_failed:
/* Make sure that no unsupported volume flags are set. */
if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
static const char *es1a = "Volume is dirty";
- static const char *es1b = "Volume has unsupported flags set";
- static const char *es2 = ". Run chkdsk and mount in Windows.";
- const char *es1;
-
- es1 = vol->vol_flags & VOLUME_IS_DIRTY ? es1a : es1b;
+ static const char *es1b = "Volume has been modified by chkdsk";
+ static const char *es1c = "Volume has unsupported flags set";
+ static const char *es2a = ". Run chkdsk and mount in Windows.";
+ static const char *es2b = ". Mount in Windows.";
+ const char *es1, *es2;
+
+ es2 = es2a;
+ if (vol->vol_flags & VOLUME_IS_DIRTY)
+ es1 = es1a;
+ else if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
+ es1 = es1b;
+ es2 = es2b;
+ } else {
+ es1 = es1c;
+ ntfs_warning(sb, "Unsupported volume flags 0x%x "
+ "encountered.",
+ (unsigned)le16_to_cpu(vol->vol_flags));
+ }
/* If a read-write mount, convert it to a read-only mount. */
if (!(sb->s_flags & MS_RDONLY)) {
if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
@@ -1827,7 +1937,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_vol_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1841,8 +1951,9 @@ get_ctx_vol_failed:
* Get the inode for the logfile, check it and determine if the volume
* was shutdown cleanly.
*/
- if (!load_and_check_logfile(vol) ||
- !ntfs_is_logfile_clean(vol->logfile_ino)) {
+ rp = NULL;
+ if (!load_and_check_logfile(vol, &rp) ||
+ !ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
static const char *es1a = "Failed to load $LogFile";
static const char *es1b = "$LogFile is not clean";
static const char *es2 = ". Mount in Windows.";
@@ -1857,9 +1968,13 @@ get_ctx_vol_failed:
"continue nor on_errors="
"remount-ro was specified%s",
es1, es2);
+ if (vol->logfile_ino) {
+ BUG_ON(!rp);
+ ntfs_free(rp);
+ }
goto iput_logfile_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1867,6 +1982,7 @@ get_ctx_vol_failed:
/* This will prevent a read-write remount. */
NVolSetErrors(vol);
}
+ ntfs_free(rp);
#endif /* NTFS_RW */
/* Get the root directory inode so we can do path lookups. */
vol->root_ino = ntfs_iget(sb, FILE_root);
@@ -1903,7 +2019,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_root_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -1927,7 +2043,7 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
/*
* Do not set NVolErrors() because ntfs_remount() might manage
* to set the dirty flag in which case all would be well.
@@ -1954,7 +2070,7 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
NVolSetErrors(vol);
}
#endif
@@ -1973,13 +2089,13 @@ get_ctx_vol_failed:
goto iput_root_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
NVolSetErrors(vol);
}
#endif /* NTFS_RW */
/* If on NTFS versions before 3.0, we are done. */
if (unlikely(vol->major_ver < 3))
- return TRUE;
+ return true;
/* NTFS 3.0+ specific initialization. */
/* Get the security descriptors inode. */
vol->secure_ino = ntfs_iget(sb, FILE_Secure);
@@ -2014,7 +2130,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_quota_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2037,7 +2153,7 @@ get_ctx_vol_failed:
goto iput_quota_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
NVolSetErrors(vol);
}
/*
@@ -2058,7 +2174,7 @@ get_ctx_vol_failed:
es1, es2);
goto iput_usnjrnl_err_out;
}
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
} else
ntfs_warning(sb, "%s. Will not be able to remount "
@@ -2081,11 +2197,11 @@ get_ctx_vol_failed:
goto iput_usnjrnl_err_out;
}
ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
NVolSetErrors(vol);
}
#endif /* NTFS_RW */
- return TRUE;
+ return true;
#ifdef NTFS_RW
iput_usnjrnl_err_out:
if (vol->usnjrnl_j_ino)
@@ -2124,12 +2240,12 @@ iput_attrdef_err_out:
iput_upcase_err_out:
#endif /* NTFS_RW */
vol->upcase_len = 0;
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
}
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
if (vol->upcase) {
ntfs_free(vol->upcase);
vol->upcase = NULL;
@@ -2141,7 +2257,7 @@ iput_mirr_err_out:
if (vol->mftmirr_ino)
iput(vol->mftmirr_ino);
#endif /* NTFS_RW */
- return FALSE;
+ return false;
}
/**
@@ -2158,6 +2274,7 @@ static void ntfs_put_super(struct super_block *sb)
ntfs_volume *vol = NTFS_SB(sb);
ntfs_debug("Entering.");
+
#ifdef NTFS_RW
/*
* Commit all inodes while they are still open in case some of them
@@ -2201,7 +2318,7 @@ static void ntfs_put_super(struct super_block *sb)
ntfs_commit_inode(vol->mft_ino);
/*
- * If a read-write mount and no volume errors have occured, mark the
+ * If a read-write mount and no volume errors have occurred, mark the
* volume clean. Also, re-commit all affected inodes.
*/
if (!(sb->s_flags & MS_RDONLY)) {
@@ -2285,39 +2402,12 @@ static void ntfs_put_super(struct super_block *sb)
vol->mftmirr_ino = NULL;
}
/*
- * If any dirty inodes are left, throw away all mft data page cache
- * pages to allow a clean umount. This should never happen any more
- * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
- * the underlying mft records are written out and cleaned. If it does,
- * happen anyway, we want to know...
+ * We should have no dirty inodes left, due to
+ * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+ * the underlying mft records are written out and cleaned.
*/
ntfs_commit_inode(vol->mft_ino);
write_inode_now(vol->mft_ino, 1);
- if (!list_empty(&sb->s_dirty)) {
- const char *s1, *s2;
-
- down(&vol->mft_ino->i_sem);
- truncate_inode_pages(vol->mft_ino->i_mapping, 0);
- up(&vol->mft_ino->i_sem);
- write_inode_now(vol->mft_ino, 1);
- if (!list_empty(&sb->s_dirty)) {
- static const char *_s1 = "inodes";
- static const char *_s2 = "";
- s1 = _s1;
- s2 = _s2;
- } else {
- static const char *_s1 = "mft pages";
- static const char *_s2 = "They have been thrown "
- "away. ";
- s1 = _s1;
- s2 = _s2;
- }
- ntfs_error(sb, "Dirty %s found at umount time. %sYou should "
- "run chkdsk. Please email "
- "linux-ntfs-dev@lists.sourceforge.net and say "
- "that you saw this message. Thank you.", s1,
- s2);
- }
#endif /* NTFS_RW */
iput(vol->mft_ino);
@@ -2334,7 +2424,7 @@ static void ntfs_put_super(struct super_block *sb)
* Destroy the global default upcase table if necessary. Also decrease
* the number of upcase users if we are a user.
*/
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
@@ -2345,18 +2435,16 @@ static void ntfs_put_super(struct super_block *sb)
}
if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
free_compression_buffers();
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
if (vol->upcase) {
ntfs_free(vol->upcase);
vol->upcase = NULL;
}
- if (vol->nls_map) {
- unload_nls(vol->nls_map);
- vol->nls_map = NULL;
- }
+
+ unload_nls(vol->nls_map);
+
sb->s_fs_info = NULL;
kfree(vol);
- return;
}
/**
@@ -2381,9 +2469,7 @@ static void ntfs_put_super(struct super_block *sb)
static s64 get_nr_free_clusters(ntfs_volume *vol)
{
s64 nr_free = vol->nr_clusters;
- u32 *kaddr;
struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
- filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
struct page *page;
pgoff_t index, max_index;
@@ -2401,40 +2487,31 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
max_index, PAGE_CACHE_SIZE / 4);
for (index = 0; index < max_index; index++) {
- unsigned int i;
+ unsigned long *kaddr;
+
/*
* Read the page from page cache, getting it from backing store
* if necessary, and increment the use count.
*/
- page = read_cache_page(mapping, index, (filler_t*)readpage,
- NULL);
+ page = read_mapping_page(mapping, index, NULL);
/* Ignore pages which errored synchronously. */
if (IS_ERR(page)) {
- ntfs_debug("Sync read_cache_page() error. Skipping "
+ ntfs_debug("read_mapping_page() error. Skipping "
"page (index 0x%lx).", index);
nr_free -= PAGE_CACHE_SIZE * 8;
continue;
}
- wait_on_page_locked(page);
- /* Ignore pages which errored asynchronously. */
- if (!PageUptodate(page)) {
- ntfs_debug("Async read_cache_page() error. Skipping "
- "page (index 0x%lx).", index);
- page_cache_release(page);
- nr_free -= PAGE_CACHE_SIZE * 8;
- continue;
- }
- kaddr = (u32*)kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
/*
- * For each 4 bytes, subtract the number of set bits. If this
+ * Subtract the number of set bits. If this
* is the last page and it is partial we don't really care as
* it just means we do a little extra work but it won't affect
* the result as all out of range bytes are set to zero by
* ntfs_readpage().
*/
- for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
- nr_free -= (s64)hweight32(kaddr[i]);
- kunmap_atomic(kaddr, KM_USER0);
+ nr_free -= bitmap_weight(kaddr,
+ PAGE_CACHE_SIZE * BITS_PER_BYTE);
+ kunmap_atomic(kaddr);
page_cache_release(page);
}
ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
@@ -2445,7 +2522,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
if (vol->nr_clusters & 63)
nr_free += 64 - (vol->nr_clusters & 63);
up_read(&vol->lcnbmp_lock);
- /* If errors occured we may well have gone below zero, fix this. */
+ /* If errors occurred we may well have gone below zero, fix this. */
if (nr_free < 0)
nr_free = 0;
ntfs_debug("Exiting.");
@@ -2472,9 +2549,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
s64 nr_free, const pgoff_t max_index)
{
- u32 *kaddr;
struct address_space *mapping = vol->mftbmp_ino->i_mapping;
- filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
struct page *page;
pgoff_t index;
@@ -2483,45 +2558,36 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
"0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
for (index = 0; index < max_index; index++) {
- unsigned int i;
+ unsigned long *kaddr;
+
/*
* Read the page from page cache, getting it from backing store
* if necessary, and increment the use count.
*/
- page = read_cache_page(mapping, index, (filler_t*)readpage,
- NULL);
+ page = read_mapping_page(mapping, index, NULL);
/* Ignore pages which errored synchronously. */
if (IS_ERR(page)) {
- ntfs_debug("Sync read_cache_page() error. Skipping "
- "page (index 0x%lx).", index);
- nr_free -= PAGE_CACHE_SIZE * 8;
- continue;
- }
- wait_on_page_locked(page);
- /* Ignore pages which errored asynchronously. */
- if (!PageUptodate(page)) {
- ntfs_debug("Async read_cache_page() error. Skipping "
+ ntfs_debug("read_mapping_page() error. Skipping "
"page (index 0x%lx).", index);
- page_cache_release(page);
nr_free -= PAGE_CACHE_SIZE * 8;
continue;
}
- kaddr = (u32*)kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
/*
- * For each 4 bytes, subtract the number of set bits. If this
+ * Subtract the number of set bits. If this
* is the last page and it is partial we don't really care as
* it just means we do a little extra work but it won't affect
* the result as all out of range bytes are set to zero by
* ntfs_readpage().
*/
- for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
- nr_free -= (s64)hweight32(kaddr[i]);
- kunmap_atomic(kaddr, KM_USER0);
+ nr_free -= bitmap_weight(kaddr,
+ PAGE_CACHE_SIZE * BITS_PER_BYTE);
+ kunmap_atomic(kaddr);
page_cache_release(page);
}
ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
index - 1);
- /* If errors occured we may well have gone below zero, fix this. */
+ /* If errors occurred we may well have gone below zero, fix this. */
if (nr_free < 0)
nr_free = 0;
ntfs_debug("Exiting.");
@@ -2530,10 +2596,10 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
/**
* ntfs_statfs - return information about mounted NTFS volume
- * @sb: super block of mounted volume
+ * @dentry: dentry from mounted volume
* @sfs: statfs structure in which to return the information
*
- * Return information about the mounted NTFS volume @sb in the statfs structure
+ * Return information about the mounted NTFS volume containing @dentry in the statfs structure
* pointed to by @sfs (this is initialized with zeros before ntfs_statfs is
* called). We interpret the values to be correct of the moment in time at
* which we are called. Most values are variable otherwise and this isn't just
@@ -2546,8 +2612,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
*
* Return 0 on success or -errno on error.
*/
-static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
+static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
{
+ struct super_block *sb = dentry->d_sb;
s64 size;
ntfs_volume *vol = NTFS_SB(sb);
ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
@@ -2607,41 +2674,28 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
return 0;
}
+#ifdef NTFS_RW
+static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
+{
+ return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
+}
+#endif
+
/**
* The complete super operations.
*/
-static struct super_operations ntfs_sops = {
+static const struct super_operations ntfs_sops = {
.alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
.destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
- .put_inode = ntfs_put_inode, /* VFS: Called just before
- the inode reference count
- is decreased. */
#ifdef NTFS_RW
- //.dirty_inode = NULL, /* VFS: Called from
- // __mark_inode_dirty(). */
.write_inode = ntfs_write_inode, /* VFS: Write dirty inode to
disk. */
- //.drop_inode = NULL, /* VFS: Called just after the
- // inode reference count has
- // been decreased to zero.
- // NOTE: The inode lock is
- // held. See fs/inode.c::
- // generic_drop_inode(). */
- //.delete_inode = NULL, /* VFS: Delete inode from disk.
- // Called when i_count becomes
- // 0 and i_nlink is also 0. */
- //.write_super = NULL, /* Flush dirty super block to
- // disk. */
- //.sync_fs = NULL, /* ? */
- //.write_super_lockfs = NULL, /* ? */
- //.unlockfs = NULL, /* ? */
#endif /* NTFS_RW */
.put_super = ntfs_put_super, /* Syscall: umount. */
.statfs = ntfs_statfs, /* Syscall: statfs */
.remount_fs = ntfs_remount, /* Syscall: mount -o remount. */
- .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is
+ .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is
removed from memory. */
- //.umount_begin = NULL, /* Forced umount. */
.show_options = ntfs_show_options, /* Show mount options in
proc. */
};
@@ -2669,11 +2723,22 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
ntfs_volume *vol;
struct buffer_head *bh;
struct inode *tmp_ino;
- int result;
+ int blocksize, result;
+ /*
+ * We do a pretty difficult piece of bootstrap by reading the
+ * MFT (and other metadata) from disk into memory. We'll only
+ * release this metadata during umount, so the locking patterns
+ * observed during bootstrap do not count. So turn off the
+ * observation of locking patterns (strictly for this context
+ * only) while mounting NTFS. [The validator is still active
+ * otherwise, even for this context: it will for example record
+ * lock class registrations.]
+ */
+ lockdep_off();
ntfs_debug("Entering.");
#ifndef NTFS_RW
- sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+ sb->s_flags |= MS_RDONLY;
#endif /* ! NTFS_RW */
/* Allocate a new ntfs_volume and place it in sb->s_fs_info. */
sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
@@ -2682,6 +2747,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
if (!silent)
ntfs_error(sb, "Allocation of NTFS volume structure "
"failed. Aborting mount...");
+ lockdep_on();
return -ENOMEM;
}
/* Initialize ntfs_volume structure. */
@@ -2699,8 +2765,6 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
init_rwsem(&vol->mftbmp_lock);
init_rwsem(&vol->lcnbmp_lock);
- unlock_kernel();
-
/* By default, enable sparse support. */
NVolSetSparseEnabled(vol);
@@ -2708,60 +2772,85 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
if (!parse_options(vol, (char*)opt))
goto err_out_now;
+ /* We support sector sizes up to the PAGE_CACHE_SIZE. */
+ if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+ if (!silent)
+ ntfs_error(sb, "Device has unsupported sector size "
+ "(%i). The maximum supported sector "
+ "size on this architecture is %lu "
+ "bytes.",
+ bdev_logical_block_size(sb->s_bdev),
+ PAGE_CACHE_SIZE);
+ goto err_out_now;
+ }
/*
- * TODO: Fail safety check. In the future we should really be able to
- * cope with this being the case, but for now just bail out.
+ * Setup the device access block size to NTFS_BLOCK_SIZE or the hard
+ * sector size, whichever is bigger.
*/
- if (bdev_hardsect_size(sb->s_bdev) > NTFS_BLOCK_SIZE) {
+ blocksize = sb_min_blocksize(sb, NTFS_BLOCK_SIZE);
+ if (blocksize < NTFS_BLOCK_SIZE) {
if (!silent)
- ntfs_error(sb, "Device has unsupported hardsect_size.");
+ ntfs_error(sb, "Unable to set device block size.");
goto err_out_now;
}
-
- /* Setup the device access block size to NTFS_BLOCK_SIZE. */
- if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) {
+ BUG_ON(blocksize != sb->s_blocksize);
+ ntfs_debug("Set device block size to %i bytes (block size bits %i).",
+ blocksize, sb->s_blocksize_bits);
+ /* Determine the size of the device in units of block_size bytes. */
+ if (!i_size_read(sb->s_bdev->bd_inode)) {
if (!silent)
- ntfs_error(sb, "Unable to set block size.");
+ ntfs_error(sb, "Unable to determine device size.");
goto err_out_now;
}
-
- /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
- NTFS_BLOCK_SIZE_BITS;
-
+ sb->s_blocksize_bits;
/* Read the boot sector and return unlocked buffer head to it. */
if (!(bh = read_ntfs_boot_sector(sb, silent))) {
if (!silent)
ntfs_error(sb, "Not an NTFS volume.");
goto err_out_now;
}
-
/*
- * Extract the data from the boot sector and setup the ntfs super block
+ * Extract the data from the boot sector and setup the ntfs volume
* using it.
*/
result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data);
-
- /* Initialize the cluster and mft allocators. */
- ntfs_setup_allocators(vol);
-
brelse(bh);
-
if (!result) {
if (!silent)
ntfs_error(sb, "Unsupported NTFS filesystem.");
goto err_out_now;
}
-
/*
- * TODO: When we start coping with sector sizes different from
- * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the
- * device (probably to NTFS_BLOCK_SIZE).
+ * If the boot sector indicates a sector size bigger than the current
+ * device block size, switch the device block size to the sector size.
+ * TODO: It may be possible to support this case even when the set
+ * below fails, we would just be breaking up the i/o for each sector
+ * into multiple blocks for i/o purposes but otherwise it should just
+ * work. However it is safer to leave disabled until someone hits this
+ * error message and then we can get them to try it without the setting
+ * so we know for sure that it works.
*/
-
+ if (vol->sector_size > blocksize) {
+ blocksize = sb_set_blocksize(sb, vol->sector_size);
+ if (blocksize != vol->sector_size) {
+ if (!silent)
+ ntfs_error(sb, "Unable to set device block "
+ "size to sector size (%i).",
+ vol->sector_size);
+ goto err_out_now;
+ }
+ BUG_ON(blocksize != sb->s_blocksize);
+ vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
+ sb->s_blocksize_bits;
+ ntfs_debug("Changed device block size to %i bytes (block size "
+ "bits %i) to match volume sector size.",
+ blocksize, sb->s_blocksize_bits);
+ }
+ /* Initialize the cluster and mft allocators. */
+ ntfs_setup_allocators(vol);
/* Setup remaining fields in the super block. */
sb->s_magic = NTFS_SB_MAGIC;
-
/*
* Ntfs allows 63 bits for the file size, i.e. correct would be:
* sb->s_maxbytes = ~0ULL >> 1;
@@ -2771,9 +2860,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
* without overflowing the index or to 2^63 - 1, whichever is smaller.
*/
sb->s_maxbytes = MAX_LFS_FILESIZE;
-
+ /* Ntfs measures time in 100ns intervals. */
sb->s_time_gran = 100;
-
/*
* Now load the metadata required for the page cache and our address
* space operations to function. We do this by setting up a specialised
@@ -2795,7 +2883,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
ntfs_error(sb, "Failed to load essential metadata.");
goto iput_tmp_ino_err_out_now;
}
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
/*
* The current mount is a compression user if the cluster size is
* less than or equal to 4kiB.
@@ -2806,7 +2894,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
ntfs_error(NULL, "Failed to allocate buffers "
"for compression engine.");
ntfs_nr_compression_users--;
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
goto iput_tmp_ino_err_out_now;
}
}
@@ -2818,7 +2906,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
if (!default_upcase)
default_upcase = generate_default_upcase();
ntfs_nr_upcase_users++;
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
/*
* From now on, ignore @silent parameter. If we fail below this line,
* it will be due to a corrupt fs or a system error, so we report it.
@@ -2831,19 +2919,20 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
ntfs_error(sb, "Failed to load system files.");
goto unl_upcase_iput_tmp_ino_err_out_now;
}
- if ((sb->s_root = d_alloc_root(vol->root_ino))) {
- /* We increment i_count simulating an ntfs_iget(). */
- atomic_inc(&vol->root_ino->i_count);
+
+ /* We grab a reference, simulating an ntfs_iget(). */
+ ihold(vol->root_ino);
+ if ((sb->s_root = d_make_root(vol->root_ino))) {
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (!--ntfs_nr_upcase_users && default_upcase) {
ntfs_free(default_upcase);
default_upcase = NULL;
}
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
sb->s_export_op = &ntfs_export_ops;
- lock_kernel();
+ lockdep_on();
return 0;
}
ntfs_error(sb, "Failed to allocate root directory.");
@@ -2909,12 +2998,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
vol->attrdef = NULL;
}
vol->upcase_len = 0;
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (vol->upcase == default_upcase) {
ntfs_nr_upcase_users--;
vol->upcase = NULL;
}
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
if (vol->upcase) {
ntfs_free(vol->upcase);
vol->upcase = NULL;
@@ -2929,40 +3018,25 @@ unl_upcase_iput_tmp_ino_err_out_now:
* Decrease the number of upcase users and destroy the global default
* upcase table if necessary.
*/
- down(&ntfs_lock);
+ mutex_lock(&ntfs_lock);
if (!--ntfs_nr_upcase_users && default_upcase) {
ntfs_free(default_upcase);
default_upcase = NULL;
}
if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
free_compression_buffers();
- up(&ntfs_lock);
+ mutex_unlock(&ntfs_lock);
iput_tmp_ino_err_out_now:
iput(tmp_ino);
if (vol->mft_ino && vol->mft_ino != tmp_ino)
iput(vol->mft_ino);
vol->mft_ino = NULL;
- /*
- * This is needed to get ntfs_clear_extent_inode() called for each
- * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
- * leak resources and B) a subsequent mount fails automatically due to
- * ntfs_iget() never calling down into our ntfs_read_locked_inode()
- * method again... FIXME: Do we need to do this twice now because of
- * attribute inodes? I think not, so leave as is for now... (AIA)
- */
- if (invalidate_inodes(sb)) {
- ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
- "driver bug.");
- /* Copied from fs/super.c. I just love this message. (-; */
- printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
- "seconds. Have a nice day...\n");
- }
/* Errors at this stage are irrelevant. */
err_out_now:
- lock_kernel();
sb->s_fs_info = NULL;
kfree(vol);
ntfs_debug("Failed, returning -EINVAL.");
+ lockdep_on();
return -EINVAL;
}
@@ -2971,46 +3045,44 @@ err_out_now:
* strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN
* (255) Unicode characters + a terminating NULL Unicode character.
*/
-kmem_cache_t *ntfs_name_cache;
+struct kmem_cache *ntfs_name_cache;
/* Slab caches for efficient allocation/deallocation of inodes. */
-kmem_cache_t *ntfs_inode_cache;
-kmem_cache_t *ntfs_big_inode_cache;
+struct kmem_cache *ntfs_inode_cache;
+struct kmem_cache *ntfs_big_inode_cache;
/* Init once constructor for the inode slab cache. */
-static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep,
- unsigned long flags)
+static void ntfs_big_inode_init_once(void *foo)
{
ntfs_inode *ni = (ntfs_inode *)foo;
- if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR)
- inode_init_once(VFS_I(ni));
+ inode_init_once(VFS_I(ni));
}
/*
* Slab caches to optimize allocations and deallocations of attribute search
* contexts and index contexts, respectively.
*/
-kmem_cache_t *ntfs_attr_ctx_cache;
-kmem_cache_t *ntfs_index_ctx_cache;
+struct kmem_cache *ntfs_attr_ctx_cache;
+struct kmem_cache *ntfs_index_ctx_cache;
-/* Driver wide semaphore. */
-DECLARE_MUTEX(ntfs_lock);
+/* Driver wide mutex. */
+DEFINE_MUTEX(ntfs_lock);
-static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *ntfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+ return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
}
static struct file_system_type ntfs_fs_type = {
.owner = THIS_MODULE,
.name = "ntfs",
- .get_sb = ntfs_get_sb,
+ .mount = ntfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
+MODULE_ALIAS_FS("ntfs");
/* Stable names for the slab caches. */
static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache";
@@ -3024,7 +3096,7 @@ static int __init init_ntfs_fs(void)
int err = 0;
/* This may be ugly but it results in pretty output so who cares. (-8 */
- printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/"
+ pr_info("driver " NTFS_VERSION " [Flags: R/"
#ifdef NTFS_RW
"W"
#else
@@ -3042,53 +3114,49 @@ static int __init init_ntfs_fs(void)
ntfs_index_ctx_cache = kmem_cache_create(ntfs_index_ctx_cache_name,
sizeof(ntfs_index_context), 0 /* offset */,
- SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */);
+ SLAB_HWCACHE_ALIGN, NULL /* ctor */);
if (!ntfs_index_ctx_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_index_ctx_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
goto ictx_err_out;
}
ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
sizeof(ntfs_attr_search_ctx), 0 /* offset */,
- SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */);
+ SLAB_HWCACHE_ALIGN, NULL /* ctor */);
if (!ntfs_attr_ctx_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_attr_ctx_cache_name);
+ pr_crit("NTFS: Failed to create %s!\n",
+ ntfs_attr_ctx_cache_name);
goto actx_err_out;
}
ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name,
(NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL);
if (!ntfs_name_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_name_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
goto name_err_out;
}
ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name,
sizeof(ntfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
if (!ntfs_inode_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_inode_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
goto inode_err_out;
}
ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
sizeof(big_ntfs_inode), 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
- ntfs_big_inode_init_once, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+ ntfs_big_inode_init_once);
if (!ntfs_big_inode_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_big_inode_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
goto big_inode_err_out;
}
/* Register the ntfs sysctls. */
err = ntfs_sysctl(1);
if (err) {
- printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n");
+ pr_crit("Failed to register NTFS sysctls!\n");
goto sysctl_err_out;
}
@@ -3097,8 +3165,10 @@ static int __init init_ntfs_fs(void)
ntfs_debug("NTFS driver registered successfully.");
return 0; /* Success! */
}
- printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
+ pr_crit("Failed to register NTFS filesystem driver!\n");
+ /* Unregister the ntfs sysctls. */
+ ntfs_sysctl(0);
sysctl_err_out:
kmem_cache_destroy(ntfs_big_inode_cache);
big_inode_err_out:
@@ -3111,8 +3181,7 @@ actx_err_out:
kmem_cache_destroy(ntfs_index_ctx_cache);
ictx_err_out:
if (!err) {
- printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver "
- "registration...\n");
+ pr_crit("Aborting NTFS filesystem driver registration...\n");
err = -ENOMEM;
}
return err;
@@ -3120,42 +3189,30 @@ ictx_err_out:
static void __exit exit_ntfs_fs(void)
{
- int err = 0;
-
ntfs_debug("Unregistering NTFS driver.");
unregister_filesystem(&ntfs_fs_type);
- if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_big_inode_cache_name);
- if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_inode_cache_name);
- if (kmem_cache_destroy(ntfs_name_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_name_cache_name);
- if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_attr_ctx_cache_name);
- if (kmem_cache_destroy(ntfs_index_ctx_cache) && (err = 1))
- printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
- ntfs_index_ctx_cache_name);
- if (err)
- printk(KERN_CRIT "NTFS: This causes memory to leak! There is "
- "probably a BUG in the driver! Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists.sourceforge.net\n");
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(ntfs_big_inode_cache);
+ kmem_cache_destroy(ntfs_inode_cache);
+ kmem_cache_destroy(ntfs_name_cache);
+ kmem_cache_destroy(ntfs_attr_ctx_cache);
+ kmem_cache_destroy(ntfs_index_ctx_cache);
/* Unregister the ntfs sysctls. */
ntfs_sysctl(0);
}
-MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
-MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov");
+MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.");
MODULE_VERSION(NTFS_VERSION);
MODULE_LICENSE("GPL");
#ifdef DEBUG
-module_param(debug_msgs, bool, 0);
+module_param(debug_msgs, bint, 0);
MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
#endif
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 1c23138d00b..a503156ec15 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -1,7 +1,7 @@
/*
* sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of
* the Linux-NTFS project. Adapted from the old NTFS driver,
- * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
+ * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
*
* Copyright (c) 2002-2005 Anton Altaparmakov
*
@@ -33,24 +33,30 @@
#include "sysctl.h"
#include "debug.h"
-#define FS_NTFS 1
-
/* Definition of the ntfs sysctl. */
-static ctl_table ntfs_sysctls[] = {
- { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */
- &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */
- 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */
- { 0 }
+static struct ctl_table ntfs_sysctls[] = {
+ {
+ .procname = "ntfs-debug",
+ .data = &debug_msgs, /* Data pointer and size. */
+ .maxlen = sizeof(debug_msgs),
+ .mode = 0644, /* Mode, proc handler. */
+ .proc_handler = proc_dointvec
+ },
+ {}
};
/* Define the parent directory /proc/sys/fs. */
-static ctl_table sysctls_root[] = {
- { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls },
- { 0 }
+static struct ctl_table sysctls_root[] = {
+ {
+ .procname = "fs",
+ .mode = 0555,
+ .child = ntfs_sysctls
+ },
+ {}
};
/* Storage for the sysctls header. */
-static struct ctl_table_header *sysctls_root_table = NULL;
+static struct ctl_table_header *sysctls_root_table;
/**
* ntfs_sysctl - add or remove the debug sysctl
@@ -62,17 +68,9 @@ int ntfs_sysctl(int add)
{
if (add) {
BUG_ON(sysctls_root_table);
- sysctls_root_table = register_sysctl_table(sysctls_root, 0);
+ sysctls_root_table = register_sysctl_table(sysctls_root);
if (!sysctls_root_table)
return -ENOMEM;
-#ifdef CONFIG_PROC_FS
- /*
- * If the proc filesystem is in use and we are a module, need
- * to set the owner of our proc entry to our module. In the
- * non-modular case, THIS_MODULE is NULL, so this is ok.
- */
- ntfs_sysctls[0].de->owner = THIS_MODULE;
-#endif
} else {
BUG_ON(!sysctls_root_table);
unregister_sysctl_table(sysctls_root_table);
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h
index c8064cae8f1..d4f8ce920d9 100644
--- a/fs/ntfs/sysctl.h
+++ b/fs/ntfs/sysctl.h
@@ -1,7 +1,7 @@
/*
* sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of
* the Linux-NTFS project. Adapted from the old NTFS driver,
- * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
+ * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
*
* Copyright (c) 2002-2004 Anton Altaparmakov
*
@@ -24,7 +24,6 @@
#ifndef _LINUX_NTFS_SYSCTL_H
#define _LINUX_NTFS_SYSCTL_H
-#include <linux/config.h>
#if defined(DEBUG) && defined(CONFIG_SYSCTL)
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h
index 6e4a7e3343f..8c8053b6698 100644
--- a/fs/ntfs/types.h
+++ b/fs/ntfs/types.h
@@ -62,11 +62,6 @@ typedef s64 USN;
typedef sle64 leUSN;
typedef enum {
- FALSE = 0,
- TRUE = 1
-} BOOL;
-
-typedef enum {
CASE_SENSITIVE = 0,
IGNORE_CASE = 1,
} IGNORE_CASE_BOOL;
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 19c42e231b4..005ca4b0f13 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -1,7 +1,7 @@
/*
* unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -19,6 +19,8 @@
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/slab.h>
+
#include "types.h"
#include "debug.h"
#include "ntfs.h"
@@ -59,16 +61,16 @@ static const u8 legal_ansi_char_array[0x40] = {
* @upcase: upcase table (only if @ic == IGNORE_CASE)
* @upcase_size: length in Unicode characters of @upcase (if present)
*
- * Compare the names @s1 and @s2 and return TRUE (1) if the names are
- * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
+ * Compare the names @s1 and @s2 and return 'true' (1) if the names are
+ * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE,
* the @upcase table is used to perform a case insensitive comparison.
*/
-BOOL ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
+bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic,
const ntfschar *upcase, const u32 upcase_size)
{
if (s1_len != s2_len)
- return FALSE;
+ return false;
if (ic == CASE_SENSITIVE)
return !ntfs_ucsncmp(s1, s2, s1_len);
return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);
@@ -242,7 +244,7 @@ int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
* map dictates, into a little endian, 2-byte Unicode string.
*
* This function allocates the string and the caller is responsible for
- * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it.
+ * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it.
*
* On success the function returns the number of Unicode characters written to
* the output string *@outs (>= 0), not counting the terminating Unicode NULL
@@ -262,37 +264,48 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
wchar_t wc;
int i, o, wc_len;
- /* We don't trust outside sources. */
- if (ins) {
- ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
- if (ucs) {
+ /* We do not trust outside sources. */
+ if (likely(ins)) {
+ ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
+ if (likely(ucs)) {
for (i = o = 0; i < ins_len; i += wc_len) {
wc_len = nls->char2uni(ins + i, ins_len - i,
&wc);
- if (wc_len >= 0) {
- if (wc) {
+ if (likely(wc_len >= 0 &&
+ o < NTFS_MAX_NAME_LEN)) {
+ if (likely(wc)) {
ucs[o++] = cpu_to_le16(wc);
continue;
- } /* else (!wc) */
+ } /* else if (!wc) */
break;
- } /* else (wc_len < 0) */
- goto conversion_err;
+ } /* else if (wc_len < 0 ||
+ o >= NTFS_MAX_NAME_LEN) */
+ goto name_err;
}
ucs[o] = 0;
*outs = ucs;
return o;
- } /* else (!ucs) */
- ntfs_error(vol->sb, "Failed to allocate name from "
- "ntfs_name_cache!");
+ } /* else if (!ucs) */
+ ntfs_error(vol->sb, "Failed to allocate buffer for converted "
+ "name from ntfs_name_cache.");
return -ENOMEM;
- } /* else (!ins) */
- ntfs_error(NULL, "Received NULL pointer.");
+ } /* else if (!ins) */
+ ntfs_error(vol->sb, "Received NULL pointer.");
return -EINVAL;
-conversion_err:
- ntfs_error(vol->sb, "Name using character set %s contains characters "
- "that cannot be converted to Unicode.", nls->charset);
+name_err:
kmem_cache_free(ntfs_name_cache, ucs);
- return -EILSEQ;
+ if (wc_len < 0) {
+ ntfs_error(vol->sb, "Name using character set %s contains "
+ "characters that cannot be converted to "
+ "Unicode.", nls->charset);
+ i = -EILSEQ;
+ } else /* if (o >= NTFS_MAX_NAME_LEN) */ {
+ ntfs_error(vol->sb, "Name is too long (maximum length for a "
+ "name on NTFS is %d Unicode characters.",
+ NTFS_MAX_NAME_LEN);
+ i = -ENAMETOOLONG;
+ }
+ return i;
}
/**
@@ -337,7 +350,7 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
}
if (!ns) {
ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
- ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS);
+ ns = kmalloc(ns_len + 1, GFP_NOFS);
if (!ns)
goto mem_err_out;
}
@@ -352,7 +365,7 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
else if (wc == -ENAMETOOLONG && ns != *outs) {
unsigned char *tc;
/* Grow in multiples of 64 bytes. */
- tc = (unsigned char*)kmalloc((ns_len + 64) &
+ tc = kmalloc((ns_len + 64) &
~63, GFP_NOFS);
if (tc) {
memcpy(tc, ns, ns_len);
@@ -372,7 +385,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
return -EINVAL;
conversion_err:
ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
- "converted to character set %s.", nls->charset);
+ "converted to character set %s. You might want to "
+ "try to use the mount option nls=utf8.", nls->charset);
if (ns != *outs)
kfree(ns);
if (wc != -ENAMETOOLONG)
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c
index 879cdf1d5bd..e2f72ca9803 100644
--- a/fs/ntfs/upcase.c
+++ b/fs/ntfs/upcase.c
@@ -3,10 +3,7 @@
* Part of the Linux-NTFS project.
*
* Copyright (c) 2001 Richard Russon <ntfs@flatcap.org>
- * Copyright (c) 2001-2004 Anton Altaparmakov
- *
- * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov.
- * Modified for kernel inclusion 10 September 2001 by Anton Altparmakov.
+ * Copyright (c) 2001-2006 Anton Altaparmakov
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -75,15 +72,15 @@ ntfschar *generate_default_upcase(void)
if (!uc)
return uc;
memset(uc, 0, default_upcase_len * sizeof(ntfschar));
+ /* Generate the little endian Unicode upcase table used by ntfs. */
for (i = 0; i < default_upcase_len; i++)
uc[i] = cpu_to_le16(i);
for (r = 0; uc_run_table[r][0]; r++)
for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
- uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) +
- uc_run_table[r][2]));
+ le16_add_cpu(&uc[i], uc_run_table[r][2]);
for (r = 0; uc_dup_table[r][0]; r++)
for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
- uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
+ le16_add_cpu(&uc[i + 1], -1);
for (r = 0; uc_word_table[r][0]; r++)
uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
return uc;
diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c
index 77773240d13..b2bc0d55b03 100644
--- a/fs/ntfs/usnjrnl.c
+++ b/fs/ntfs/usnjrnl.c
@@ -39,12 +39,12 @@
* @vol: ntfs volume on which to stamp the transaction log
*
* Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return
- * TRUE on success and FALSE on error.
+ * 'true' on success and 'false' on error.
*
* This function assumes that the transaction log has already been loaded and
* consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl().
*/
-BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
+bool ntfs_stamp_usnjrnl(ntfs_volume *vol)
{
ntfs_debug("Entering.");
if (likely(!NVolUsnJrnlStamped(vol))) {
@@ -56,7 +56,7 @@ BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read from "
"$UsnJrnl/$DATA/$Max attribute.");
- return FALSE;
+ return false;
}
uh = (USN_HEADER*)page_address(page);
stamp = get_current_ntfs_time();
@@ -78,7 +78,7 @@ BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
NVolSetUsnJrnlStamped(vol);
}
ntfs_debug("Done.");
- return TRUE;
+ return true;
}
#endif /* NTFS_RW */
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index ff988b0deb4..00d8e6bd7c3 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,30 +113,30 @@ typedef struct {
* Reason flags (32-bit). Cumulative flags describing the change(s) to the
* file since it was last opened. I think the names speak for themselves but
* if you disagree check out the descriptions in the Linux NTFS project NTFS
- * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ * documentation: http://www.linux-ntfs.org/
*/
enum {
- USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
- USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002),
- USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004),
- USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010),
- USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020),
- USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040),
- USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100),
- USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200),
- USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400),
- USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800),
- USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000),
- USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000),
- USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000),
- USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000),
- USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000),
- USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000),
- USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000),
- USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000),
- USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000),
- USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000),
- USN_REASON_CLOSE = const_cpu_to_le32(0x80000000),
+ USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001),
+ USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002),
+ USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004),
+ USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010),
+ USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020),
+ USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040),
+ USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100),
+ USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200),
+ USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400),
+ USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800),
+ USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000),
+ USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000),
+ USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000),
+ USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000),
+ USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000),
+ USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000),
+ USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000),
+ USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000),
+ USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000),
+ USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000),
+ USN_REASON_CLOSE = cpu_to_le32(0x80000000),
};
typedef le32 USN_REASON_FLAGS;
@@ -145,12 +145,12 @@ typedef le32 USN_REASON_FLAGS;
* Source info flags (32-bit). Information about the source of the change(s)
* to the file. For detailed descriptions of what these mean, see the Linux
* NTFS project NTFS documentation:
- * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
+ * http://www.linux-ntfs.org/
*/
enum {
- USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
- USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002),
- USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004),
+ USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001),
+ USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002),
+ USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004),
};
typedef le32 USN_SOURCE_INFO_FLAGS;
@@ -198,7 +198,7 @@ typedef struct {
/* sizeof() = 60 (0x3c) bytes */
} __attribute__ ((__packed__)) USN_RECORD;
-extern BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol);
+extern bool ntfs_stamp_usnjrnl(ntfs_volume *vol);
#endif /* NTFS_RW */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 375cd20a9f6..4f579b02bc7 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -2,7 +2,7 @@
* volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
* of the Linux-NTFS project.
*
- * Copyright (c) 2001-2005 Anton Altaparmakov
+ * Copyright (c) 2001-2006 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -25,6 +25,7 @@
#define _LINUX_NTFS_VOLUME_H
#include <linux/rwsem.h>
+#include <linux/uidgid.h>
#include "types.h"
#include "layout.h"
@@ -41,17 +42,15 @@ typedef struct {
* structure has stabilized... (AIA)
*/
/* Device specifics. */
- struct super_block *sb; /* Pointer back to the super_block,
- so we don't have to get the offset
- every time. */
- LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes
+ struct super_block *sb; /* Pointer back to the super_block. */
+ LCN nr_blocks; /* Number of sb->s_blocksize bytes
sized blocks on the device. */
/* Configuration provided by user at mount time. */
unsigned long flags; /* Miscellaneous flags, see below. */
- uid_t uid; /* uid that files will be mounted as. */
- gid_t gid; /* gid that files will be mounted as. */
- mode_t fmask; /* The mask for file permissions. */
- mode_t dmask; /* The mask for directory
+ kuid_t uid; /* uid that files will be mounted as. */
+ kgid_t gid; /* gid that files will be mounted as. */
+ umode_t fmask; /* The mask for file permissions. */
+ umode_t dmask; /* The mask for directory
permissions. */
u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
u8 on_errors; /* What to do on filesystem errors. */
@@ -141,8 +140,8 @@ typedef enum {
NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */
NV_CaseSensitive, /* 1: Treat file names as case sensitive and
create filenames in the POSIX namespace.
- Otherwise be case insensitive and create
- file names in WIN32 namespace. */
+ Otherwise be case insensitive but still
+ create file names in POSIX namespace. */
NV_LogFileEmpty, /* 1: $LogFile journal is empty. */
NV_QuotaOutOfDate, /* 1: $Quota is out of date. */
NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */
@@ -153,7 +152,7 @@ typedef enum {
* Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo()
* functions.
*/
-#define NVOL_FNS(flag) \
+#define DEFINE_NVOL_BIT_OPS(flag) \
static inline int NVol##flag(ntfs_volume *vol) \
{ \
return test_bit(NV_##flag, &(vol)->flags); \
@@ -168,12 +167,12 @@ static inline void NVolClear##flag(ntfs_volume *vol) \
}
/* Emit the ntfs volume bitops functions. */
-NVOL_FNS(Errors)
-NVOL_FNS(ShowSystemFiles)
-NVOL_FNS(CaseSensitive)
-NVOL_FNS(LogFileEmpty)
-NVOL_FNS(QuotaOutOfDate)
-NVOL_FNS(UsnJrnlStamped)
-NVOL_FNS(SparseEnabled)
+DEFINE_NVOL_BIT_OPS(Errors)
+DEFINE_NVOL_BIT_OPS(ShowSystemFiles)
+DEFINE_NVOL_BIT_OPS(CaseSensitive)
+DEFINE_NVOL_BIT_OPS(LogFileEmpty)
+DEFINE_NVOL_BIT_OPS(QuotaOutOfDate)
+DEFINE_NVOL_BIT_OPS(UsnJrnlStamped)
+DEFINE_NVOL_BIT_OPS(SparseEnabled)
#endif /* _LINUX_NTFS_VOLUME_H */