diff options
Diffstat (limited to 'fs')
69 files changed, 1328 insertions, 683 deletions
diff --git a/fs/Makefile b/fs/Makefile index 2168c902d5c..d9f8afe6f0c 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ -obj-$(CONFIG_MSDOS_FS) += msdos/ -obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e..33bf8cbfd05 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { - int err = -EINVAL; + int err; - if (check_dev_ioctl_version(cmd, param)) { + err = check_dev_ioctl_version(cmd, param); + if (err) { AUTOFS_WARN("invalid device control module version " "supplied for cmd(0x%08x)", cmd); goto out; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cde2f8e8935..4b6fb3f628c 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!autofs4_follow_mount(&mnt, &dentry)) + if (!follow_down(&mnt, &dentry)) goto done; - /* This is an autofs submount, we can't expire it */ - if (is_autofs4_dentry(dentry)) - goto done; + if (is_autofs4_dentry(dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* This is an autofs submount, we can't expire it */ + if (sbi->type == AUTOFS_TYPE_INDIRECT) + goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(dentry)) + goto done; + } /* Update the expiry counter if fs is busy */ if (!may_umount_tree(mnt)) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 88a776fa0ef..db831efbdbb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; - struct hd_struct *part = NULL; int ret; int partno; int perm = 0; @@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } - ret = -ENXIO; - lock_kernel(); + ret = -ENXIO; disk = get_gendisk(bdev->bd_dev, &partno); if (!disk) goto out_unlock_kernel; - part = disk_get_part(disk, partno); - if (!part) - goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; - bdev->bd_part = part; bdev->bd_contains = bdev; if (!partno) { struct backing_dev_info *bdi; + + ret = -ENXIO; + bdev->bd_part = disk_get_part(disk, partno); + if (!bdev->bd_part) + goto out_clear; + if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret) @@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_contains = whole; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; + bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || - !part || !part->nr_sects) { + !bdev->bd_part || !bdev->bd_part->nr_sects) { ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)part->nr_sects << 9); + bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } } else { - disk_put_part(part); put_disk(disk); module_put(disk->fops->owner); - part = NULL; disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { @@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return 0; out_clear: + disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; @@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) out_unlock_kernel: unlock_kernel(); - disk_put_part(part); if (disk) module_put(disk->fops->owner); put_disk(disk); diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 8f528ea24c4..8855331b2fb 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,7 +4,11 @@ Various fixes to make delete of open files behavior more predictable (when delete of an open file fails we mark the file as "delete-on-close" in a way that more servers accept, but only if we can first rename the file to a temporary name). Add experimental support for more safely -handling fcntl(F_SETLEASE). +handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp +sends, and also let tcp autotune the socket send and receive buffers. +This reduces the number of EAGAIN errors returned by TCP/IP in +high stress workloads (and the number of retries on socket writes +when sending large SMBWriteX requests). Version 1.54 ------------ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c791e5b5a91..1cb1189f24e 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -141,6 +141,8 @@ struct TCP_Server_Info { char versionMajor; char versionMinor; bool svlocal:1; /* local server or remote */ + bool noblocksnd; /* use blocking sendmsg */ + bool noautotune; /* do not autotune send buf sizes */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0cff7fe986e..6f21ecb85ce 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,7 +36,7 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, - unsigned int /* length */ , struct sockaddr *); + unsigned int /* length */ , struct sockaddr *, bool); extern unsigned int _GetXid(void); extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 843a85fb8b9..d5eac48fc41 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1536,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; - /* cFYI(1,("write at %lld %d bytes",offset,count));*/ + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if (tcon->ses == NULL) return -ECONNABORTED; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71b7661e226..e9f9248cb3f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -92,6 +92,8 @@ struct smb_vol { bool seal:1; /* request transport encryption on share */ bool nodfs:1; /* Do not request DFS, even if available */ bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -102,9 +104,11 @@ struct smb_vol { static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + bool noblocksnd, + bool nosndbuf); /* ipv6 never set sndbuf size */ static int ipv6_connect(struct sockaddr_in6 *psin_server, - struct socket **csocket); + struct socket **csocket, bool noblocksnd); /* @@ -191,12 +195,13 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); if (server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6, - &server->ssocket); + &server->ssocket, server->noautotune); } else { rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->noblocksnd, server->noautotune); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -1192,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname, /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = true; + } else if (strnicmp(data, "noblocksend", 11) == 0) { + vol->noblocksnd = 1; + } else if (strnicmp(data, "noautotune", 10) == 0) { + vol->noautotune = 1; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1518,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + bool noblocksnd, bool noautotune) { int rc = 0; int connected = 0; @@ -1590,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + /* make the bufsizes depend on wsize/rsize and max requests */ - if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) - (*csocket)->sk->sk_sndbuf = 200 * 1024; - if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) - (*csocket)->sk->sk_rcvbuf = 140 * 1024; + if (noautotune) { + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) + (*csocket)->sk->sk_sndbuf = 200 * 1024; + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + (*csocket)->sk->sk_rcvbuf = 140 * 1024; + } /* send RFC1001 sessinit */ if (psin_server->sin_port == htons(RFC1001_PORT)) { @@ -1631,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* sizeof RFC1002_SESSION_REQUEST with no scope */ smb_buf->smb_buf_length = 0x81000044; rc = smb_send(*csocket, smb_buf, 0x44, - (struct sockaddr *)psin_server); + (struct sockaddr *)psin_server, noblocksnd); kfree(ses_init_buf); msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot @@ -1651,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } static int -ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) +ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, + bool noblocksnd) { int rc = 0; int connected = 0; @@ -1720,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + return rc; } @@ -1983,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket); + rc = ipv6_connect(&sin_server6, &csocket, + volume_info.noblocksnd); } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.noblocksnd, + volume_info.noautotune); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. " "Aborting operation")); @@ -2002,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); goto out; } else { + srvTcp->noblocksnd = volume_info.noblocksnd; + srvTcp->noautotune = volume_info.noautotune; memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 62d8bd8f14c..ead1a3bb025 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - cFYI(DBG2, ("rpages: num pages %d", num_pages)); + cFYI(DBG2, ("rpages: num pages %d", num_pages)); for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d54fa8aeaea..ff8c68de4a9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == - info_buf_target->UniqueId)) + info_buf_target->UniqueId)) { /* same file, POSIX says that this is a noop */ + rc = 0; goto cifs_rename_exit; + } } /* else ... BB we could add the same check for Windows by checking the UniqueId via FILE_INTERNAL_INFO */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bf0e6d8e382..ff8243a8fe3 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) int smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length, struct sockaddr *sin) + unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } static int -smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, - struct sockaddr *sin) +smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, + struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; + struct socket *ssocket = server->ssocket; if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, if (rc < 0) break; - if (rc >= total_len) { - WARN_ON(rc > total_len); + if (rc == total_len) { + total_len = 0; + break; + } else if (rc > total_len) { + cERROR(1, ("sent %d requested %d", rc, total_len)); break; } if (rc == 0) { @@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, i = 0; /* in case we get ENOSPC on the next send */ } + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + cFYI(1, ("partial send (%d remaining), terminating session", + total_len)); + /* If we have only sent part of an SMB then the next SMB + could be taken as the remainder of this one. We need + to kill the socket so the server throws away the partial + SMB */ + server->tcpStatus = CifsNeedReconnect; + } + if (rc < 0) { cERROR(1, ("Error %d sending data on socket to server", rc)); } else @@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #ifdef CONFIG_CIFS_STATS2 atomic_inc(&ses->server->inSend); #endif - rc = smb_send2(ses->server->ssocket, iov, n_vec, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + rc = smb_send2(ses->server, iov, n_vec, + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, return rc; } rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); up(&ses->server->tcpSem); return rc; } @@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index cfd29da714d..0376ac66c44 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -2,7 +2,7 @@ * An implementation of a loadable kernel mode driver providing * multiple kernel/user space bidirectional communications links. * - * Author: Alan Cox <alan@redhat.com> + * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 06db79d05c1..6046239465a 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1251,6 +1251,7 @@ struct kmem_cache *ecryptfs_header_cache_2; /** * ecryptfs_write_headers_virt * @page_virt: The virtual address to write the headers to + * @m |